blob: 4d14451254fc975253dc7045cded3e1e2008671d [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030012class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* Basic size of a bytes object: the offset of the data array plus one byte
   for the trailing null.  An allocation for a string of length n must
   request PyBytesObject_SIZE + n bytes.

   This is used in place of sizeof(PyBytesObject), which on a typical
   system would cost 3 more bytes per string allocation.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length (not counting the terminator) becomes the object's size.
40
Martin Pantera90a4a92016-05-30 04:04:50 +000041 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042 either NULL or else points to a string containing at least `size' bytes.
43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44 not have to be null-terminated. (Therefore it is safe to construct a
45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47 bytes (setting the last byte to the null terminating character) and you can
48 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000049 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000050 alter the data yourself, since the strings may be shared.
51
52 The PyObject member `op->ob_size', which denotes the number of "extra
53 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020054 allocated for string data, not counting the null terminating character.
55 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 PyBytes_FromStringAndSize()) or the length of the string in the `str'
57 parameter (for PyBytes_FromString()).
58*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700250 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200441 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 str += len;
443 return str;
444 }
445
446 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200448 *p_result = result;
449 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450}
451
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455 PyObject *result, *iobj;
456 if (type == 'i')
457 type = 'd';
458 if (PyLong_Check(v))
459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460 if (PyNumber_Check(v)) {
461 /* make sure number is a type of integer for o, x, and X */
462 if (type == 'o' || type == 'x' || type == 'X')
463 iobj = PyNumber_Index(v);
464 else
465 iobj = PyNumber_Long(v);
466 if (iobj == NULL) {
467 if (!PyErr_ExceptionMatches(PyExc_TypeError))
468 return NULL;
469 }
470 else if (!PyLong_Check(iobj))
471 Py_CLEAR(iobj);
472 if (iobj != NULL) {
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 }
478 PyErr_Format(PyExc_TypeError,
479 "%%%c format: %s is required, not %.200s", type,
480 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481 : "a number",
482 Py_TYPE(v)->tp_name);
483 return NULL;
484}
485
486static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyBytes_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 *p = PyByteArray_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
497 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300498 PyObject *iobj;
499 long ival;
500 int overflow;
501 /* make sure number is a type of integer */
502 if (PyLong_Check(arg)) {
503 ival = PyLong_AsLongAndOverflow(arg, &overflow);
504 }
505 else {
506 iobj = PyNumber_Index(arg);
507 if (iobj == NULL) {
508 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509 return 0;
510 goto onError;
511 }
512 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513 Py_DECREF(iobj);
514 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300515 if (!overflow && ival == -1 && PyErr_Occurred())
516 goto onError;
517 if (overflow || !(0 <= ival && ival <= 255)) {
518 PyErr_SetString(PyExc_OverflowError,
519 "%c arg not in range(256)");
520 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300522 *p = (char)ival;
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300525 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 PyErr_SetString(PyExc_TypeError,
527 "%c requires an integer in range(256) or a single byte");
528 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529}
530
531static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 /* is it a bytes object? */
537 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *pbuf = PyBytes_AS_STRING(v);
539 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 return v;
542 }
543 if (PyByteArray_Check(v)) {
544 *pbuf = PyByteArray_AS_STRING(v);
545 *plen = PyByteArray_GET_SIZE(v);
546 Py_INCREF(v);
547 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 }
549 /* does it support __bytes__? */
550 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551 if (func != NULL) {
552 result = PyObject_CallFunctionObjArgs(func, NULL);
553 Py_DECREF(func);
554 if (result == NULL)
555 return NULL;
556 if (!PyBytes_Check(result)) {
557 PyErr_Format(PyExc_TypeError,
558 "__bytes__ returned non-bytes (type %.200s)",
559 Py_TYPE(result)->tp_name);
560 Py_DECREF(result);
561 return NULL;
562 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200563 *pbuf = PyBytes_AS_STRING(result);
564 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 return result;
566 }
567 PyErr_Format(PyExc_TypeError,
568 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569 Py_TYPE(v)->tp_name);
570 return NULL;
571}
572
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200573/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574
575PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200576_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578{
Victor Stinner772b2b02015-10-14 09:56:53 +0200579 const char *fmt;
580 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200582 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200585 _PyBytesWriter writer;
586
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_BadInternalCall();
589 return NULL;
590 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200591 fmt = format;
592 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593
594 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596
597 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 if (!use_bytearray)
601 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200602
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 if (PyTuple_Check(args)) {
604 arglen = PyTuple_GET_SIZE(args);
605 argidx = 0;
606 }
607 else {
608 arglen = -1;
609 argidx = -2;
610 }
611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613 !PyByteArray_Check(args)) {
614 dict = args;
615 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 while (--fmtcnt >= 0) {
618 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 Py_ssize_t len;
620 char *pos;
621
622 pos = strchr(fmt + 1, '%');
623 if (pos != NULL)
624 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 else
626 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 assert(len != 0);
628
629 Py_MEMCPY(res, fmt, len);
630 res += len;
631 fmt += len;
632 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 }
634 else {
635 /* Got a format specifier */
636 int flags = 0;
637 Py_ssize_t width = -1;
638 int prec = -1;
639 int c = '\0';
640 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 PyObject *v = NULL;
642 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200643 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800644 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200645 Py_ssize_t len = 0;
646 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200647 Py_ssize_t alloc;
648#ifdef Py_DEBUG
649 char *before;
650#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 fmt++;
653 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200654 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 Py_ssize_t keylen;
656 PyObject *key;
657 int pcount = 1;
658
659 if (dict == NULL) {
660 PyErr_SetString(PyExc_TypeError,
661 "format requires a mapping");
662 goto error;
663 }
664 ++fmt;
665 --fmtcnt;
666 keystart = fmt;
667 /* Skip over balanced parentheses */
668 while (pcount > 0 && --fmtcnt >= 0) {
669 if (*fmt == ')')
670 --pcount;
671 else if (*fmt == '(')
672 ++pcount;
673 fmt++;
674 }
675 keylen = fmt - keystart - 1;
676 if (fmtcnt < 0 || pcount > 0) {
677 PyErr_SetString(PyExc_ValueError,
678 "incomplete format key");
679 goto error;
680 }
681 key = PyBytes_FromStringAndSize(keystart,
682 keylen);
683 if (key == NULL)
684 goto error;
685 if (args_owned) {
686 Py_DECREF(args);
687 args_owned = 0;
688 }
689 args = PyObject_GetItem(dict, key);
690 Py_DECREF(key);
691 if (args == NULL) {
692 goto error;
693 }
694 args_owned = 1;
695 arglen = -1;
696 argidx = -2;
697 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200698
699 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800700 while (--fmtcnt >= 0) {
701 switch (c = *fmt++) {
702 case '-': flags |= F_LJUST; continue;
703 case '+': flags |= F_SIGN; continue;
704 case ' ': flags |= F_BLANK; continue;
705 case '#': flags |= F_ALT; continue;
706 case '0': flags |= F_ZERO; continue;
707 }
708 break;
709 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200710
711 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800712 if (c == '*') {
713 v = getnextarg(args, arglen, &argidx);
714 if (v == NULL)
715 goto error;
716 if (!PyLong_Check(v)) {
717 PyErr_SetString(PyExc_TypeError,
718 "* wants int");
719 goto error;
720 }
721 width = PyLong_AsSsize_t(v);
722 if (width == -1 && PyErr_Occurred())
723 goto error;
724 if (width < 0) {
725 flags |= F_LJUST;
726 width = -width;
727 }
728 if (--fmtcnt >= 0)
729 c = *fmt++;
730 }
731 else if (c >= 0 && isdigit(c)) {
732 width = c - '0';
733 while (--fmtcnt >= 0) {
734 c = Py_CHARMASK(*fmt++);
735 if (!isdigit(c))
736 break;
737 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738 PyErr_SetString(
739 PyExc_ValueError,
740 "width too big");
741 goto error;
742 }
743 width = width*10 + (c - '0');
744 }
745 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200746
747 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800748 if (c == '.') {
749 prec = 0;
750 if (--fmtcnt >= 0)
751 c = *fmt++;
752 if (c == '*') {
753 v = getnextarg(args, arglen, &argidx);
754 if (v == NULL)
755 goto error;
756 if (!PyLong_Check(v)) {
757 PyErr_SetString(
758 PyExc_TypeError,
759 "* wants int");
760 goto error;
761 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200762 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800763 if (prec == -1 && PyErr_Occurred())
764 goto error;
765 if (prec < 0)
766 prec = 0;
767 if (--fmtcnt >= 0)
768 c = *fmt++;
769 }
770 else if (c >= 0 && isdigit(c)) {
771 prec = c - '0';
772 while (--fmtcnt >= 0) {
773 c = Py_CHARMASK(*fmt++);
774 if (!isdigit(c))
775 break;
776 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777 PyErr_SetString(
778 PyExc_ValueError,
779 "prec too big");
780 goto error;
781 }
782 prec = prec*10 + (c - '0');
783 }
784 }
785 } /* prec */
786 if (fmtcnt >= 0) {
787 if (c == 'h' || c == 'l' || c == 'L') {
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 }
792 if (fmtcnt < 0) {
793 PyErr_SetString(PyExc_ValueError,
794 "incomplete format");
795 goto error;
796 }
797 if (c != '%') {
798 v = getnextarg(args, arglen, &argidx);
799 if (v == NULL)
800 goto error;
801 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200802
803 if (fmtcnt < 0) {
804 /* last writer: disable writer overallocation */
805 writer.overallocate = 0;
806 }
807
Ethan Furmanb95b5612015-01-23 20:05:18 -0800808 sign = 0;
809 fill = ' ';
810 switch (c) {
811 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200812 *res++ = '%';
813 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200844 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200845 && width == -1 && prec == -1
846 && !(flags & (F_SIGN | F_BLANK))
847 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 {
849 /* Fast path */
850 int alternate = flags & F_ALT;
851 int base;
852
853 switch(c)
854 {
855 default:
856 assert(0 && "'type' not in [diuoxX]");
857 case 'd':
858 case 'i':
859 case 'u':
860 base = 10;
861 break;
862 case 'o':
863 base = 8;
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 break;
869 }
870
871 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200872 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200873 res = _PyLong_FormatBytesWriter(&writer, res,
874 v, base, alternate);
875 if (res == NULL)
876 goto error;
877 continue;
878 }
879
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300880 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200881 if (!temp)
882 goto error;
883 assert(PyUnicode_IS_ASCII(temp));
884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885 len = PyUnicode_GET_LENGTH(temp);
886 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 if (flags & F_ZERO)
888 fill = '0';
889 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200890
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 case 'e':
892 case 'E':
893 case 'f':
894 case 'F':
895 case 'g':
896 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897 if (width == -1 && prec == -1
898 && !(flags & (F_SIGN | F_BLANK)))
899 {
900 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200901 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200902 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (res == NULL)
904 goto error;
905 continue;
906 }
907
Victor Stinnerad771582015-10-09 12:38:53 +0200908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 goto error;
910 pbuf = PyBytes_AS_STRING(temp);
911 len = PyBytes_GET_SIZE(temp);
912 sign = 1;
913 if (flags & F_ZERO)
914 fill = '0';
915 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200918 pbuf = &onechar;
919 len = byte_converter(v, &onechar);
920 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200922 if (width == -1) {
923 /* Fast path */
924 *res++ = onechar;
925 continue;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 default:
930 PyErr_Format(PyExc_ValueError,
931 "unsupported format character '%c' (0x%x) "
932 "at index %zd",
933 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200934 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 goto error;
936 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200937
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if (sign) {
939 if (*pbuf == '-' || *pbuf == '+') {
940 sign = *pbuf++;
941 len--;
942 }
943 else if (flags & F_SIGN)
944 sign = '+';
945 else if (flags & F_BLANK)
946 sign = ' ';
947 else
948 sign = 0;
949 }
950 if (width < len)
951 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
953 alloc = width;
954 if (sign != 0 && len == width)
955 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200956 /* 2: size preallocated for %s */
957 if (alloc > 2) {
958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800961 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200962#ifdef Py_DEBUG
963 before = res;
964#endif
965
966 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 if (sign) {
968 if (fill != ' ')
969 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 if (width > len)
971 width--;
972 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Write the numeric prefix for "x", "X" and "o" formats
975 if the alternate form is used.
976 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
978 assert(pbuf[0] == '0');
979 assert(pbuf[1] == c);
980 if (fill != ' ') {
981 *res++ = *pbuf++;
982 *res++ = *pbuf++;
983 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 width -= 2;
985 if (width < 0)
986 width = 0;
987 len -= 2;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992 memset(res, fill, width - len);
993 res += (width - len);
994 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996
997 /* If padding with spaces: write sign if needed and/or numeric
998 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (fill == ' ') {
1000 if (sign)
1001 *res++ = sign;
1002 if ((flags & F_ALT) &&
1003 (c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001010
1011 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 Py_MEMCPY(res, pbuf, len);
1013 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (dict && (argidx < arglen) && c != '%') {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001024 Py_XDECREF(temp);
1025 goto error;
1026 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029#ifdef Py_DEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001037 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001045
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050
1051 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057}
1058
1059/* =-= */
1060
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001061static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001062bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001065}
1066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067/* Unescape a backslash-escaped string. If unicode is non-zero,
1068 the string is a u-literal. If recode_encoding is non-zero,
1069 the string is UTF-8 encoded and should be re-encoded in the
1070 specified encoding. */
1071
Victor Stinner2ec80632015-10-14 13:32:13 +02001072static char *
1073_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1074 const char *errors, const char *recode_encoding,
1075 _PyBytesWriter *writer, char *p)
1076{
1077 PyObject *u, *w;
1078 const char* t;
1079
1080 t = *s;
1081 /* Decode non-ASCII bytes as UTF-8. */
1082 while (t < end && (*t & 0x80))
1083 t++;
1084 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1085 if (u == NULL)
1086 return NULL;
1087
1088 /* Recode them in target encoding. */
1089 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1090 Py_DECREF(u);
1091 if (w == NULL)
1092 return NULL;
1093 assert(PyBytes_Check(w));
1094
1095 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001096 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001097 p = _PyBytesWriter_WriteBytes(writer, p,
1098 PyBytes_AS_STRING(w),
1099 PyBytes_GET_SIZE(w));
1100 Py_DECREF(w);
1101 if (p == NULL)
1102 return NULL;
1103
1104 *s = t;
1105 return p;
1106}
1107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 Py_ssize_t len,
1110 const char *errors,
1111 Py_ssize_t unicode,
1112 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001115 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 _PyBytesWriter writer;
1118
1119 _PyBytesWriter_Init(&writer);
1120
1121 p = _PyBytesWriter_Alloc(&writer, len);
1122 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001124 writer.overallocate = 1;
1125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 end = s + len;
1127 while (s < end) {
1128 if (*s != '\\') {
1129 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 *p++ = *s++;
1132 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001133 else {
1134 /* non-ASCII character and need to recode */
1135 p = _PyBytes_DecodeEscapeRecode(&s, end,
1136 errors, recode_encoding,
1137 &writer, p);
1138 if (p == NULL)
1139 goto failed;
1140 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 continue;
1142 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 PyErr_SetString(PyExc_ValueError,
1147 "Trailing \\ in string");
1148 goto failed;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 switch (*s++) {
1152 /* XXX This assumes ASCII! */
1153 case '\n': break;
1154 case '\\': *p++ = '\\'; break;
1155 case '\'': *p++ = '\''; break;
1156 case '\"': *p++ = '\"'; break;
1157 case 'b': *p++ = '\b'; break;
1158 case 'f': *p++ = '\014'; break; /* FF */
1159 case 't': *p++ = '\t'; break;
1160 case 'n': *p++ = '\n'; break;
1161 case 'r': *p++ = '\r'; break;
1162 case 'v': *p++ = '\013'; break; /* VT */
1163 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1164 case '0': case '1': case '2': case '3':
1165 case '4': case '5': case '6': case '7':
1166 c = s[-1] - '0';
1167 if (s < end && '0' <= *s && *s <= '7') {
1168 c = (c<<3) + *s++ - '0';
1169 if (s < end && '0' <= *s && *s <= '7')
1170 c = (c<<3) + *s++ - '0';
1171 }
1172 *p++ = c;
1173 break;
1174 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001175 if (s+1 < end) {
1176 int digit1, digit2;
1177 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1178 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1179 if (digit1 < 16 && digit2 < 16) {
1180 *p++ = (unsigned char)((digit1 << 4) + digit2);
1181 s += 2;
1182 break;
1183 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001185 /* invalid hexadecimal digits */
1186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001188 PyErr_Format(PyExc_ValueError,
1189 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001190 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 goto failed;
1192 }
1193 if (strcmp(errors, "replace") == 0) {
1194 *p++ = '?';
1195 } else if (strcmp(errors, "ignore") == 0)
1196 /* do nothing */;
1197 else {
1198 PyErr_Format(PyExc_ValueError,
1199 "decoding error; unknown "
1200 "error handling code: %.400s",
1201 errors);
1202 goto failed;
1203 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001204 /* skip \x */
1205 if (s < end && Py_ISXDIGIT(s[0]))
1206 s++; /* and a hexdigit */
1207 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 default:
R David Murray110b6fe2016-09-08 15:34:08 -04001210 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
1211 goto failed;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 *p++ = '\\';
Ezio Melotti42da6632011-03-15 05:18:48 +02001213 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 UTF-8 bytes may follow. */
1215 }
1216 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001217
1218 return _PyBytesWriter_Finish(&writer, p);
1219
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001221 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223}
1224
1225/* -------------------------------------------------------------------- */
1226/* object api */
1227
1228Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001229PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 if (!PyBytes_Check(op)) {
1232 PyErr_Format(PyExc_TypeError,
1233 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1234 return -1;
1235 }
1236 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237}
1238
1239char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001240PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 if (!PyBytes_Check(op)) {
1243 PyErr_Format(PyExc_TypeError,
1244 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1245 return NULL;
1246 }
1247 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001248}
1249
1250int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001251PyBytes_AsStringAndSize(PyObject *obj,
1252 char **s,
1253 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 if (s == NULL) {
1256 PyErr_BadInternalCall();
1257 return -1;
1258 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 if (!PyBytes_Check(obj)) {
1261 PyErr_Format(PyExc_TypeError,
1262 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1263 return -1;
1264 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 *s = PyBytes_AS_STRING(obj);
1267 if (len != NULL)
1268 *len = PyBytes_GET_SIZE(obj);
1269 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001270 PyErr_SetString(PyExc_ValueError,
1271 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 return -1;
1273 }
1274 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275}
Neal Norwitz6968b052007-02-27 19:02:19 +00001276
1277/* -------------------------------------------------------------------- */
1278/* Methods */
1279
Eric Smith0923d1d2009-04-16 20:16:10 +00001280#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001281
1282#include "stringlib/fastsearch.h"
1283#include "stringlib/count.h"
1284#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001285#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001286#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001287#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001288#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Eric Smith0f78bff2009-11-30 01:01:42 +00001290#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001291
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292PyObject *
1293PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001294{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001295 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001297 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 unsigned char quote, *s, *p;
1300
1301 /* Compute size of output string */
1302 squotes = dquotes = 0;
1303 newsize = 3; /* b'' */
1304 s = (unsigned char*)op->ob_sval;
1305 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001308 case '\'': squotes++; break;
1309 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001311 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 default:
1313 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001314 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001315 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001316 if (newsize > PY_SSIZE_T_MAX - incr)
1317 goto overflow;
1318 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 }
1320 quote = '\'';
1321 if (smartquotes && squotes && !dquotes)
1322 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001323 if (squotes && quote == '\'') {
1324 if (newsize > PY_SSIZE_T_MAX - squotes)
1325 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328
1329 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 if (v == NULL) {
1331 return NULL;
1332 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 *p++ = 'b', *p++ = quote;
1336 for (i = 0; i < length; i++) {
1337 unsigned char c = op->ob_sval[i];
1338 if (c == quote || c == '\\')
1339 *p++ = '\\', *p++ = c;
1340 else if (c == '\t')
1341 *p++ = '\\', *p++ = 't';
1342 else if (c == '\n')
1343 *p++ = '\\', *p++ = 'n';
1344 else if (c == '\r')
1345 *p++ = '\\', *p++ = 'r';
1346 else if (c < ' ' || c >= 0x7f) {
1347 *p++ = '\\';
1348 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001349 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1350 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352 else
1353 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001356 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001357 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001358
1359 overflow:
1360 PyErr_SetString(PyExc_OverflowError,
1361 "bytes object is too large to make repr");
1362 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001363}
1364
Neal Norwitz6968b052007-02-27 19:02:19 +00001365static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001366bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001369}
1370
Neal Norwitz6968b052007-02-27 19:02:19 +00001371static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001372bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001373{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 if (Py_BytesWarningFlag) {
1375 if (PyErr_WarnEx(PyExc_BytesWarning,
1376 "str() on a bytes instance", 1))
1377 return NULL;
1378 }
1379 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001380}
1381
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001383bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386}
Neal Norwitz6968b052007-02-27 19:02:19 +00001387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388/* This is also used by PyBytes_Concat() */
1389static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001390bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 Py_buffer va, vb;
1393 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 va.len = -1;
1396 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001397 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1398 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1400 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1401 goto done;
1402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 /* Optimize end cases */
1405 if (va.len == 0 && PyBytes_CheckExact(b)) {
1406 result = b;
1407 Py_INCREF(result);
1408 goto done;
1409 }
1410 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1411 result = a;
1412 Py_INCREF(result);
1413 goto done;
1414 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001416 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 PyErr_NoMemory();
1418 goto done;
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001421 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (result != NULL) {
1423 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1424 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1425 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
1427 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (va.len != -1)
1429 PyBuffer_Release(&va);
1430 if (vb.len != -1)
1431 PyBuffer_Release(&vb);
1432 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
Neal Norwitz6968b052007-02-27 19:02:19 +00001434
1435static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001436bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438 Py_ssize_t i;
1439 Py_ssize_t j;
1440 Py_ssize_t size;
1441 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 size_t nbytes;
1443 if (n < 0)
1444 n = 0;
1445 /* watch out for overflows: the size can overflow int,
1446 * and the # of bytes needed can overflow size_t
1447 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001448 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyErr_SetString(PyExc_OverflowError,
1450 "repeated bytes are too long");
1451 return NULL;
1452 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001453 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1455 Py_INCREF(a);
1456 return (PyObject *)a;
1457 }
1458 nbytes = (size_t)size;
1459 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1460 PyErr_SetString(PyExc_OverflowError,
1461 "repeated bytes are too long");
1462 return NULL;
1463 }
1464 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1465 if (op == NULL)
1466 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001467 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 op->ob_shash = -1;
1469 op->ob_sval[size] = '\0';
1470 if (Py_SIZE(a) == 1 && n > 0) {
1471 memset(op->ob_sval, a->ob_sval[0] , n);
1472 return (PyObject *) op;
1473 }
1474 i = 0;
1475 if (i < size) {
1476 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1477 i = Py_SIZE(a);
1478 }
1479 while (i < size) {
1480 j = (i <= size-i) ? i : size-i;
1481 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1482 i += j;
1483 }
1484 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001485}
1486
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001487static int
1488bytes_contains(PyObject *self, PyObject *arg)
1489{
1490 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1491}
1492
Neal Norwitz6968b052007-02-27 19:02:19 +00001493static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001494bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 if (i < 0 || i >= Py_SIZE(a)) {
1497 PyErr_SetString(PyExc_IndexError, "index out of range");
1498 return NULL;
1499 }
1500 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001501}
1502
Benjamin Peterson621b4302016-09-09 13:54:34 -07001503static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001504bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1505{
1506 int cmp;
1507 Py_ssize_t len;
1508
1509 len = Py_SIZE(a);
1510 if (Py_SIZE(b) != len)
1511 return 0;
1512
1513 if (a->ob_sval[0] != b->ob_sval[0])
1514 return 0;
1515
1516 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1517 return (cmp == 0);
1518}
1519
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001521bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001522{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 int c;
1524 Py_ssize_t len_a, len_b;
1525 Py_ssize_t min_len;
1526 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001527 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 /* Make sure both arguments are strings. */
1530 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001531 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001532 rc = PyObject_IsInstance((PyObject*)a,
1533 (PyObject*)&PyUnicode_Type);
1534 if (!rc)
1535 rc = PyObject_IsInstance((PyObject*)b,
1536 (PyObject*)&PyUnicode_Type);
1537 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001539 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001540 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001541 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001542 return NULL;
1543 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001544 else {
1545 rc = PyObject_IsInstance((PyObject*)a,
1546 (PyObject*)&PyLong_Type);
1547 if (!rc)
1548 rc = PyObject_IsInstance((PyObject*)b,
1549 (PyObject*)&PyLong_Type);
1550 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001551 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001552 if (rc) {
1553 if (PyErr_WarnEx(PyExc_BytesWarning,
1554 "Comparison between bytes and int", 1))
1555 return NULL;
1556 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001557 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 }
1559 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001563 case Py_EQ:
1564 case Py_LE:
1565 case Py_GE:
1566 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001568 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001569 case Py_NE:
1570 case Py_LT:
1571 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001573 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001574 default:
1575 PyErr_BadArgument();
1576 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
1578 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001579 else if (op == Py_EQ || op == Py_NE) {
1580 int eq = bytes_compare_eq(a, b);
1581 eq ^= (op == Py_NE);
1582 result = eq ? Py_True : Py_False;
1583 }
1584 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001585 len_a = Py_SIZE(a);
1586 len_b = Py_SIZE(b);
1587 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001588 if (min_len > 0) {
1589 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001590 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001593 else
1594 c = 0;
1595 if (c == 0)
1596 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1597 switch (op) {
1598 case Py_LT: c = c < 0; break;
1599 case Py_LE: c = c <= 0; break;
1600 case Py_GT: c = c > 0; break;
1601 case Py_GE: c = c >= 0; break;
1602 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001603 PyErr_BadArgument();
1604 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 }
1606 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 Py_INCREF(result);
1610 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001611}
1612
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001613static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001614bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001615{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001616 if (a->ob_shash == -1) {
1617 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001618 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001619 }
1620 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001624bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001625{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 if (PyIndex_Check(item)) {
1627 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1628 if (i == -1 && PyErr_Occurred())
1629 return NULL;
1630 if (i < 0)
1631 i += PyBytes_GET_SIZE(self);
1632 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1633 PyErr_SetString(PyExc_IndexError,
1634 "index out of range");
1635 return NULL;
1636 }
1637 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1638 }
1639 else if (PySlice_Check(item)) {
1640 Py_ssize_t start, stop, step, slicelength, cur, i;
1641 char* source_buf;
1642 char* result_buf;
1643 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001644
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001645 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 PyBytes_GET_SIZE(self),
1647 &start, &stop, &step, &slicelength) < 0) {
1648 return NULL;
1649 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 if (slicelength <= 0) {
1652 return PyBytes_FromStringAndSize("", 0);
1653 }
1654 else if (start == 0 && step == 1 &&
1655 slicelength == PyBytes_GET_SIZE(self) &&
1656 PyBytes_CheckExact(self)) {
1657 Py_INCREF(self);
1658 return (PyObject *)self;
1659 }
1660 else if (step == 1) {
1661 return PyBytes_FromStringAndSize(
1662 PyBytes_AS_STRING(self) + start,
1663 slicelength);
1664 }
1665 else {
1666 source_buf = PyBytes_AS_STRING(self);
1667 result = PyBytes_FromStringAndSize(NULL, slicelength);
1668 if (result == NULL)
1669 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 result_buf = PyBytes_AS_STRING(result);
1672 for (cur = start, i = 0; i < slicelength;
1673 cur += step, i++) {
1674 result_buf[i] = source_buf[cur];
1675 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 return result;
1678 }
1679 }
1680 else {
1681 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001682 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 Py_TYPE(item)->tp_name);
1684 return NULL;
1685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686}
1687
1688static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001689bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1692 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693}
1694
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001695static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 (lenfunc)bytes_length, /*sq_length*/
1697 (binaryfunc)bytes_concat, /*sq_concat*/
1698 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1699 (ssizeargfunc)bytes_item, /*sq_item*/
1700 0, /*sq_slice*/
1701 0, /*sq_ass_item*/
1702 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001703 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704};
1705
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001706static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 (lenfunc)bytes_length,
1708 (binaryfunc)bytes_subscript,
1709 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710};
1711
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001712static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 (getbufferproc)bytes_buffer_getbuffer,
1714 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715};
1716
1717
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(). */
#define LEFTSTRIP   0   /* strip at the start only */
#define RIGHTSTRIP  1   /* strip at the end only */
#define BOTHSTRIP   2   /* strip at both ends */
1721
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001722/*[clinic input]
1723bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725 sep: object = None
1726 The delimiter according which to split the bytes.
1727 None (the default value) means split on ASCII whitespace characters
1728 (space, tab, return, newline, formfeed, vertical tab).
1729 maxsplit: Py_ssize_t = -1
1730 Maximum number of splits to do.
1731 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001733Return a list of the sections in the bytes, using sep as the delimiter.
1734[clinic start generated code]*/
1735
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001736static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001737bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1738/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739{
1740 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 const char *s = PyBytes_AS_STRING(self), *sub;
1742 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001743 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 if (maxsplit < 0)
1746 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001747 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001749 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 return NULL;
1751 sub = vsub.buf;
1752 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1755 PyBuffer_Release(&vsub);
1756 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001757}
1758
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001759/*[clinic input]
1760bytes.partition
1761
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763 /
1764
1765Partition the bytes into three parts using the given separator.
1766
1767This will search for the separator sep in the bytes. If the separator is found,
1768returns a 3-tuple containing the part before the separator, the separator
1769itself, and the part after it.
1770
1771If the separator is not found, returns a 3-tuple containing the original bytes
1772object and two empty bytes objects.
1773[clinic start generated code]*/
1774
Neal Norwitz6968b052007-02-27 19:02:19 +00001775static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001776bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001777/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001778{
Neal Norwitz6968b052007-02-27 19:02:19 +00001779 return stringlib_partition(
1780 (PyObject*) self,
1781 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001782 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001783 );
1784}
1785
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001786/*[clinic input]
1787bytes.rpartition
1788
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001789 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 /
1791
1792Partition the bytes into three parts using the given separator.
1793
1794This will search for the separator sep in the bytes, starting and the end. If
1795the separator is found, returns a 3-tuple containing the part before the
1796separator, the separator itself, and the part after it.
1797
1798If the separator is not found, returns a 3-tuple containing two empty bytes
1799objects and the original bytes object.
1800[clinic start generated code]*/
1801
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001802static PyObject *
1803bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001804/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001805{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 return stringlib_rpartition(
1807 (PyObject*) self,
1808 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001809 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001811}
1812
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001813/*[clinic input]
1814bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001815
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001816Return a list of the sections in the bytes, using sep as the delimiter.
1817
1818Splitting is done starting at the end of the bytes and working to the front.
1819[clinic start generated code]*/
1820
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001822bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1823/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001824{
1825 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 const char *s = PyBytes_AS_STRING(self), *sub;
1827 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 if (maxsplit < 0)
1831 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001834 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 return NULL;
1836 sub = vsub.buf;
1837 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1840 PyBuffer_Release(&vsub);
1841 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001842}
1843
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001845/*[clinic input]
1846bytes.join
1847
1848 iterable_of_bytes: object
1849 /
1850
1851Concatenate any number of bytes objects.
1852
1853The bytes whose method is called is inserted in between each pair.
1854
1855The result is returned as a new bytes object.
1856
1857Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1858[clinic start generated code]*/
1859
Neal Norwitz6968b052007-02-27 19:02:19 +00001860static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001861bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1862/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001863{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001864 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001865}
1866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867PyObject *
1868_PyBytes_Join(PyObject *sep, PyObject *x)
1869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 assert(sep != NULL && PyBytes_Check(sep));
1871 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001872 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873}
1874
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001875static PyObject *
1876bytes_find(PyBytesObject *self, PyObject *args)
1877{
1878 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1879}
1880
1881static PyObject *
1882bytes_index(PyBytesObject *self, PyObject *args)
1883{
1884 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1885}
1886
1887
1888static PyObject *
1889bytes_rfind(PyBytesObject *self, PyObject *args)
1890{
1891 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1892}
1893
1894
1895static PyObject *
1896bytes_rindex(PyBytesObject *self, PyObject *args)
1897{
1898 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1899}
1900
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
1902Py_LOCAL_INLINE(PyObject *)
1903do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 Py_buffer vsep;
1906 char *s = PyBytes_AS_STRING(self);
1907 Py_ssize_t len = PyBytes_GET_SIZE(self);
1908 char *sep;
1909 Py_ssize_t seplen;
1910 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001912 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 return NULL;
1914 sep = vsep.buf;
1915 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 i = 0;
1918 if (striptype != RIGHTSTRIP) {
1919 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1920 i++;
1921 }
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 j = len;
1925 if (striptype != LEFTSTRIP) {
1926 do {
1927 j--;
1928 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1929 j++;
1930 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1935 Py_INCREF(self);
1936 return (PyObject*)self;
1937 }
1938 else
1939 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001940}
1941
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
1943Py_LOCAL_INLINE(PyObject *)
1944do_strip(PyBytesObject *self, int striptype)
1945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 char *s = PyBytes_AS_STRING(self);
1947 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 i = 0;
1950 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001951 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 i++;
1953 }
1954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 j = len;
1957 if (striptype != LEFTSTRIP) {
1958 do {
1959 j--;
David Malcolm96960882010-11-05 17:23:41 +00001960 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 j++;
1962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1965 Py_INCREF(self);
1966 return (PyObject*)self;
1967 }
1968 else
1969 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970}
1971
1972
1973Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001974do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001976 if (bytes != NULL && bytes != Py_None) {
1977 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 }
1979 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001982/*[clinic input]
1983bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001985 bytes: object = None
1986 /
1987
1988Strip leading and trailing bytes contained in the argument.
1989
1990If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1991[clinic start generated code]*/
1992
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001993static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001994bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001995/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001996{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001997 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001998}
1999
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002000/*[clinic input]
2001bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002003 bytes: object = None
2004 /
2005
2006Strip leading bytes contained in the argument.
2007
2008If the argument is omitted or None, strip leading ASCII whitespace.
2009[clinic start generated code]*/
2010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011static PyObject *
2012bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002013/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014{
2015 return do_argstrip(self, LEFTSTRIP, bytes);
2016}
2017
2018/*[clinic input]
2019bytes.rstrip
2020
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021 bytes: object = None
2022 /
2023
2024Strip trailing bytes contained in the argument.
2025
2026If the argument is omitted or None, strip trailing ASCII whitespace.
2027[clinic start generated code]*/
2028
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002029static PyObject *
2030bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002031/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002032{
2033 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002034}
Neal Norwitz6968b052007-02-27 19:02:19 +00002035
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002037static PyObject *
2038bytes_count(PyBytesObject *self, PyObject *args)
2039{
2040 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2041}
2042
2043
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044/*[clinic input]
2045bytes.translate
2046
Victor Stinner049e5092014-08-17 22:20:00 +02002047 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002048 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002050 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002051
2052Return a copy with each character mapped by the given translation table.
2053
Martin Panter1b6c6da2016-08-27 08:35:02 +00002054All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002055The remaining characters are mapped through the given translation table.
2056[clinic start generated code]*/
2057
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002058static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002059bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002060 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002061/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002063 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002064 Py_buffer table_view = {NULL, NULL};
2065 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002066 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002067 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 Py_ssize_t inlen, tablen, dellen = 0;
2071 PyObject *result;
2072 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002074 if (PyBytes_Check(table)) {
2075 table_chars = PyBytes_AS_STRING(table);
2076 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002078 else if (table == Py_None) {
2079 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 tablen = 256;
2081 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002082 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002083 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002084 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002085 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002086 tablen = table_view.len;
2087 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 if (tablen != 256) {
2090 PyErr_SetString(PyExc_ValueError,
2091 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 return NULL;
2094 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096 if (deletechars != NULL) {
2097 if (PyBytes_Check(deletechars)) {
2098 del_table_chars = PyBytes_AS_STRING(deletechars);
2099 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002101 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002102 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002103 PyBuffer_Release(&table_view);
2104 return NULL;
2105 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002106 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 dellen = del_table_view.len;
2108 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 }
2110 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002111 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 dellen = 0;
2113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 inlen = PyBytes_GET_SIZE(input_obj);
2116 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002117 if (result == NULL) {
2118 PyBuffer_Release(&del_table_view);
2119 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002121 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002122 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002125 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 /* If no deletions are required, use faster code */
2127 for (i = inlen; --i >= 0; ) {
2128 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 changed = 1;
2131 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002132 if (!changed && PyBytes_CheckExact(input_obj)) {
2133 Py_INCREF(input_obj);
2134 Py_DECREF(result);
2135 result = input_obj;
2136 }
2137 PyBuffer_Release(&del_table_view);
2138 PyBuffer_Release(&table_view);
2139 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 for (i = 0; i < 256; i++)
2144 trans_table[i] = Py_CHARMASK(i);
2145 } else {
2146 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002153 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 for (i = inlen; --i >= 0; ) {
2156 c = Py_CHARMASK(*input++);
2157 if (trans_table[c] != -1)
2158 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2159 continue;
2160 changed = 1;
2161 }
2162 if (!changed && PyBytes_CheckExact(input_obj)) {
2163 Py_DECREF(result);
2164 Py_INCREF(input_obj);
2165 return input_obj;
2166 }
2167 /* Fix the size of the resulting string */
2168 if (inlen > 0)
2169 _PyBytes_Resize(&result, output - output_start);
2170 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171}
2172
2173
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002174/*[clinic input]
2175
2176@staticmethod
2177bytes.maketrans
2178
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002179 frm: Py_buffer
2180 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181 /
2182
2183Return a translation table useable for the bytes or bytearray translate method.
2184
2185The returned table will be one where each byte in frm is mapped to the byte at
2186the same position in to.
2187
2188The bytes objects frm and to must be of the same length.
2189[clinic start generated code]*/
2190
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002191static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002192bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002193/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194{
2195 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002196}
2197
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002198
2199/*[clinic input]
2200bytes.replace
2201
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002202 old: Py_buffer
2203 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002204 count: Py_ssize_t = -1
2205 Maximum number of occurrences to replace.
2206 -1 (the default value) means replace all occurrences.
2207 /
2208
2209Return a copy with all occurrences of substring old replaced by new.
2210
2211If the optional argument count is given, only the first count occurrences are
2212replaced.
2213[clinic start generated code]*/
2214
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002216bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002217 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002218/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002220 return stringlib_replace((PyObject *)self,
2221 (const char *)old->buf, old->len,
2222 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223}
2224
2225/** End DALKE **/
2226
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002228static PyObject *
2229bytes_startswith(PyBytesObject *self, PyObject *args)
2230{
2231 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2232}
2233
2234static PyObject *
2235bytes_endswith(PyBytesObject *self, PyObject *args)
2236{
2237 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2238}
2239
2240
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241/*[clinic input]
2242bytes.decode
2243
2244 encoding: str(c_default="NULL") = 'utf-8'
2245 The encoding with which to decode the bytes.
2246 errors: str(c_default="NULL") = 'strict'
2247 The error handling scheme to use for the handling of decoding errors.
2248 The default is 'strict' meaning that decoding errors raise a
2249 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2250 as well as any other name registered with codecs.register_error that
2251 can handle UnicodeDecodeErrors.
2252
2253Decode the bytes using the codec registered for encoding.
2254[clinic start generated code]*/
2255
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002256static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002257bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002258 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002259/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002261 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002262}
2263
Guido van Rossum20188312006-05-05 15:15:40 +00002264
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002265/*[clinic input]
2266bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002267
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002268 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002269
2270Return a list of the lines in the bytes, breaking at line boundaries.
2271
2272Line breaks are not included in the resulting list unless keepends is given and
2273true.
2274[clinic start generated code]*/
2275
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002276static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002277bytes_splitlines_impl(PyBytesObject *self, int keepends)
2278/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002279{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002280 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002281 (PyObject*) self, PyBytes_AS_STRING(self),
2282 PyBytes_GET_SIZE(self), keepends
2283 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002284}
2285
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286/*[clinic input]
2287@classmethod
2288bytes.fromhex
2289
2290 string: unicode
2291 /
2292
2293Create a bytes object from a string of hexadecimal numbers.
2294
2295Spaces between two numbers are accepted.
2296Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2297[clinic start generated code]*/
2298
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002299static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002300bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002301/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002302{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002303 PyObject *result = _PyBytes_FromHex(string, 0);
2304 if (type != &PyBytes_Type && result != NULL) {
2305 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2306 result, NULL));
2307 }
2308 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002309}
2310
2311PyObject*
2312_PyBytes_FromHex(PyObject *string, int use_bytearray)
2313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002315 Py_ssize_t hexlen, invalid_char;
2316 unsigned int top, bot;
2317 Py_UCS1 *str, *end;
2318 _PyBytesWriter writer;
2319
2320 _PyBytesWriter_Init(&writer);
2321 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002322
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002323 assert(PyUnicode_Check(string));
2324 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002325 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002326 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002327
Victor Stinner2bf89932015-10-14 11:25:33 +02002328 if (!PyUnicode_IS_ASCII(string)) {
2329 void *data = PyUnicode_DATA(string);
2330 unsigned int kind = PyUnicode_KIND(string);
2331 Py_ssize_t i;
2332
2333 /* search for the first non-ASCII character */
2334 for (i = 0; i < hexlen; i++) {
2335 if (PyUnicode_READ(kind, data, i) >= 128)
2336 break;
2337 }
2338 invalid_char = i;
2339 goto error;
2340 }
2341
2342 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2343 str = PyUnicode_1BYTE_DATA(string);
2344
2345 /* This overestimates if there are spaces */
2346 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2347 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002349
2350 end = str + hexlen;
2351 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02002353 if (*str == ' ') {
2354 do {
2355 str++;
2356 } while (*str == ' ');
2357 if (str >= end)
2358 break;
2359 }
2360
2361 top = _PyLong_DigitValue[*str];
2362 if (top >= 16) {
2363 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002364 goto error;
2365 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002366 str++;
2367
2368 bot = _PyLong_DigitValue[*str];
2369 if (bot >= 16) {
2370 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2371 goto error;
2372 }
2373 str++;
2374
2375 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002377
2378 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002379
2380 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002381 PyErr_Format(PyExc_ValueError,
2382 "non-hexadecimal number found in "
2383 "fromhex() arg at position %zd", invalid_char);
2384 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002386}
2387
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002388PyDoc_STRVAR(hex__doc__,
2389"B.hex() -> string\n\
2390\n\
2391Create a string of hexadecimal numbers from a bytes object.\n\
2392Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2393
2394static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002395bytes_hex(PyBytesObject *self)
2396{
2397 char* argbuf = PyBytes_AS_STRING(self);
2398 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2399 return _Py_strhex(argbuf, arglen);
2400}
2401
2402static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002403bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002406}
2407
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002408
2409static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002410bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2412 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2413 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002414 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2415 _Py_center__doc__},
2416 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002417 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002418 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002419 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002420 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002421 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002422 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002423 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002424 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002425 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002426 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2427 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2429 _Py_isalnum__doc__},
2430 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2431 _Py_isalpha__doc__},
2432 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2433 _Py_isdigit__doc__},
2434 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2435 _Py_islower__doc__},
2436 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2437 _Py_isspace__doc__},
2438 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2439 _Py_istitle__doc__},
2440 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2441 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002442 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002443 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002444 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002445 BYTES_LSTRIP_METHODDEF
2446 BYTES_MAKETRANS_METHODDEF
2447 BYTES_PARTITION_METHODDEF
2448 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002449 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2450 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002451 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002452 BYTES_RPARTITION_METHODDEF
2453 BYTES_RSPLIT_METHODDEF
2454 BYTES_RSTRIP_METHODDEF
2455 BYTES_SPLIT_METHODDEF
2456 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002457 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002458 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002459 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2461 _Py_swapcase__doc__},
2462 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002463 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002465 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002467};
2468
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002470bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002471{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002472 if (!PyBytes_Check(self)) {
2473 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002474 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002475 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002476 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002477}
2478
2479static PyNumberMethods bytes_as_number = {
2480 0, /*nb_add*/
2481 0, /*nb_subtract*/
2482 0, /*nb_multiply*/
2483 bytes_mod, /*nb_remainder*/
2484};
2485
2486static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002487bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002488
2489static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002490bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 PyObject *x = NULL;
2493 const char *encoding = NULL;
2494 const char *errors = NULL;
2495 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002496 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 Py_ssize_t size;
2498 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002499 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002502 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2504 &encoding, &errors))
2505 return NULL;
2506 if (x == NULL) {
2507 if (encoding != NULL || errors != NULL) {
2508 PyErr_SetString(PyExc_TypeError,
2509 "encoding or errors without sequence "
2510 "argument");
2511 return NULL;
2512 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002513 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002516 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002518 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002520 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002521 return NULL;
2522 }
2523 new = PyUnicode_AsEncodedString(x, encoding, errors);
2524 if (new == NULL)
2525 return NULL;
2526 assert(PyBytes_Check(new));
2527 return new;
2528 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002529
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002530 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002531 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002532 PyUnicode_Check(x) ?
2533 "string argument without an encoding" :
2534 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002535 return NULL;
2536 }
2537
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002538 /* We'd like to call PyObject_Bytes here, but we need to check for an
2539 integer argument before deferring to PyBytes_FromObject, something
2540 PyObject_Bytes doesn't do. */
2541 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2542 if (func != NULL) {
2543 new = PyObject_CallFunctionObjArgs(func, NULL);
2544 Py_DECREF(func);
2545 if (new == NULL)
2546 return NULL;
2547 if (!PyBytes_Check(new)) {
2548 PyErr_Format(PyExc_TypeError,
2549 "__bytes__ returned non-bytes (type %.200s)",
2550 Py_TYPE(new)->tp_name);
2551 Py_DECREF(new);
2552 return NULL;
2553 }
2554 return new;
2555 }
2556 else if (PyErr_Occurred())
2557 return NULL;
2558
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002559 if (PyUnicode_Check(x)) {
2560 PyErr_SetString(PyExc_TypeError,
2561 "string argument without an encoding");
2562 return NULL;
2563 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002565 if (PyIndex_Check(x)) {
2566 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2567 if (size == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 return NULL;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002569 }
2570 if (size < 0) {
2571 PyErr_SetString(PyExc_ValueError, "negative count");
2572 return NULL;
2573 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002574 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002575 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 return new;
2578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002580 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002581}
2582
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002583static PyObject*
2584_PyBytes_FromBuffer(PyObject *x)
2585{
2586 PyObject *new;
2587 Py_buffer view;
2588
2589 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2590 return NULL;
2591
2592 new = PyBytes_FromStringAndSize(NULL, view.len);
2593 if (!new)
2594 goto fail;
2595 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2596 &view, view.len, 'C') < 0)
2597 goto fail;
2598 PyBuffer_Release(&view);
2599 return new;
2600
2601fail:
2602 Py_XDECREF(new);
2603 PyBuffer_Release(&view);
2604 return NULL;
2605}
2606
Victor Stinner3c50ce32015-10-14 13:50:40 +02002607#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2608 do { \
2609 PyObject *bytes; \
2610 Py_ssize_t i; \
2611 Py_ssize_t value; \
2612 char *str; \
2613 PyObject *item; \
2614 \
2615 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2616 if (bytes == NULL) \
2617 return NULL; \
2618 str = ((PyBytesObject *)bytes)->ob_sval; \
2619 \
2620 for (i = 0; i < Py_SIZE(x); i++) { \
2621 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002622 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002623 if (value == -1 && PyErr_Occurred()) \
2624 goto error; \
2625 \
2626 if (value < 0 || value >= 256) { \
2627 PyErr_SetString(PyExc_ValueError, \
2628 "bytes must be in range(0, 256)"); \
2629 goto error; \
2630 } \
2631 *str++ = (char) value; \
2632 } \
2633 return bytes; \
2634 \
2635 error: \
2636 Py_DECREF(bytes); \
2637 return NULL; \
2638 } while (0)
2639
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002640static PyObject*
2641_PyBytes_FromList(PyObject *x)
2642{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002643 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002644}
2645
2646static PyObject*
2647_PyBytes_FromTuple(PyObject *x)
2648{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002649 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002650}
2651
2652static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002653_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002654{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002655 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002657 _PyBytesWriter writer;
2658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002660 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 if (size == -1 && PyErr_Occurred())
2662 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002663
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002664 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002665 str = _PyBytesWriter_Alloc(&writer, size);
2666 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002668 writer.overallocate = 1;
2669 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 /* Run the iterator to exhaustion */
2672 for (i = 0; ; i++) {
2673 PyObject *item;
2674 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 /* Get the next item */
2677 item = PyIter_Next(it);
2678 if (item == NULL) {
2679 if (PyErr_Occurred())
2680 goto error;
2681 break;
2682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002685 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 Py_DECREF(item);
2687 if (value == -1 && PyErr_Occurred())
2688 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 /* Range check */
2691 if (value < 0 || value >= 256) {
2692 PyErr_SetString(PyExc_ValueError,
2693 "bytes must be in range(0, 256)");
2694 goto error;
2695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 /* Append the byte */
2698 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002699 str = _PyBytesWriter_Resize(&writer, str, size+1);
2700 if (str == NULL)
2701 return NULL;
2702 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002704 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002706
2707 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
2709 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002710 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712}
2713
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002714PyObject *
2715PyBytes_FromObject(PyObject *x)
2716{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002717 PyObject *it, *result;
2718
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002719 if (x == NULL) {
2720 PyErr_BadInternalCall();
2721 return NULL;
2722 }
2723
2724 if (PyBytes_CheckExact(x)) {
2725 Py_INCREF(x);
2726 return x;
2727 }
2728
2729 /* Use the modern buffer interface */
2730 if (PyObject_CheckBuffer(x))
2731 return _PyBytes_FromBuffer(x);
2732
2733 if (PyList_CheckExact(x))
2734 return _PyBytes_FromList(x);
2735
2736 if (PyTuple_CheckExact(x))
2737 return _PyBytes_FromTuple(x);
2738
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002739 if (!PyUnicode_Check(x)) {
2740 it = PyObject_GetIter(x);
2741 if (it != NULL) {
2742 result = _PyBytes_FromIterator(it, x);
2743 Py_DECREF(it);
2744 return result;
2745 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002746 }
2747
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002748 PyErr_Format(PyExc_TypeError,
2749 "cannot convert '%.200s' object to bytes",
2750 x->ob_type->tp_name);
2751 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002752}
2753
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002755bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 PyObject *tmp, *pnew;
2758 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 assert(PyType_IsSubtype(type, &PyBytes_Type));
2761 tmp = bytes_new(&PyBytes_Type, args, kwds);
2762 if (tmp == NULL)
2763 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002764 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002765 n = PyBytes_GET_SIZE(tmp);
2766 pnew = type->tp_alloc(type, n);
2767 if (pnew != NULL) {
2768 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2769 PyBytes_AS_STRING(tmp), n+1);
2770 ((PyBytesObject *)pnew)->ob_shash =
2771 ((PyBytesObject *)tmp)->ob_shash;
2772 }
2773 Py_DECREF(tmp);
2774 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775}
2776
/* Docstring of the bytes type (installed as tp_doc in PyBytes_Type).
   It lists the accepted constructor call forms, followed by the kinds of
   source objects the constructor understands. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002777PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002778"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002779bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002780bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002781bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2782bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002783\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002785 - an iterable yielding integers in range(256)\n\
2786 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002787 - any object implementing the buffer API.\n\
2788 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002789
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002790static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002791
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2794 "bytes",
2795 PyBytesObject_SIZE,
2796 sizeof(char),
2797 bytes_dealloc, /* tp_dealloc */
2798 0, /* tp_print */
2799 0, /* tp_getattr */
2800 0, /* tp_setattr */
2801 0, /* tp_reserved */
2802 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002803 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 &bytes_as_sequence, /* tp_as_sequence */
2805 &bytes_as_mapping, /* tp_as_mapping */
2806 (hashfunc)bytes_hash, /* tp_hash */
2807 0, /* tp_call */
2808 bytes_str, /* tp_str */
2809 PyObject_GenericGetAttr, /* tp_getattro */
2810 0, /* tp_setattro */
2811 &bytes_as_buffer, /* tp_as_buffer */
2812 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2813 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2814 bytes_doc, /* tp_doc */
2815 0, /* tp_traverse */
2816 0, /* tp_clear */
2817 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2818 0, /* tp_weaklistoffset */
2819 bytes_iter, /* tp_iter */
2820 0, /* tp_iternext */
2821 bytes_methods, /* tp_methods */
2822 0, /* tp_members */
2823 0, /* tp_getset */
2824 &PyBaseObject_Type, /* tp_base */
2825 0, /* tp_dict */
2826 0, /* tp_descr_get */
2827 0, /* tp_descr_set */
2828 0, /* tp_dictoffset */
2829 0, /* tp_init */
2830 0, /* tp_alloc */
2831 bytes_new, /* tp_new */
2832 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002833};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002834
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002835void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002836PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 assert(pv != NULL);
2839 if (*pv == NULL)
2840 return;
2841 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002842 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 return;
2844 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002845
2846 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2847 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002848 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002849 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002850
Antoine Pitrou161d6952014-05-01 14:36:20 +02002851 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002852 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002853 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2854 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2855 Py_CLEAR(*pv);
2856 return;
2857 }
2858
2859 oldsize = PyBytes_GET_SIZE(*pv);
2860 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2861 PyErr_NoMemory();
2862 goto error;
2863 }
2864 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2865 goto error;
2866
2867 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2868 PyBuffer_Release(&wb);
2869 return;
2870
2871 error:
2872 PyBuffer_Release(&wb);
2873 Py_CLEAR(*pv);
2874 return;
2875 }
2876
2877 else {
2878 /* Multiple references, need to create new object */
2879 PyObject *v;
2880 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002881 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002883}
2884
2885void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002886PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 PyBytes_Concat(pv, w);
2889 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890}
2891
2892
Ethan Furmanb95b5612015-01-23 20:05:18 -08002893/* The following function breaks the notion that bytes are immutable:
2894 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002896 as creating a new bytes object and destroying the old one, only
2897 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002899 Note that if there's not enough memory to resize the bytes object, the
2900 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901 memory" exception is set, and -1 is returned. Else (on success) 0 is
2902 returned, and the value in *pv may or may not be the same as on input.
2903 As always, an extra byte is allocated for a trailing \0 byte (newsize
2904 does *not* include that), and a trailing \0 byte is stored.
2905*/
2906
2907int
2908_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2909{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002910 PyObject *v;
2911 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002912 v = *pv;
2913 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2914 *pv = 0;
2915 Py_DECREF(v);
2916 PyErr_BadInternalCall();
2917 return -1;
2918 }
2919 /* XXX UNREF/NEWREF interface should be more symmetrical */
2920 _Py_DEC_REFTOTAL;
2921 _Py_ForgetReference(v);
2922 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002923 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 if (*pv == NULL) {
2925 PyObject_Del(v);
2926 PyErr_NoMemory();
2927 return -1;
2928 }
2929 _Py_NewReference(*pv);
2930 sv = (PyBytesObject *) *pv;
2931 Py_SIZE(sv) = newsize;
2932 sv->ob_sval[newsize] = '\0';
2933 sv->ob_shash = -1; /* invalidate cached hash value */
2934 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935}
2936
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937void
2938PyBytes_Fini(void)
2939{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002941 for (i = 0; i < UCHAR_MAX + 1; i++)
2942 Py_CLEAR(characters[i]);
2943 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002944}
2945
Benjamin Peterson4116f362008-05-27 00:36:20 +00002946/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002947
2948typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 PyObject_HEAD
2950 Py_ssize_t it_index;
2951 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002952} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002953
2954static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002956{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 _PyObject_GC_UNTRACK(it);
2958 Py_XDECREF(it->it_seq);
2959 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002960}
2961
2962static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002964{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002965 Py_VISIT(it->it_seq);
2966 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002967}
2968
2969static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002971{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 PyBytesObject *seq;
2973 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 assert(it != NULL);
2976 seq = it->it_seq;
2977 if (seq == NULL)
2978 return NULL;
2979 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2982 item = PyLong_FromLong(
2983 (unsigned char)seq->ob_sval[it->it_index]);
2984 if (item != NULL)
2985 ++it->it_index;
2986 return item;
2987 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03002990 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002991 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002992}
2993
2994static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 Py_ssize_t len = 0;
2998 if (it->it_seq)
2999 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3000 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001}
3002
3003PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003006static PyObject *
3007striter_reduce(striterobject *it)
3008{
3009 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003010 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003011 it->it_seq, it->it_index);
3012 } else {
3013 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3014 if (u == NULL)
3015 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003016 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003017 }
3018}
3019
3020PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3021
3022static PyObject *
3023striter_setstate(striterobject *it, PyObject *state)
3024{
3025 Py_ssize_t index = PyLong_AsSsize_t(state);
3026 if (index == -1 && PyErr_Occurred())
3027 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003028 if (it->it_seq != NULL) {
3029 if (index < 0)
3030 index = 0;
3031 else if (index > PyBytes_GET_SIZE(it->it_seq))
3032 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3033 it->it_index = index;
3034 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003035 Py_RETURN_NONE;
3036}
3037
3038PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3039
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3042 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003043 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3044 reduce_doc},
3045 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3046 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048};
3049
/* Type object of the bytes iterator ("bytes_iterator").

   Instances are GC-tracked striterobject structures; the type returns
   itself from iter() (PyObject_SelfIter), produces items through
   striter_next and exposes the methods listed in striter_methods. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    /* NOTE(review): the slot following tp_methods in PyTypeObject is
       tp_members -- confirm against the PyTypeObject definition */
    0,
};
3082
3083static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003084bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003086 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003088 if (!PyBytes_Check(seq)) {
3089 PyErr_BadInternalCall();
3090 return NULL;
3091 }
3092 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3093 if (it == NULL)
3094 return NULL;
3095 it->it_index = 0;
3096 Py_INCREF(seq);
3097 it->it_seq = (PyBytesObject *)seq;
3098 _PyObject_GC_TRACK(it);
3099 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100}
Victor Stinner00165072015-10-09 01:53:21 +02003101
3102
/* _PyBytesWriter API */

/* Divisor used to compute the extra space requested when the writer
   overallocates: extra = size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3112
3113void
3114_PyBytesWriter_Init(_PyBytesWriter *writer)
3115{
Victor Stinner661aacc2015-10-14 09:41:48 +02003116 /* Set all attributes before small_buffer to 0 */
3117 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003118#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003119 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003120#endif
3121}
3122
3123void
3124_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3125{
3126 Py_CLEAR(writer->buffer);
3127}
3128
3129Py_LOCAL_INLINE(char*)
3130_PyBytesWriter_AsString(_PyBytesWriter *writer)
3131{
Victor Stinner661aacc2015-10-14 09:41:48 +02003132 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003133 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003134 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003135 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003136 else if (writer->use_bytearray) {
3137 assert(writer->buffer != NULL);
3138 return PyByteArray_AS_STRING(writer->buffer);
3139 }
3140 else {
3141 assert(writer->buffer != NULL);
3142 return PyBytes_AS_STRING(writer->buffer);
3143 }
Victor Stinner00165072015-10-09 01:53:21 +02003144}
3145
3146Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003147_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003148{
3149 char *start = _PyBytesWriter_AsString(writer);
3150 assert(str != NULL);
3151 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003152 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003153 return str - start;
3154}
3155
3156Py_LOCAL_INLINE(void)
3157_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3158{
3159#ifdef Py_DEBUG
3160 char *start, *end;
3161
Victor Stinner661aacc2015-10-14 09:41:48 +02003162 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003163 assert(writer->buffer == NULL);
3164 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003165 else {
3166 assert(writer->buffer != NULL);
3167 if (writer->use_bytearray)
3168 assert(PyByteArray_CheckExact(writer->buffer));
3169 else
3170 assert(PyBytes_CheckExact(writer->buffer));
3171 assert(Py_REFCNT(writer->buffer) == 1);
3172 }
Victor Stinner00165072015-10-09 01:53:21 +02003173
Victor Stinner661aacc2015-10-14 09:41:48 +02003174 if (writer->use_bytearray) {
3175 /* bytearray has its own overallocation algorithm,
3176 writer overallocation must be disabled */
3177 assert(!writer->overallocate);
3178 }
3179
3180 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003181 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003182 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003183 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003184 assert(start[writer->allocated] == 0);
3185
3186 end = start + writer->allocated;
3187 assert(str != NULL);
3188 assert(start <= str && str <= end);
3189#endif
3190}
3191
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003192void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003193_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003194{
3195 Py_ssize_t allocated, pos;
3196
3197 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003198 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003199
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003200 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003201 if (writer->overallocate
3202 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3203 /* overallocate to limit the number of realloc() */
3204 allocated += allocated / OVERALLOCATE_FACTOR;
3205 }
3206
Victor Stinner2bf89932015-10-14 11:25:33 +02003207 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003208 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003209 if (writer->use_bytearray) {
3210 if (PyByteArray_Resize(writer->buffer, allocated))
3211 goto error;
3212 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3213 but we cannot use ob_alloc because bytes may need to be moved
3214 to use the whole buffer. bytearray uses an internal optimization
3215 to avoid moving or copying bytes when bytes are removed at the
3216 beginning (ex: del bytearray[:1]). */
3217 }
3218 else {
3219 if (_PyBytes_Resize(&writer->buffer, allocated))
3220 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003221 }
3222 }
3223 else {
3224 /* convert from stack buffer to bytes object buffer */
3225 assert(writer->buffer == NULL);
3226
Victor Stinner661aacc2015-10-14 09:41:48 +02003227 if (writer->use_bytearray)
3228 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3229 else
3230 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003231 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003232 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003233
3234 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003235 char *dest;
3236 if (writer->use_bytearray)
3237 dest = PyByteArray_AS_STRING(writer->buffer);
3238 else
3239 dest = PyBytes_AS_STRING(writer->buffer);
3240 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003241 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003242 pos);
3243 }
3244
Victor Stinnerb3653a32015-10-09 03:38:24 +02003245 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003246#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003247 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003248#endif
Victor Stinner00165072015-10-09 01:53:21 +02003249 }
3250 writer->allocated = allocated;
3251
3252 str = _PyBytesWriter_AsString(writer) + pos;
3253 _PyBytesWriter_CheckConsistency(writer, str);
3254 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003255
3256error:
3257 _PyBytesWriter_Dealloc(writer);
3258 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003259}
3260
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003261void*
3262_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3263{
3264 Py_ssize_t new_min_size;
3265
3266 _PyBytesWriter_CheckConsistency(writer, str);
3267 assert(size >= 0);
3268
3269 if (size == 0) {
3270 /* nothing to do */
3271 return str;
3272 }
3273
3274 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3275 PyErr_NoMemory();
3276 _PyBytesWriter_Dealloc(writer);
3277 return NULL;
3278 }
3279 new_min_size = writer->min_size + size;
3280
3281 if (new_min_size > writer->allocated)
3282 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3283
3284 writer->min_size = new_min_size;
3285 return str;
3286}
3287
Victor Stinner00165072015-10-09 01:53:21 +02003288/* Allocate the buffer to write size bytes.
3289 Return the pointer to the beginning of buffer data.
3290 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003291void*
Victor Stinner00165072015-10-09 01:53:21 +02003292_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3293{
3294 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003295 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003296 assert(size >= 0);
3297
Victor Stinnerb3653a32015-10-09 03:38:24 +02003298 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003299#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003300 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003301 /* In debug mode, don't use the full small buffer because it is less
3302 efficient than bytes and bytearray objects to detect buffer underflow
3303 and buffer overflow. Use 10 bytes of the small buffer to test also
3304 code using the smaller buffer in debug mode.
3305
3306 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3307 in debug mode to also be able to detect stack overflow when running
3308 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3309 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3310 stack overflow. */
3311 writer->allocated = Py_MIN(writer->allocated, 10);
3312 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3313 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003314 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003315#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003316 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003317#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003318 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003319}
3320
3321PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003322_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003323{
Victor Stinner2bf89932015-10-14 11:25:33 +02003324 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003325 PyObject *result;
3326
3327 _PyBytesWriter_CheckConsistency(writer, str);
3328
Victor Stinner2bf89932015-10-14 11:25:33 +02003329 size = _PyBytesWriter_GetSize(writer, str);
3330 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003331 Py_CLEAR(writer->buffer);
3332 /* Get the empty byte string singleton */
3333 result = PyBytes_FromStringAndSize(NULL, 0);
3334 }
3335 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003336 if (writer->use_bytearray) {
3337 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3338 }
3339 else {
3340 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3341 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003342 }
3343 else {
3344 result = writer->buffer;
3345 writer->buffer = NULL;
3346
Victor Stinner2bf89932015-10-14 11:25:33 +02003347 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003348 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003349 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003350 Py_DECREF(result);
3351 return NULL;
3352 }
3353 }
3354 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003355 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003356 assert(result == NULL);
3357 return NULL;
3358 }
Victor Stinner00165072015-10-09 01:53:21 +02003359 }
3360 }
Victor Stinner00165072015-10-09 01:53:21 +02003361 }
Victor Stinner00165072015-10-09 01:53:21 +02003362 return result;
3363}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003364
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003365void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003366_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003367 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003368{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003369 char *str = (char *)ptr;
3370
Victor Stinnerce179bf2015-10-09 12:57:22 +02003371 str = _PyBytesWriter_Prepare(writer, str, size);
3372 if (str == NULL)
3373 return NULL;
3374
3375 Py_MEMCPY(str, bytes, size);
3376 str += size;
3377
3378 return str;
3379}