blob: ff87dfe775c98efb0f141aef8dc5bb6cad819304 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030012class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
#ifdef COUNT_ALLOCS
/* Statistics: incremented each time the cached empty object (null_strings)
   or a cached one-byte object (one_strings) is handed out again. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte's value; entries are
   filled in lazily the first time each one-byte object is created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty object is created. */
static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.
   The "+ 1" reserves room for the null terminating character that follows
   the string data.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/* Forward declaration: lets code in this file refer to
   _PyBytesWriter_GetSize() ahead of its definition. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 For PyBytes_FromString(), the parameter `str' points to a null-terminated
39 string containing exactly `size' bytes.
40
Martin Pantera90a4a92016-05-30 04:04:50 +000041 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042 either NULL or else points to a string containing at least `size' bytes.
43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44 not have to be null-terminated. (Therefore it is safe to construct a
45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47 bytes (setting the last byte to the null terminating character) and you can
48 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000049 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000050 alter the data yourself, since the strings may be shared.
51
52 The PyObject member `op->ob_size', which denotes the number of "extra
53 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020054 allocated for string data, not counting the null terminating character.
55 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 PyBytes_FromStringAndSize()) or the length of the string in the `str'
57 parameter (for PyBytes_FromString()).
58*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Bit flags, one per flag character accepted in a '%' specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200441 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 str += len;
443 return str;
444 }
445
446 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200448 *p_result = result;
449 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450}
451
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455 PyObject *result, *iobj;
456 if (type == 'i')
457 type = 'd';
458 if (PyLong_Check(v))
459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460 if (PyNumber_Check(v)) {
461 /* make sure number is a type of integer for o, x, and X */
462 if (type == 'o' || type == 'x' || type == 'X')
463 iobj = PyNumber_Index(v);
464 else
465 iobj = PyNumber_Long(v);
466 if (iobj == NULL) {
467 if (!PyErr_ExceptionMatches(PyExc_TypeError))
468 return NULL;
469 }
470 else if (!PyLong_Check(iobj))
471 Py_CLEAR(iobj);
472 if (iobj != NULL) {
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 }
478 PyErr_Format(PyExc_TypeError,
479 "%%%c format: %s is required, not %.200s", type,
480 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481 : "a number",
482 Py_TYPE(v)->tp_name);
483 return NULL;
484}
485
486static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyBytes_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 *p = PyByteArray_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
497 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300498 PyObject *iobj;
499 long ival;
500 int overflow;
501 /* make sure number is a type of integer */
502 if (PyLong_Check(arg)) {
503 ival = PyLong_AsLongAndOverflow(arg, &overflow);
504 }
505 else {
506 iobj = PyNumber_Index(arg);
507 if (iobj == NULL) {
508 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509 return 0;
510 goto onError;
511 }
512 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513 Py_DECREF(iobj);
514 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300515 if (!overflow && ival == -1 && PyErr_Occurred())
516 goto onError;
517 if (overflow || !(0 <= ival && ival <= 255)) {
518 PyErr_SetString(PyExc_OverflowError,
519 "%c arg not in range(256)");
520 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300522 *p = (char)ival;
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300525 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 PyErr_SetString(PyExc_TypeError,
527 "%c requires an integer in range(256) or a single byte");
528 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529}
530
531static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 /* is it a bytes object? */
537 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *pbuf = PyBytes_AS_STRING(v);
539 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 return v;
542 }
543 if (PyByteArray_Check(v)) {
544 *pbuf = PyByteArray_AS_STRING(v);
545 *plen = PyByteArray_GET_SIZE(v);
546 Py_INCREF(v);
547 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 }
549 /* does it support __bytes__? */
550 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551 if (func != NULL) {
552 result = PyObject_CallFunctionObjArgs(func, NULL);
553 Py_DECREF(func);
554 if (result == NULL)
555 return NULL;
556 if (!PyBytes_Check(result)) {
557 PyErr_Format(PyExc_TypeError,
558 "__bytes__ returned non-bytes (type %.200s)",
559 Py_TYPE(result)->tp_name);
560 Py_DECREF(result);
561 return NULL;
562 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200563 *pbuf = PyBytes_AS_STRING(result);
564 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 return result;
566 }
567 PyErr_Format(PyExc_TypeError,
568 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569 Py_TYPE(v)->tp_name);
570 return NULL;
571}
572
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200573/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574
575PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200576_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578{
Victor Stinner772b2b02015-10-14 09:56:53 +0200579 const char *fmt;
580 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200582 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200585 _PyBytesWriter writer;
586
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_BadInternalCall();
589 return NULL;
590 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200591 fmt = format;
592 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593
594 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596
597 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 if (!use_bytearray)
601 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200602
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 if (PyTuple_Check(args)) {
604 arglen = PyTuple_GET_SIZE(args);
605 argidx = 0;
606 }
607 else {
608 arglen = -1;
609 argidx = -2;
610 }
611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613 !PyByteArray_Check(args)) {
614 dict = args;
615 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 while (--fmtcnt >= 0) {
618 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 Py_ssize_t len;
620 char *pos;
621
622 pos = strchr(fmt + 1, '%');
623 if (pos != NULL)
624 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 else
626 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 assert(len != 0);
628
629 Py_MEMCPY(res, fmt, len);
630 res += len;
631 fmt += len;
632 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 }
634 else {
635 /* Got a format specifier */
636 int flags = 0;
637 Py_ssize_t width = -1;
638 int prec = -1;
639 int c = '\0';
640 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 PyObject *v = NULL;
642 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200643 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800644 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200645 Py_ssize_t len = 0;
646 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200647 Py_ssize_t alloc;
648#ifdef Py_DEBUG
649 char *before;
650#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 fmt++;
653 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200654 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 Py_ssize_t keylen;
656 PyObject *key;
657 int pcount = 1;
658
659 if (dict == NULL) {
660 PyErr_SetString(PyExc_TypeError,
661 "format requires a mapping");
662 goto error;
663 }
664 ++fmt;
665 --fmtcnt;
666 keystart = fmt;
667 /* Skip over balanced parentheses */
668 while (pcount > 0 && --fmtcnt >= 0) {
669 if (*fmt == ')')
670 --pcount;
671 else if (*fmt == '(')
672 ++pcount;
673 fmt++;
674 }
675 keylen = fmt - keystart - 1;
676 if (fmtcnt < 0 || pcount > 0) {
677 PyErr_SetString(PyExc_ValueError,
678 "incomplete format key");
679 goto error;
680 }
681 key = PyBytes_FromStringAndSize(keystart,
682 keylen);
683 if (key == NULL)
684 goto error;
685 if (args_owned) {
686 Py_DECREF(args);
687 args_owned = 0;
688 }
689 args = PyObject_GetItem(dict, key);
690 Py_DECREF(key);
691 if (args == NULL) {
692 goto error;
693 }
694 args_owned = 1;
695 arglen = -1;
696 argidx = -2;
697 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200698
699 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800700 while (--fmtcnt >= 0) {
701 switch (c = *fmt++) {
702 case '-': flags |= F_LJUST; continue;
703 case '+': flags |= F_SIGN; continue;
704 case ' ': flags |= F_BLANK; continue;
705 case '#': flags |= F_ALT; continue;
706 case '0': flags |= F_ZERO; continue;
707 }
708 break;
709 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200710
711 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800712 if (c == '*') {
713 v = getnextarg(args, arglen, &argidx);
714 if (v == NULL)
715 goto error;
716 if (!PyLong_Check(v)) {
717 PyErr_SetString(PyExc_TypeError,
718 "* wants int");
719 goto error;
720 }
721 width = PyLong_AsSsize_t(v);
722 if (width == -1 && PyErr_Occurred())
723 goto error;
724 if (width < 0) {
725 flags |= F_LJUST;
726 width = -width;
727 }
728 if (--fmtcnt >= 0)
729 c = *fmt++;
730 }
731 else if (c >= 0 && isdigit(c)) {
732 width = c - '0';
733 while (--fmtcnt >= 0) {
734 c = Py_CHARMASK(*fmt++);
735 if (!isdigit(c))
736 break;
737 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738 PyErr_SetString(
739 PyExc_ValueError,
740 "width too big");
741 goto error;
742 }
743 width = width*10 + (c - '0');
744 }
745 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200746
747 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800748 if (c == '.') {
749 prec = 0;
750 if (--fmtcnt >= 0)
751 c = *fmt++;
752 if (c == '*') {
753 v = getnextarg(args, arglen, &argidx);
754 if (v == NULL)
755 goto error;
756 if (!PyLong_Check(v)) {
757 PyErr_SetString(
758 PyExc_TypeError,
759 "* wants int");
760 goto error;
761 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200762 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800763 if (prec == -1 && PyErr_Occurred())
764 goto error;
765 if (prec < 0)
766 prec = 0;
767 if (--fmtcnt >= 0)
768 c = *fmt++;
769 }
770 else if (c >= 0 && isdigit(c)) {
771 prec = c - '0';
772 while (--fmtcnt >= 0) {
773 c = Py_CHARMASK(*fmt++);
774 if (!isdigit(c))
775 break;
776 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777 PyErr_SetString(
778 PyExc_ValueError,
779 "prec too big");
780 goto error;
781 }
782 prec = prec*10 + (c - '0');
783 }
784 }
785 } /* prec */
786 if (fmtcnt >= 0) {
787 if (c == 'h' || c == 'l' || c == 'L') {
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 }
792 if (fmtcnt < 0) {
793 PyErr_SetString(PyExc_ValueError,
794 "incomplete format");
795 goto error;
796 }
797 if (c != '%') {
798 v = getnextarg(args, arglen, &argidx);
799 if (v == NULL)
800 goto error;
801 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200802
803 if (fmtcnt < 0) {
804 /* last writer: disable writer overallocation */
805 writer.overallocate = 0;
806 }
807
Ethan Furmanb95b5612015-01-23 20:05:18 -0800808 sign = 0;
809 fill = ' ';
810 switch (c) {
811 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200812 *res++ = '%';
813 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200844 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200845 && width == -1 && prec == -1
846 && !(flags & (F_SIGN | F_BLANK))
847 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 {
849 /* Fast path */
850 int alternate = flags & F_ALT;
851 int base;
852
853 switch(c)
854 {
855 default:
856 assert(0 && "'type' not in [diuoxX]");
857 case 'd':
858 case 'i':
859 case 'u':
860 base = 10;
861 break;
862 case 'o':
863 base = 8;
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 break;
869 }
870
871 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200872 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200873 res = _PyLong_FormatBytesWriter(&writer, res,
874 v, base, alternate);
875 if (res == NULL)
876 goto error;
877 continue;
878 }
879
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300880 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200881 if (!temp)
882 goto error;
883 assert(PyUnicode_IS_ASCII(temp));
884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885 len = PyUnicode_GET_LENGTH(temp);
886 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 if (flags & F_ZERO)
888 fill = '0';
889 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200890
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 case 'e':
892 case 'E':
893 case 'f':
894 case 'F':
895 case 'g':
896 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897 if (width == -1 && prec == -1
898 && !(flags & (F_SIGN | F_BLANK)))
899 {
900 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200901 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200902 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (res == NULL)
904 goto error;
905 continue;
906 }
907
Victor Stinnerad771582015-10-09 12:38:53 +0200908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 goto error;
910 pbuf = PyBytes_AS_STRING(temp);
911 len = PyBytes_GET_SIZE(temp);
912 sign = 1;
913 if (flags & F_ZERO)
914 fill = '0';
915 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200918 pbuf = &onechar;
919 len = byte_converter(v, &onechar);
920 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200922 if (width == -1) {
923 /* Fast path */
924 *res++ = onechar;
925 continue;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 default:
930 PyErr_Format(PyExc_ValueError,
931 "unsupported format character '%c' (0x%x) "
932 "at index %zd",
933 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200934 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 goto error;
936 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200937
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if (sign) {
939 if (*pbuf == '-' || *pbuf == '+') {
940 sign = *pbuf++;
941 len--;
942 }
943 else if (flags & F_SIGN)
944 sign = '+';
945 else if (flags & F_BLANK)
946 sign = ' ';
947 else
948 sign = 0;
949 }
950 if (width < len)
951 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
953 alloc = width;
954 if (sign != 0 && len == width)
955 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200956 /* 2: size preallocated for %s */
957 if (alloc > 2) {
958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800961 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200962#ifdef Py_DEBUG
963 before = res;
964#endif
965
966 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 if (sign) {
968 if (fill != ' ')
969 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 if (width > len)
971 width--;
972 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Write the numeric prefix for "x", "X" and "o" formats
975 if the alternate form is used.
976 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
978 assert(pbuf[0] == '0');
979 assert(pbuf[1] == c);
980 if (fill != ' ') {
981 *res++ = *pbuf++;
982 *res++ = *pbuf++;
983 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 width -= 2;
985 if (width < 0)
986 width = 0;
987 len -= 2;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992 memset(res, fill, width - len);
993 res += (width - len);
994 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996
997 /* If padding with spaces: write sign if needed and/or numeric
998 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (fill == ' ') {
1000 if (sign)
1001 *res++ = sign;
1002 if ((flags & F_ALT) &&
1003 (c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001010
1011 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 Py_MEMCPY(res, pbuf, len);
1013 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (dict && (argidx < arglen) && c != '%') {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001024 Py_XDECREF(temp);
1025 goto error;
1026 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029#ifdef Py_DEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001037 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001045
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050
1051 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057}
1058
1059/* =-= */
1060
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001061static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001062bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001065}
1066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067/* Unescape a backslash-escaped string. If unicode is non-zero,
1068 the string is a u-literal. If recode_encoding is non-zero,
1069 the string is UTF-8 encoded and should be re-encoded in the
1070 specified encoding. */
1071
Victor Stinner2ec80632015-10-14 13:32:13 +02001072static char *
1073_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1074 const char *errors, const char *recode_encoding,
1075 _PyBytesWriter *writer, char *p)
1076{
1077 PyObject *u, *w;
1078 const char* t;
1079
1080 t = *s;
1081 /* Decode non-ASCII bytes as UTF-8. */
1082 while (t < end && (*t & 0x80))
1083 t++;
1084 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1085 if (u == NULL)
1086 return NULL;
1087
1088 /* Recode them in target encoding. */
1089 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1090 Py_DECREF(u);
1091 if (w == NULL)
1092 return NULL;
1093 assert(PyBytes_Check(w));
1094
1095 /* Append bytes to output buffer. */
1096 writer->min_size--; /* substract 1 preallocated byte */
1097 p = _PyBytesWriter_WriteBytes(writer, p,
1098 PyBytes_AS_STRING(w),
1099 PyBytes_GET_SIZE(w));
1100 Py_DECREF(w);
1101 if (p == NULL)
1102 return NULL;
1103
1104 *s = t;
1105 return p;
1106}
1107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 Py_ssize_t len,
1110 const char *errors,
1111 Py_ssize_t unicode,
1112 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001115 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 _PyBytesWriter writer;
1118
1119 _PyBytesWriter_Init(&writer);
1120
1121 p = _PyBytesWriter_Alloc(&writer, len);
1122 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001124 writer.overallocate = 1;
1125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 end = s + len;
1127 while (s < end) {
1128 if (*s != '\\') {
1129 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 *p++ = *s++;
1132 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001133 else {
1134 /* non-ASCII character and need to recode */
1135 p = _PyBytes_DecodeEscapeRecode(&s, end,
1136 errors, recode_encoding,
1137 &writer, p);
1138 if (p == NULL)
1139 goto failed;
1140 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 continue;
1142 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 PyErr_SetString(PyExc_ValueError,
1147 "Trailing \\ in string");
1148 goto failed;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 switch (*s++) {
1152 /* XXX This assumes ASCII! */
1153 case '\n': break;
1154 case '\\': *p++ = '\\'; break;
1155 case '\'': *p++ = '\''; break;
1156 case '\"': *p++ = '\"'; break;
1157 case 'b': *p++ = '\b'; break;
1158 case 'f': *p++ = '\014'; break; /* FF */
1159 case 't': *p++ = '\t'; break;
1160 case 'n': *p++ = '\n'; break;
1161 case 'r': *p++ = '\r'; break;
1162 case 'v': *p++ = '\013'; break; /* VT */
1163 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1164 case '0': case '1': case '2': case '3':
1165 case '4': case '5': case '6': case '7':
1166 c = s[-1] - '0';
1167 if (s < end && '0' <= *s && *s <= '7') {
1168 c = (c<<3) + *s++ - '0';
1169 if (s < end && '0' <= *s && *s <= '7')
1170 c = (c<<3) + *s++ - '0';
1171 }
1172 *p++ = c;
1173 break;
1174 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001175 if (s+1 < end) {
1176 int digit1, digit2;
1177 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1178 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1179 if (digit1 < 16 && digit2 < 16) {
1180 *p++ = (unsigned char)((digit1 << 4) + digit2);
1181 s += 2;
1182 break;
1183 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001185 /* invalid hexadecimal digits */
1186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001188 PyErr_Format(PyExc_ValueError,
1189 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001190 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 goto failed;
1192 }
1193 if (strcmp(errors, "replace") == 0) {
1194 *p++ = '?';
1195 } else if (strcmp(errors, "ignore") == 0)
1196 /* do nothing */;
1197 else {
1198 PyErr_Format(PyExc_ValueError,
1199 "decoding error; unknown "
1200 "error handling code: %.400s",
1201 errors);
1202 goto failed;
1203 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001204 /* skip \x */
1205 if (s < end && Py_ISXDIGIT(s[0]))
1206 s++; /* and a hexdigit */
1207 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 default:
1210 *p++ = '\\';
1211 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001212 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 UTF-8 bytes may follow. */
1214 }
1215 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001216
1217 return _PyBytesWriter_Finish(&writer, p);
1218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001220 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222}
1223
1224/* -------------------------------------------------------------------- */
1225/* object api */
1226
1227Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001228PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 if (!PyBytes_Check(op)) {
1231 PyErr_Format(PyExc_TypeError,
1232 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1233 return -1;
1234 }
1235 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236}
1237
1238char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001239PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 if (!PyBytes_Check(op)) {
1242 PyErr_Format(PyExc_TypeError,
1243 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1244 return NULL;
1245 }
1246 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247}
1248
1249int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001250PyBytes_AsStringAndSize(PyObject *obj,
1251 char **s,
1252 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 if (s == NULL) {
1255 PyErr_BadInternalCall();
1256 return -1;
1257 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 if (!PyBytes_Check(obj)) {
1260 PyErr_Format(PyExc_TypeError,
1261 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1262 return -1;
1263 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 *s = PyBytes_AS_STRING(obj);
1266 if (len != NULL)
1267 *len = PyBytes_GET_SIZE(obj);
1268 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001269 PyErr_SetString(PyExc_ValueError,
1270 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 return -1;
1272 }
1273 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274}
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
1276/* -------------------------------------------------------------------- */
1277/* Methods */
1278
Eric Smith0923d1d2009-04-16 20:16:10 +00001279#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001280
1281#include "stringlib/fastsearch.h"
1282#include "stringlib/count.h"
1283#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001284#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001285#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001286#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001287#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001288
Eric Smith0f78bff2009-11-30 01:01:42 +00001289#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291PyObject *
1292PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001296 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 unsigned char quote, *s, *p;
1299
1300 /* Compute size of output string */
1301 squotes = dquotes = 0;
1302 newsize = 3; /* b'' */
1303 s = (unsigned char*)op->ob_sval;
1304 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001305 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 case '\'': squotes++; break;
1308 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001310 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 default:
1312 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001313 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001314 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001315 if (newsize > PY_SSIZE_T_MAX - incr)
1316 goto overflow;
1317 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 }
1319 quote = '\'';
1320 if (smartquotes && squotes && !dquotes)
1321 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001322 if (squotes && quote == '\'') {
1323 if (newsize > PY_SSIZE_T_MAX - squotes)
1324 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327
1328 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 if (v == NULL) {
1330 return NULL;
1331 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 *p++ = 'b', *p++ = quote;
1335 for (i = 0; i < length; i++) {
1336 unsigned char c = op->ob_sval[i];
1337 if (c == quote || c == '\\')
1338 *p++ = '\\', *p++ = c;
1339 else if (c == '\t')
1340 *p++ = '\\', *p++ = 't';
1341 else if (c == '\n')
1342 *p++ = '\\', *p++ = 'n';
1343 else if (c == '\r')
1344 *p++ = '\\', *p++ = 'r';
1345 else if (c < ' ' || c >= 0x7f) {
1346 *p++ = '\\';
1347 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001348 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1349 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 else
1352 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001355 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357
1358 overflow:
1359 PyErr_SetString(PyExc_OverflowError,
1360 "bytes object is too large to make repr");
1361 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001362}
1363
Neal Norwitz6968b052007-02-27 19:02:19 +00001364static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001368}
1369
Neal Norwitz6968b052007-02-27 19:02:19 +00001370static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001371bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (Py_BytesWarningFlag) {
1374 if (PyErr_WarnEx(PyExc_BytesWarning,
1375 "str() on a bytes instance", 1))
1376 return NULL;
1377 }
1378 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001379}
1380
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001382bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385}
Neal Norwitz6968b052007-02-27 19:02:19 +00001386
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387/* This is also used by PyBytes_Concat() */
1388static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001389bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 Py_buffer va, vb;
1392 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 va.len = -1;
1395 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001396 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1397 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1399 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1400 goto done;
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 /* Optimize end cases */
1404 if (va.len == 0 && PyBytes_CheckExact(b)) {
1405 result = b;
1406 Py_INCREF(result);
1407 goto done;
1408 }
1409 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1410 result = a;
1411 Py_INCREF(result);
1412 goto done;
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001415 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 PyErr_NoMemory();
1417 goto done;
1418 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001420 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 if (result != NULL) {
1422 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1423 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1424 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425
1426 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (va.len != -1)
1428 PyBuffer_Release(&va);
1429 if (vb.len != -1)
1430 PyBuffer_Release(&vb);
1431 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432}
Neal Norwitz6968b052007-02-27 19:02:19 +00001433
1434static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001435bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001436{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001437 Py_ssize_t i;
1438 Py_ssize_t j;
1439 Py_ssize_t size;
1440 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 size_t nbytes;
1442 if (n < 0)
1443 n = 0;
1444 /* watch out for overflows: the size can overflow int,
1445 * and the # of bytes needed can overflow size_t
1446 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001447 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 PyErr_SetString(PyExc_OverflowError,
1449 "repeated bytes are too long");
1450 return NULL;
1451 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001452 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1454 Py_INCREF(a);
1455 return (PyObject *)a;
1456 }
1457 nbytes = (size_t)size;
1458 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1459 PyErr_SetString(PyExc_OverflowError,
1460 "repeated bytes are too long");
1461 return NULL;
1462 }
1463 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1464 if (op == NULL)
1465 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001466 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 op->ob_shash = -1;
1468 op->ob_sval[size] = '\0';
1469 if (Py_SIZE(a) == 1 && n > 0) {
1470 memset(op->ob_sval, a->ob_sval[0] , n);
1471 return (PyObject *) op;
1472 }
1473 i = 0;
1474 if (i < size) {
1475 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1476 i = Py_SIZE(a);
1477 }
1478 while (i < size) {
1479 j = (i <= size-i) ? i : size-i;
1480 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1481 i += j;
1482 }
1483 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001484}
1485
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001486static int
1487bytes_contains(PyObject *self, PyObject *arg)
1488{
1489 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1490}
1491
Neal Norwitz6968b052007-02-27 19:02:19 +00001492static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001493bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001494{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (i < 0 || i >= Py_SIZE(a)) {
1496 PyErr_SetString(PyExc_IndexError, "index out of range");
1497 return NULL;
1498 }
1499 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001500}
1501
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001502Py_LOCAL(int)
1503bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1504{
1505 int cmp;
1506 Py_ssize_t len;
1507
1508 len = Py_SIZE(a);
1509 if (Py_SIZE(b) != len)
1510 return 0;
1511
1512 if (a->ob_sval[0] != b->ob_sval[0])
1513 return 0;
1514
1515 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1516 return (cmp == 0);
1517}
1518
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001519static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001520bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 int c;
1523 Py_ssize_t len_a, len_b;
1524 Py_ssize_t min_len;
1525 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001526 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 /* Make sure both arguments are strings. */
1529 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001530 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001531 rc = PyObject_IsInstance((PyObject*)a,
1532 (PyObject*)&PyUnicode_Type);
1533 if (!rc)
1534 rc = PyObject_IsInstance((PyObject*)b,
1535 (PyObject*)&PyUnicode_Type);
1536 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001538 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001539 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001540 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001541 return NULL;
1542 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001543 else {
1544 rc = PyObject_IsInstance((PyObject*)a,
1545 (PyObject*)&PyLong_Type);
1546 if (!rc)
1547 rc = PyObject_IsInstance((PyObject*)b,
1548 (PyObject*)&PyLong_Type);
1549 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001550 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001551 if (rc) {
1552 if (PyErr_WarnEx(PyExc_BytesWarning,
1553 "Comparison between bytes and int", 1))
1554 return NULL;
1555 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 }
1558 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001560 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001562 case Py_EQ:
1563 case Py_LE:
1564 case Py_GE:
1565 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001567 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001568 case Py_NE:
1569 case Py_LT:
1570 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001572 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001573 default:
1574 PyErr_BadArgument();
1575 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 }
1577 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001578 else if (op == Py_EQ || op == Py_NE) {
1579 int eq = bytes_compare_eq(a, b);
1580 eq ^= (op == Py_NE);
1581 result = eq ? Py_True : Py_False;
1582 }
1583 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001584 len_a = Py_SIZE(a);
1585 len_b = Py_SIZE(b);
1586 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001587 if (min_len > 0) {
1588 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001589 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001590 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001592 else
1593 c = 0;
1594 if (c == 0)
1595 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1596 switch (op) {
1597 case Py_LT: c = c < 0; break;
1598 case Py_LE: c = c <= 0; break;
1599 case Py_GT: c = c > 0; break;
1600 case Py_GE: c = c >= 0; break;
1601 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 PyErr_BadArgument();
1603 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001604 }
1605 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 Py_INCREF(result);
1609 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001610}
1611
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001612static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001613bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001614{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001615 if (a->ob_shash == -1) {
1616 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001617 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001618 }
1619 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001620}
1621
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001623bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 if (PyIndex_Check(item)) {
1626 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1627 if (i == -1 && PyErr_Occurred())
1628 return NULL;
1629 if (i < 0)
1630 i += PyBytes_GET_SIZE(self);
1631 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1632 PyErr_SetString(PyExc_IndexError,
1633 "index out of range");
1634 return NULL;
1635 }
1636 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1637 }
1638 else if (PySlice_Check(item)) {
1639 Py_ssize_t start, stop, step, slicelength, cur, i;
1640 char* source_buf;
1641 char* result_buf;
1642 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001643
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001644 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 PyBytes_GET_SIZE(self),
1646 &start, &stop, &step, &slicelength) < 0) {
1647 return NULL;
1648 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 if (slicelength <= 0) {
1651 return PyBytes_FromStringAndSize("", 0);
1652 }
1653 else if (start == 0 && step == 1 &&
1654 slicelength == PyBytes_GET_SIZE(self) &&
1655 PyBytes_CheckExact(self)) {
1656 Py_INCREF(self);
1657 return (PyObject *)self;
1658 }
1659 else if (step == 1) {
1660 return PyBytes_FromStringAndSize(
1661 PyBytes_AS_STRING(self) + start,
1662 slicelength);
1663 }
1664 else {
1665 source_buf = PyBytes_AS_STRING(self);
1666 result = PyBytes_FromStringAndSize(NULL, slicelength);
1667 if (result == NULL)
1668 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 result_buf = PyBytes_AS_STRING(result);
1671 for (cur = start, i = 0; i < slicelength;
1672 cur += step, i++) {
1673 result_buf[i] = source_buf[cur];
1674 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return result;
1677 }
1678 }
1679 else {
1680 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001681 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 Py_TYPE(item)->tp_name);
1683 return NULL;
1684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685}
1686
1687static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001688bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1691 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692}
1693
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001694static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 (lenfunc)bytes_length, /*sq_length*/
1696 (binaryfunc)bytes_concat, /*sq_concat*/
1697 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1698 (ssizeargfunc)bytes_item, /*sq_item*/
1699 0, /*sq_slice*/
1700 0, /*sq_ass_item*/
1701 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001702 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703};
1704
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001705static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 (lenfunc)bytes_length,
1707 (binaryfunc)bytes_subscript,
1708 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709};
1710
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001711static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 (getbufferproc)bytes_buffer_getbuffer,
1713 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714};
1715
1716
/* Values for the `striptype` argument of do_xstrip(), do_strip() and
   do_argstrip(): which end(s) of the bytes object get stripped. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
1720
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721/*[clinic input]
1722bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001724 sep: object = None
1725 The delimiter according which to split the bytes.
1726 None (the default value) means split on ASCII whitespace characters
1727 (space, tab, return, newline, formfeed, vertical tab).
1728 maxsplit: Py_ssize_t = -1
1729 Maximum number of splits to do.
1730 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001732Return a list of the sections in the bytes, using sep as the delimiter.
1733[clinic start generated code]*/
1734
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001735static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001736bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1737/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001738{
1739 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 const char *s = PyBytes_AS_STRING(self), *sub;
1741 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 if (maxsplit < 0)
1745 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001746 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001748 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 return NULL;
1750 sub = vsub.buf;
1751 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1754 PyBuffer_Release(&vsub);
1755 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001756}
1757
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001758/*[clinic input]
1759bytes.partition
1760
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762 /
1763
1764Partition the bytes into three parts using the given separator.
1765
1766This will search for the separator sep in the bytes. If the separator is found,
1767returns a 3-tuple containing the part before the separator, the separator
1768itself, and the part after it.
1769
1770If the separator is not found, returns a 3-tuple containing the original bytes
1771object and two empty bytes objects.
1772[clinic start generated code]*/
1773
Neal Norwitz6968b052007-02-27 19:02:19 +00001774static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001775bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001776/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001777{
Neal Norwitz6968b052007-02-27 19:02:19 +00001778 return stringlib_partition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001782 );
1783}
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785/*[clinic input]
1786bytes.rpartition
1787
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001788 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789 /
1790
1791Partition the bytes into three parts using the given separator.
1792
1793This will search for the separator sep in the bytes, starting at the end. If
1794the separator is found, returns a 3-tuple containing the part before the
1795separator, the separator itself, and the part after it.
1796
1797If the separator is not found, returns a 3-tuple containing two empty bytes
1798objects and the original bytes object.
1799[clinic start generated code]*/
1800
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001801static PyObject *
1802bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001803/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001804{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 return stringlib_rpartition(
1806 (PyObject*) self,
1807 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001810}
1811
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001812/*[clinic input]
1813bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001814
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001815Return a list of the sections in the bytes, using sep as the delimiter.
1816
1817Splitting is done starting at the end of the bytes and working to the front.
1818[clinic start generated code]*/
1819
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001820static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001821bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1822/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001823{
1824 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 const char *s = PyBytes_AS_STRING(self), *sub;
1826 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001827 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 if (maxsplit < 0)
1830 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001831 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001833 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return NULL;
1835 sub = vsub.buf;
1836 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1839 PyBuffer_Release(&vsub);
1840 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001841}
1842
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844/*[clinic input]
1845bytes.join
1846
1847 iterable_of_bytes: object
1848 /
1849
1850Concatenate any number of bytes objects.
1851
1852The bytes whose method is called is inserted in between each pair.
1853
1854The result is returned as a new bytes object.
1855
1856Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1857[clinic start generated code]*/
1858
Neal Norwitz6968b052007-02-27 19:02:19 +00001859static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001860bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1861/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001862{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001863 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001864}
1865
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866PyObject *
1867_PyBytes_Join(PyObject *sep, PyObject *x)
1868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 assert(sep != NULL && PyBytes_Check(sep));
1870 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001871 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872}
1873
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001874static PyObject *
1875bytes_find(PyBytesObject *self, PyObject *args)
1876{
1877 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1878}
1879
1880static PyObject *
1881bytes_index(PyBytesObject *self, PyObject *args)
1882{
1883 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1884}
1885
1886
1887static PyObject *
1888bytes_rfind(PyBytesObject *self, PyObject *args)
1889{
1890 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1891}
1892
1893
1894static PyObject *
1895bytes_rindex(PyBytesObject *self, PyObject *args)
1896{
1897 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1898}
1899
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
1901Py_LOCAL_INLINE(PyObject *)
1902do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001903{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 Py_buffer vsep;
1905 char *s = PyBytes_AS_STRING(self);
1906 Py_ssize_t len = PyBytes_GET_SIZE(self);
1907 char *sep;
1908 Py_ssize_t seplen;
1909 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001911 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 return NULL;
1913 sep = vsep.buf;
1914 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 i = 0;
1917 if (striptype != RIGHTSTRIP) {
1918 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1919 i++;
1920 }
1921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 j = len;
1924 if (striptype != LEFTSTRIP) {
1925 do {
1926 j--;
1927 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1928 j++;
1929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1934 Py_INCREF(self);
1935 return (PyObject*)self;
1936 }
1937 else
1938 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001939}
1940
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
1942Py_LOCAL_INLINE(PyObject *)
1943do_strip(PyBytesObject *self, int striptype)
1944{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 char *s = PyBytes_AS_STRING(self);
1946 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 i = 0;
1949 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001950 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 i++;
1952 }
1953 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 j = len;
1956 if (striptype != LEFTSTRIP) {
1957 do {
1958 j--;
David Malcolm96960882010-11-05 17:23:41 +00001959 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 j++;
1961 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1964 Py_INCREF(self);
1965 return (PyObject*)self;
1966 }
1967 else
1968 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969}
1970
1971
1972Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001973do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975 if (bytes != NULL && bytes != Py_None) {
1976 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 }
1978 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979}
1980
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001981/*[clinic input]
1982bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001984 bytes: object = None
1985 /
1986
1987Strip leading and trailing bytes contained in the argument.
1988
1989If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1990[clinic start generated code]*/
1991
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001992static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001993bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001994/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001995{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001996 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001997}
1998
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001999/*[clinic input]
2000bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002 bytes: object = None
2003 /
2004
2005Strip leading bytes contained in the argument.
2006
2007If the argument is omitted or None, strip leading ASCII whitespace.
2008[clinic start generated code]*/
2009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010static PyObject *
2011bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002012/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013{
2014 return do_argstrip(self, LEFTSTRIP, bytes);
2015}
2016
2017/*[clinic input]
2018bytes.rstrip
2019
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002020 bytes: object = None
2021 /
2022
2023Strip trailing bytes contained in the argument.
2024
2025If the argument is omitted or None, strip trailing ASCII whitespace.
2026[clinic start generated code]*/
2027
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028static PyObject *
2029bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002030/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031{
2032 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002033}
Neal Norwitz6968b052007-02-27 19:02:19 +00002034
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002035
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002036static PyObject *
2037bytes_count(PyBytesObject *self, PyObject *args)
2038{
2039 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2040}
2041
2042
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002043/*[clinic input]
2044bytes.translate
2045
Victor Stinner049e5092014-08-17 22:20:00 +02002046 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002047 Translation table, which must be a bytes object of length 256.
2048 [
2049 deletechars: object
2050 ]
2051 /
2052
2053Return a copy with each character mapped by the given translation table.
2054
2055All characters occurring in the optional argument deletechars are removed.
2056The remaining characters are mapped through the given translation table.
2057[clinic start generated code]*/
2058
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002059static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002060bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2061 PyObject *deletechars)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002062/*[clinic end generated code: output=233df850eb50bf8d input=ca20edf39d780d49]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002064 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 Py_buffer table_view = {NULL, NULL};
2066 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002067 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002068 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002070 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 Py_ssize_t inlen, tablen, dellen = 0;
2072 PyObject *result;
2073 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002075 if (PyBytes_Check(table)) {
2076 table_chars = PyBytes_AS_STRING(table);
2077 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079 else if (table == Py_None) {
2080 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 tablen = 256;
2082 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002083 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002084 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002085 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002086 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002087 tablen = table_view.len;
2088 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 if (tablen != 256) {
2091 PyErr_SetString(PyExc_ValueError,
2092 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002093 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 return NULL;
2095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 if (deletechars != NULL) {
2098 if (PyBytes_Check(deletechars)) {
2099 del_table_chars = PyBytes_AS_STRING(deletechars);
2100 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002102 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002103 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002104 PyBuffer_Release(&table_view);
2105 return NULL;
2106 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002107 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002108 dellen = del_table_view.len;
2109 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 }
2111 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002112 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 dellen = 0;
2114 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 inlen = PyBytes_GET_SIZE(input_obj);
2117 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002118 if (result == NULL) {
2119 PyBuffer_Release(&del_table_view);
2120 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002123 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 /* If no deletions are required, use faster code */
2128 for (i = inlen; --i >= 0; ) {
2129 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002130 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002131 changed = 1;
2132 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 if (!changed && PyBytes_CheckExact(input_obj)) {
2134 Py_INCREF(input_obj);
2135 Py_DECREF(result);
2136 result = input_obj;
2137 }
2138 PyBuffer_Release(&del_table_view);
2139 PyBuffer_Release(&table_view);
2140 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002143 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 for (i = 0; i < 256; i++)
2145 trans_table[i] = Py_CHARMASK(i);
2146 } else {
2147 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002148 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002150 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002154 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 for (i = inlen; --i >= 0; ) {
2157 c = Py_CHARMASK(*input++);
2158 if (trans_table[c] != -1)
2159 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2160 continue;
2161 changed = 1;
2162 }
2163 if (!changed && PyBytes_CheckExact(input_obj)) {
2164 Py_DECREF(result);
2165 Py_INCREF(input_obj);
2166 return input_obj;
2167 }
2168 /* Fix the size of the resulting string */
2169 if (inlen > 0)
2170 _PyBytes_Resize(&result, output - output_start);
2171 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172}
2173
2174
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175/*[clinic input]
2176
2177@staticmethod
2178bytes.maketrans
2179
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002180 frm: Py_buffer
2181 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182 /
2183
2184Return a translation table useable for the bytes or bytearray translate method.
2185
2186The returned table will be one where each byte in frm is mapped to the byte at
2187the same position in to.
2188
2189The bytes objects frm and to must be of the same length.
2190[clinic start generated code]*/
2191
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002192static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002193bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002194/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002195{
2196 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002197}
2198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199
2200/*[clinic input]
2201bytes.replace
2202
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002203 old: Py_buffer
2204 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205 count: Py_ssize_t = -1
2206 Maximum number of occurrences to replace.
2207 -1 (the default value) means replace all occurrences.
2208 /
2209
2210Return a copy with all occurrences of substring old replaced by new.
2211
2212If the optional argument count is given, only the first count occurrences are
2213replaced.
2214[clinic start generated code]*/
2215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002217bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002218 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002219/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002220{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002221 return stringlib_replace((PyObject *)self,
2222 (const char *)old->buf, old->len,
2223 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224}
2225
2226/** End DALKE **/
2227
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002229static PyObject *
2230bytes_startswith(PyBytesObject *self, PyObject *args)
2231{
2232 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2233}
2234
2235static PyObject *
2236bytes_endswith(PyBytesObject *self, PyObject *args)
2237{
2238 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2239}
2240
2241
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002242/*[clinic input]
2243bytes.decode
2244
2245 encoding: str(c_default="NULL") = 'utf-8'
2246 The encoding with which to decode the bytes.
2247 errors: str(c_default="NULL") = 'strict'
2248 The error handling scheme to use for the handling of decoding errors.
2249 The default is 'strict' meaning that decoding errors raise a
2250 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2251 as well as any other name registered with codecs.register_error that
2252 can handle UnicodeDecodeErrors.
2253
2254Decode the bytes using the codec registered for encoding.
2255[clinic start generated code]*/
2256
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002257static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002258bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002259 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002260/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002261{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002262 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002263}
2264
Guido van Rossum20188312006-05-05 15:15:40 +00002265
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002266/*[clinic input]
2267bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002268
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002269 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002270
2271Return a list of the lines in the bytes, breaking at line boundaries.
2272
2273Line breaks are not included in the resulting list unless keepends is given and
2274true.
2275[clinic start generated code]*/
2276
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002277static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002278bytes_splitlines_impl(PyBytesObject *self, int keepends)
2279/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002280{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002281 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002282 (PyObject*) self, PyBytes_AS_STRING(self),
2283 PyBytes_GET_SIZE(self), keepends
2284 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002285}
2286
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002287/*[clinic input]
2288@classmethod
2289bytes.fromhex
2290
2291 string: unicode
2292 /
2293
2294Create a bytes object from a string of hexadecimal numbers.
2295
2296Spaces between two numbers are accepted.
2297Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2298[clinic start generated code]*/
2299
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002301bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002302/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002303{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002304 PyObject *result = _PyBytes_FromHex(string, 0);
2305 if (type != &PyBytes_Type && result != NULL) {
2306 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2307 result, NULL));
2308 }
2309 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002310}
2311
2312PyObject*
2313_PyBytes_FromHex(PyObject *string, int use_bytearray)
2314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002316 Py_ssize_t hexlen, invalid_char;
2317 unsigned int top, bot;
2318 Py_UCS1 *str, *end;
2319 _PyBytesWriter writer;
2320
2321 _PyBytesWriter_Init(&writer);
2322 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002323
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002324 assert(PyUnicode_Check(string));
2325 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002328
Victor Stinner2bf89932015-10-14 11:25:33 +02002329 if (!PyUnicode_IS_ASCII(string)) {
2330 void *data = PyUnicode_DATA(string);
2331 unsigned int kind = PyUnicode_KIND(string);
2332 Py_ssize_t i;
2333
2334 /* search for the first non-ASCII character */
2335 for (i = 0; i < hexlen; i++) {
2336 if (PyUnicode_READ(kind, data, i) >= 128)
2337 break;
2338 }
2339 invalid_char = i;
2340 goto error;
2341 }
2342
2343 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2344 str = PyUnicode_1BYTE_DATA(string);
2345
2346 /* This overestimates if there are spaces */
2347 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2348 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002350
2351 end = str + hexlen;
2352 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02002354 if (*str == ' ') {
2355 do {
2356 str++;
2357 } while (*str == ' ');
2358 if (str >= end)
2359 break;
2360 }
2361
2362 top = _PyLong_DigitValue[*str];
2363 if (top >= 16) {
2364 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 goto error;
2366 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002367 str++;
2368
2369 bot = _PyLong_DigitValue[*str];
2370 if (bot >= 16) {
2371 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2372 goto error;
2373 }
2374 str++;
2375
2376 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002378
2379 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002380
2381 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002382 PyErr_Format(PyExc_ValueError,
2383 "non-hexadecimal number found in "
2384 "fromhex() arg at position %zd", invalid_char);
2385 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002387}
2388
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002389PyDoc_STRVAR(hex__doc__,
2390"B.hex() -> string\n\
2391\n\
2392Create a string of hexadecimal numbers from a bytes object.\n\
2393Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2394
2395static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002396bytes_hex(PyBytesObject *self)
2397{
2398 char* argbuf = PyBytes_AS_STRING(self);
2399 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2400 return _Py_strhex(argbuf, arglen);
2401}
2402
2403static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002404bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002407}
2408
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002409
/* Method table for the bytes type; PyBytes_Type refers to it through its
   tp_methods slot.  The BYTES_*_METHODDEF names are macros that are not
   defined in this part of the file (presumably they come from the generated
   clinic header -- verify).  The table ends with a {NULL, NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS,
     _Py_center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     _Py_expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
    {NULL, NULL} /* sentinel */
};
2469
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002470static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002471bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002472{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002473 if (!PyBytes_Check(self)) {
2474 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002475 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002476 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002477 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002478}
2479
/* Number-protocol table for bytes.  Only nb_remainder is set (to
   bytes_mod); the three slots before it are explicitly 0 and every slot
   after it is left to the implicit zero-initialization of a static
   aggregate. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
2486
2487static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002488bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
2490static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002491bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 PyObject *x = NULL;
2494 const char *encoding = NULL;
2495 const char *errors = NULL;
2496 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002497 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 Py_ssize_t size;
2499 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002500 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002503 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2505 &encoding, &errors))
2506 return NULL;
2507 if (x == NULL) {
2508 if (encoding != NULL || errors != NULL) {
2509 PyErr_SetString(PyExc_TypeError,
2510 "encoding or errors without sequence "
2511 "argument");
2512 return NULL;
2513 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002514 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002517 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002519 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002521 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 return NULL;
2523 }
2524 new = PyUnicode_AsEncodedString(x, encoding, errors);
2525 if (new == NULL)
2526 return NULL;
2527 assert(PyBytes_Check(new));
2528 return new;
2529 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002530
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002531 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002532 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002533 PyUnicode_Check(x) ?
2534 "string argument without an encoding" :
2535 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002536 return NULL;
2537 }
2538
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002539 /* We'd like to call PyObject_Bytes here, but we need to check for an
2540 integer argument before deferring to PyBytes_FromObject, something
2541 PyObject_Bytes doesn't do. */
2542 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2543 if (func != NULL) {
2544 new = PyObject_CallFunctionObjArgs(func, NULL);
2545 Py_DECREF(func);
2546 if (new == NULL)
2547 return NULL;
2548 if (!PyBytes_Check(new)) {
2549 PyErr_Format(PyExc_TypeError,
2550 "__bytes__ returned non-bytes (type %.200s)",
2551 Py_TYPE(new)->tp_name);
2552 Py_DECREF(new);
2553 return NULL;
2554 }
2555 return new;
2556 }
2557 else if (PyErr_Occurred())
2558 return NULL;
2559
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002560 if (PyUnicode_Check(x)) {
2561 PyErr_SetString(PyExc_TypeError,
2562 "string argument without an encoding");
2563 return NULL;
2564 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002566 if (PyIndex_Check(x)) {
2567 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2568 if (size == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 return NULL;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002570 }
2571 if (size < 0) {
2572 PyErr_SetString(PyExc_ValueError, "negative count");
2573 return NULL;
2574 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002575 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002576 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 return new;
2579 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002580
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002581 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002582}
2583
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002584static PyObject*
2585_PyBytes_FromBuffer(PyObject *x)
2586{
2587 PyObject *new;
2588 Py_buffer view;
2589
2590 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2591 return NULL;
2592
2593 new = PyBytes_FromStringAndSize(NULL, view.len);
2594 if (!new)
2595 goto fail;
2596 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2597 &view, view.len, 'C') < 0)
2598 goto fail;
2599 PyBuffer_Release(&view);
2600 return new;
2601
2602fail:
2603 Py_XDECREF(new);
2604 PyBuffer_Release(&view);
2605 return NULL;
2606}
2607
Victor Stinner3c50ce32015-10-14 13:50:40 +02002608#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2609 do { \
2610 PyObject *bytes; \
2611 Py_ssize_t i; \
2612 Py_ssize_t value; \
2613 char *str; \
2614 PyObject *item; \
2615 \
2616 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2617 if (bytes == NULL) \
2618 return NULL; \
2619 str = ((PyBytesObject *)bytes)->ob_sval; \
2620 \
2621 for (i = 0; i < Py_SIZE(x); i++) { \
2622 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002623 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002624 if (value == -1 && PyErr_Occurred()) \
2625 goto error; \
2626 \
2627 if (value < 0 || value >= 256) { \
2628 PyErr_SetString(PyExc_ValueError, \
2629 "bytes must be in range(0, 256)"); \
2630 goto error; \
2631 } \
2632 *str++ = (char) value; \
2633 } \
2634 return bytes; \
2635 \
2636 error: \
2637 Py_DECREF(bytes); \
2638 return NULL; \
2639 } while (0)
2640
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002641static PyObject*
2642_PyBytes_FromList(PyObject *x)
2643{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002644 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002645}
2646
2647static PyObject*
2648_PyBytes_FromTuple(PyObject *x)
2649{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002650 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002651}
2652
2653static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002654_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002655{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002656 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002657 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002658 _PyBytesWriter writer;
2659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002661 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 if (size == -1 && PyErr_Occurred())
2663 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002664
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002665 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002666 str = _PyBytesWriter_Alloc(&writer, size);
2667 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002669 writer.overallocate = 1;
2670 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Run the iterator to exhaustion */
2673 for (i = 0; ; i++) {
2674 PyObject *item;
2675 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Get the next item */
2678 item = PyIter_Next(it);
2679 if (item == NULL) {
2680 if (PyErr_Occurred())
2681 goto error;
2682 break;
2683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002686 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 Py_DECREF(item);
2688 if (value == -1 && PyErr_Occurred())
2689 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Range check */
2692 if (value < 0 || value >= 256) {
2693 PyErr_SetString(PyExc_ValueError,
2694 "bytes must be in range(0, 256)");
2695 goto error;
2696 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* Append the byte */
2699 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002700 str = _PyBytesWriter_Resize(&writer, str, size+1);
2701 if (str == NULL)
2702 return NULL;
2703 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002705 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002707
2708 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
2710 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002711 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713}
2714
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002715PyObject *
2716PyBytes_FromObject(PyObject *x)
2717{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002718 PyObject *it, *result;
2719
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002720 if (x == NULL) {
2721 PyErr_BadInternalCall();
2722 return NULL;
2723 }
2724
2725 if (PyBytes_CheckExact(x)) {
2726 Py_INCREF(x);
2727 return x;
2728 }
2729
2730 /* Use the modern buffer interface */
2731 if (PyObject_CheckBuffer(x))
2732 return _PyBytes_FromBuffer(x);
2733
2734 if (PyList_CheckExact(x))
2735 return _PyBytes_FromList(x);
2736
2737 if (PyTuple_CheckExact(x))
2738 return _PyBytes_FromTuple(x);
2739
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002740 if (!PyUnicode_Check(x)) {
2741 it = PyObject_GetIter(x);
2742 if (it != NULL) {
2743 result = _PyBytes_FromIterator(it, x);
2744 Py_DECREF(it);
2745 return result;
2746 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002747 }
2748
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002749 PyErr_Format(PyExc_TypeError,
2750 "cannot convert '%.200s' object to bytes",
2751 x->ob_type->tp_name);
2752 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002753}
2754
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002756bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 PyObject *tmp, *pnew;
2759 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 assert(PyType_IsSubtype(type, &PyBytes_Type));
2762 tmp = bytes_new(&PyBytes_Type, args, kwds);
2763 if (tmp == NULL)
2764 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002765 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 n = PyBytes_GET_SIZE(tmp);
2767 pnew = type->tp_alloc(type, n);
2768 if (pnew != NULL) {
2769 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2770 PyBytes_AS_STRING(tmp), n+1);
2771 ((PyBytesObject *)pnew)->ob_shash =
2772 ((PyBytesObject *)tmp)->ob_shash;
2773 }
2774 Py_DECREF(tmp);
2775 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776}
2777
/* Docstring of the bytes type; PyBytes_Type refers to it through its
   tp_doc slot. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002790
/* Defined further down, with the bytes iterator; needed here for tp_iter. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  Slots are given positionally, so their order
   must not be changed.  The item size is sizeof(char) and the basic size
   is PyBytesObject_SIZE. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",
    PyBytesObject_SIZE,
    sizeof(char),
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002835
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002837PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 assert(pv != NULL);
2840 if (*pv == NULL)
2841 return;
2842 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002843 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002844 return;
2845 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002846
2847 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2848 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002849 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002850 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002851
Antoine Pitrou161d6952014-05-01 14:36:20 +02002852 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002853 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002854 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2855 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2856 Py_CLEAR(*pv);
2857 return;
2858 }
2859
2860 oldsize = PyBytes_GET_SIZE(*pv);
2861 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2862 PyErr_NoMemory();
2863 goto error;
2864 }
2865 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2866 goto error;
2867
2868 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2869 PyBuffer_Release(&wb);
2870 return;
2871
2872 error:
2873 PyBuffer_Release(&wb);
2874 Py_CLEAR(*pv);
2875 return;
2876 }
2877
2878 else {
2879 /* Multiple references, need to create new object */
2880 PyObject *v;
2881 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002882 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002883 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884}
2885
2886void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002887PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 PyBytes_Concat(pv, w);
2890 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891}
2892
2893
Ethan Furmanb95b5612015-01-23 20:05:18 -08002894/* The following function breaks the notion that bytes are immutable:
2895 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002897 as creating a new bytes object and destroying the old one, only
2898 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002899 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002900 Note that if there's not enough memory to resize the bytes object, the
2901 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902 memory" exception is set, and -1 is returned. Else (on success) 0 is
2903 returned, and the value in *pv may or may not be the same as on input.
2904 As always, an extra byte is allocated for a trailing \0 byte (newsize
2905 does *not* include that), and a trailing \0 byte is stored.
2906*/
2907
2908int
2909_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2910{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002911 PyObject *v;
2912 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 v = *pv;
2914 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2915 *pv = 0;
2916 Py_DECREF(v);
2917 PyErr_BadInternalCall();
2918 return -1;
2919 }
2920 /* XXX UNREF/NEWREF interface should be more symmetrical */
2921 _Py_DEC_REFTOTAL;
2922 _Py_ForgetReference(v);
2923 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002924 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 if (*pv == NULL) {
2926 PyObject_Del(v);
2927 PyErr_NoMemory();
2928 return -1;
2929 }
2930 _Py_NewReference(*pv);
2931 sv = (PyBytesObject *) *pv;
2932 Py_SIZE(sv) = newsize;
2933 sv->ob_sval[newsize] = '\0';
2934 sv->ob_shash = -1; /* invalidate cached hash value */
2935 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002936}
2937
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938void
2939PyBytes_Fini(void)
2940{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002941 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002942 for (i = 0; i < UCHAR_MAX + 1; i++)
2943 Py_CLEAR(characters[i]);
2944 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945}
2946
Benjamin Peterson4116f362008-05-27 00:36:20 +00002947/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002948
/* State of a bytes iterator. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to hand out */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002954
/* Deallocator: stop GC tracking, drop the reference to the underlying
   bytes object (if there still is one) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2962
/* GC traversal: the only object reference held is it_seq. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2969
2970static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002972{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 PyBytesObject *seq;
2974 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002976 assert(it != NULL);
2977 seq = it->it_seq;
2978 if (seq == NULL)
2979 return NULL;
2980 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002982 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2983 item = PyLong_FromLong(
2984 (unsigned char)seq->ob_sval[it->it_index]);
2985 if (item != NULL)
2986 ++it->it_index;
2987 return item;
2988 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03002991 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002993}
2994
2995static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 Py_ssize_t len = 0;
2999 if (it->it_seq)
3000 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3001 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003002}
3003
3004PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003006
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003007static PyObject *
3008striter_reduce(striterobject *it)
3009{
3010 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003011 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003012 it->it_seq, it->it_index);
3013 } else {
3014 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3015 if (u == NULL)
3016 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003017 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003018 }
3019}
3020
3021PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3022
3023static PyObject *
3024striter_setstate(striterobject *it, PyObject *state)
3025{
3026 Py_ssize_t index = PyLong_AsSsize_t(state);
3027 if (index == -1 && PyErr_Occurred())
3028 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003029 if (it->it_seq != NULL) {
3030 if (index < 0)
3031 index = 0;
3032 else if (index > PyBytes_GET_SIZE(it->it_seq))
3033 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3034 it->it_index = index;
3035 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003036 Py_RETURN_NONE;
3037}
3038
3039PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3040
/* Method table of the bytes iterator (referenced from PyBytesIter_Type's
   tp_methods slot): length hint plus pickling support. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__", (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL, NULL} /* sentinel */
};
3050
/* Type object of the bytes iterator.  Slots are positional; everything
   after the last listed slot is left zero-initialized. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3083
3084static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003085bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003086{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 if (!PyBytes_Check(seq)) {
3090 PyErr_BadInternalCall();
3091 return NULL;
3092 }
3093 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3094 if (it == NULL)
3095 return NULL;
3096 it->it_index = 0;
3097 Py_INCREF(seq);
3098 it->it_seq = (PyBytesObject *)seq;
3099 _PyObject_GC_TRACK(it);
3100 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101}
Victor Stinner00165072015-10-09 01:53:21 +02003102
3103
3104/* _PyBytesWriter API */
3105
/* Divisor for the spare room added when an overallocating writer grows:
   a request of `size` bytes is extended by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3113
3114void
3115_PyBytesWriter_Init(_PyBytesWriter *writer)
3116{
Victor Stinner661aacc2015-10-14 09:41:48 +02003117 /* Set all attributes before small_buffer to 0 */
3118 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003119#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003120 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003121#endif
3122}
3123
3124void
3125_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3126{
3127 Py_CLEAR(writer->buffer);
3128}
3129
3130Py_LOCAL_INLINE(char*)
3131_PyBytesWriter_AsString(_PyBytesWriter *writer)
3132{
Victor Stinner661aacc2015-10-14 09:41:48 +02003133 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003134 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003135 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003136 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003137 else if (writer->use_bytearray) {
3138 assert(writer->buffer != NULL);
3139 return PyByteArray_AS_STRING(writer->buffer);
3140 }
3141 else {
3142 assert(writer->buffer != NULL);
3143 return PyBytes_AS_STRING(writer->buffer);
3144 }
Victor Stinner00165072015-10-09 01:53:21 +02003145}
3146
3147Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003148_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003149{
3150 char *start = _PyBytesWriter_AsString(writer);
3151 assert(str != NULL);
3152 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003153 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003154 return str - start;
3155}
3156
3157Py_LOCAL_INLINE(void)
3158_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3159{
3160#ifdef Py_DEBUG
3161 char *start, *end;
3162
Victor Stinner661aacc2015-10-14 09:41:48 +02003163 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003164 assert(writer->buffer == NULL);
3165 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003166 else {
3167 assert(writer->buffer != NULL);
3168 if (writer->use_bytearray)
3169 assert(PyByteArray_CheckExact(writer->buffer));
3170 else
3171 assert(PyBytes_CheckExact(writer->buffer));
3172 assert(Py_REFCNT(writer->buffer) == 1);
3173 }
Victor Stinner00165072015-10-09 01:53:21 +02003174
Victor Stinner661aacc2015-10-14 09:41:48 +02003175 if (writer->use_bytearray) {
3176 /* bytearray has its own overallocation algorithm,
3177 writer overallocation must be disabled */
3178 assert(!writer->overallocate);
3179 }
3180
3181 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003182 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003183 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003184 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003185 assert(start[writer->allocated] == 0);
3186
3187 end = start + writer->allocated;
3188 assert(str != NULL);
3189 assert(start <= str && str <= end);
3190#endif
3191}
3192
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003193void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003194_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003195{
3196 Py_ssize_t allocated, pos;
3197
3198 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003199 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003200
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003201 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003202 if (writer->overallocate
3203 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3204 /* overallocate to limit the number of realloc() */
3205 allocated += allocated / OVERALLOCATE_FACTOR;
3206 }
3207
Victor Stinner2bf89932015-10-14 11:25:33 +02003208 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003209 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003210 if (writer->use_bytearray) {
3211 if (PyByteArray_Resize(writer->buffer, allocated))
3212 goto error;
3213 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3214 but we cannot use ob_alloc because bytes may need to be moved
3215 to use the whole buffer. bytearray uses an internal optimization
3216 to avoid moving or copying bytes when bytes are removed at the
3217 beginning (ex: del bytearray[:1]). */
3218 }
3219 else {
3220 if (_PyBytes_Resize(&writer->buffer, allocated))
3221 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003222 }
3223 }
3224 else {
3225 /* convert from stack buffer to bytes object buffer */
3226 assert(writer->buffer == NULL);
3227
Victor Stinner661aacc2015-10-14 09:41:48 +02003228 if (writer->use_bytearray)
3229 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3230 else
3231 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003232 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003233 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003234
3235 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003236 char *dest;
3237 if (writer->use_bytearray)
3238 dest = PyByteArray_AS_STRING(writer->buffer);
3239 else
3240 dest = PyBytes_AS_STRING(writer->buffer);
3241 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003242 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003243 pos);
3244 }
3245
Victor Stinnerb3653a32015-10-09 03:38:24 +02003246 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003247#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003248 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003249#endif
Victor Stinner00165072015-10-09 01:53:21 +02003250 }
3251 writer->allocated = allocated;
3252
3253 str = _PyBytesWriter_AsString(writer) + pos;
3254 _PyBytesWriter_CheckConsistency(writer, str);
3255 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003256
3257error:
3258 _PyBytesWriter_Dealloc(writer);
3259 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003260}
3261
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003262void*
3263_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3264{
3265 Py_ssize_t new_min_size;
3266
3267 _PyBytesWriter_CheckConsistency(writer, str);
3268 assert(size >= 0);
3269
3270 if (size == 0) {
3271 /* nothing to do */
3272 return str;
3273 }
3274
3275 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3276 PyErr_NoMemory();
3277 _PyBytesWriter_Dealloc(writer);
3278 return NULL;
3279 }
3280 new_min_size = writer->min_size + size;
3281
3282 if (new_min_size > writer->allocated)
3283 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3284
3285 writer->min_size = new_min_size;
3286 return str;
3287}
3288
Victor Stinner00165072015-10-09 01:53:21 +02003289/* Allocate the buffer to write size bytes.
3290 Return the pointer to the beginning of buffer data.
3291 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003292void*
Victor Stinner00165072015-10-09 01:53:21 +02003293_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3294{
3295 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003296 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003297 assert(size >= 0);
3298
Victor Stinnerb3653a32015-10-09 03:38:24 +02003299 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003300#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003301 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003302 /* In debug mode, don't use the full small buffer because it is less
3303 efficient than bytes and bytearray objects to detect buffer underflow
3304 and buffer overflow. Use 10 bytes of the small buffer to test also
3305 code using the smaller buffer in debug mode.
3306
3307 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3308 in debug mode to also be able to detect stack overflow when running
3309 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3310 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3311 stack overflow. */
3312 writer->allocated = Py_MIN(writer->allocated, 10);
3313 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3314 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003315 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003316#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003317 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003318#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003319 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003320}
3321
3322PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003323_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003324{
Victor Stinner2bf89932015-10-14 11:25:33 +02003325 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003326 PyObject *result;
3327
3328 _PyBytesWriter_CheckConsistency(writer, str);
3329
Victor Stinner2bf89932015-10-14 11:25:33 +02003330 size = _PyBytesWriter_GetSize(writer, str);
3331 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003332 Py_CLEAR(writer->buffer);
3333 /* Get the empty byte string singleton */
3334 result = PyBytes_FromStringAndSize(NULL, 0);
3335 }
3336 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003337 if (writer->use_bytearray) {
3338 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3339 }
3340 else {
3341 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3342 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003343 }
3344 else {
3345 result = writer->buffer;
3346 writer->buffer = NULL;
3347
Victor Stinner2bf89932015-10-14 11:25:33 +02003348 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003349 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003350 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003351 Py_DECREF(result);
3352 return NULL;
3353 }
3354 }
3355 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003356 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003357 assert(result == NULL);
3358 return NULL;
3359 }
Victor Stinner00165072015-10-09 01:53:21 +02003360 }
3361 }
Victor Stinner00165072015-10-09 01:53:21 +02003362 }
Victor Stinner00165072015-10-09 01:53:21 +02003363 return result;
3364}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003365
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003366void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003367_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003368 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003369{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003370 char *str = (char *)ptr;
3371
Victor Stinnerce179bf2015-10-09 12:57:22 +02003372 str = _PyBytesWriter_Prepare(writer, str, size);
3373 if (str == NULL)
3374 return NULL;
3375
3376 Py_MEMCPY(str, bytes, size);
3377 str += size;
3378
3379 return str;
3380}