blob: a30ac0c37970e12ff5d45c603285d9fb5aac9a22 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030012class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the resulting object contains exactly `size' bytes, where `size'
   is the length of that string (not counting the terminator).

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
Christian Heimesf051e432016-09-13 20:22:02 +0200123 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200166 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700250 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Bit flags recording which flag characters appeared in a format
   specifier; each flag occupies its own bit so they can be OR-ed together. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200440 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200441 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 str += len;
443 return str;
444 }
445
446 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200448 *p_result = result;
449 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450}
451
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455 PyObject *result, *iobj;
456 if (type == 'i')
457 type = 'd';
458 if (PyLong_Check(v))
459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460 if (PyNumber_Check(v)) {
461 /* make sure number is a type of integer for o, x, and X */
462 if (type == 'o' || type == 'x' || type == 'X')
463 iobj = PyNumber_Index(v);
464 else
465 iobj = PyNumber_Long(v);
466 if (iobj == NULL) {
467 if (!PyErr_ExceptionMatches(PyExc_TypeError))
468 return NULL;
469 }
470 else if (!PyLong_Check(iobj))
471 Py_CLEAR(iobj);
472 if (iobj != NULL) {
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 }
478 PyErr_Format(PyExc_TypeError,
479 "%%%c format: %s is required, not %.200s", type,
480 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481 : "a number",
482 Py_TYPE(v)->tp_name);
483 return NULL;
484}
485
486static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyBytes_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 *p = PyByteArray_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
497 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300498 PyObject *iobj;
499 long ival;
500 int overflow;
501 /* make sure number is a type of integer */
502 if (PyLong_Check(arg)) {
503 ival = PyLong_AsLongAndOverflow(arg, &overflow);
504 }
505 else {
506 iobj = PyNumber_Index(arg);
507 if (iobj == NULL) {
508 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509 return 0;
510 goto onError;
511 }
512 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513 Py_DECREF(iobj);
514 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300515 if (!overflow && ival == -1 && PyErr_Occurred())
516 goto onError;
517 if (overflow || !(0 <= ival && ival <= 255)) {
518 PyErr_SetString(PyExc_OverflowError,
519 "%c arg not in range(256)");
520 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300522 *p = (char)ival;
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300525 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 PyErr_SetString(PyExc_TypeError,
527 "%c requires an integer in range(256) or a single byte");
528 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529}
530
531static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 /* is it a bytes object? */
537 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *pbuf = PyBytes_AS_STRING(v);
539 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 return v;
542 }
543 if (PyByteArray_Check(v)) {
544 *pbuf = PyByteArray_AS_STRING(v);
545 *plen = PyByteArray_GET_SIZE(v);
546 Py_INCREF(v);
547 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 }
549 /* does it support __bytes__? */
550 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100552 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800553 Py_DECREF(func);
554 if (result == NULL)
555 return NULL;
556 if (!PyBytes_Check(result)) {
557 PyErr_Format(PyExc_TypeError,
558 "__bytes__ returned non-bytes (type %.200s)",
559 Py_TYPE(result)->tp_name);
560 Py_DECREF(result);
561 return NULL;
562 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200563 *pbuf = PyBytes_AS_STRING(result);
564 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 return result;
566 }
567 PyErr_Format(PyExc_TypeError,
568 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569 Py_TYPE(v)->tp_name);
570 return NULL;
571}
572
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200573/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574
575PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200576_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578{
Victor Stinner772b2b02015-10-14 09:56:53 +0200579 const char *fmt;
580 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200582 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200585 _PyBytesWriter writer;
586
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_BadInternalCall();
589 return NULL;
590 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200591 fmt = format;
592 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593
594 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596
597 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 if (!use_bytearray)
601 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200602
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 if (PyTuple_Check(args)) {
604 arglen = PyTuple_GET_SIZE(args);
605 argidx = 0;
606 }
607 else {
608 arglen = -1;
609 argidx = -2;
610 }
611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613 !PyByteArray_Check(args)) {
614 dict = args;
615 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 while (--fmtcnt >= 0) {
618 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 Py_ssize_t len;
620 char *pos;
621
622 pos = strchr(fmt + 1, '%');
623 if (pos != NULL)
624 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 else
626 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 assert(len != 0);
628
Christian Heimesf051e432016-09-13 20:22:02 +0200629 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630 res += len;
631 fmt += len;
632 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 }
634 else {
635 /* Got a format specifier */
636 int flags = 0;
637 Py_ssize_t width = -1;
638 int prec = -1;
639 int c = '\0';
640 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 PyObject *v = NULL;
642 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200643 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800644 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200645 Py_ssize_t len = 0;
646 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200647 Py_ssize_t alloc;
648#ifdef Py_DEBUG
649 char *before;
650#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 fmt++;
653 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200654 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 Py_ssize_t keylen;
656 PyObject *key;
657 int pcount = 1;
658
659 if (dict == NULL) {
660 PyErr_SetString(PyExc_TypeError,
661 "format requires a mapping");
662 goto error;
663 }
664 ++fmt;
665 --fmtcnt;
666 keystart = fmt;
667 /* Skip over balanced parentheses */
668 while (pcount > 0 && --fmtcnt >= 0) {
669 if (*fmt == ')')
670 --pcount;
671 else if (*fmt == '(')
672 ++pcount;
673 fmt++;
674 }
675 keylen = fmt - keystart - 1;
676 if (fmtcnt < 0 || pcount > 0) {
677 PyErr_SetString(PyExc_ValueError,
678 "incomplete format key");
679 goto error;
680 }
681 key = PyBytes_FromStringAndSize(keystart,
682 keylen);
683 if (key == NULL)
684 goto error;
685 if (args_owned) {
686 Py_DECREF(args);
687 args_owned = 0;
688 }
689 args = PyObject_GetItem(dict, key);
690 Py_DECREF(key);
691 if (args == NULL) {
692 goto error;
693 }
694 args_owned = 1;
695 arglen = -1;
696 argidx = -2;
697 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200698
699 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800700 while (--fmtcnt >= 0) {
701 switch (c = *fmt++) {
702 case '-': flags |= F_LJUST; continue;
703 case '+': flags |= F_SIGN; continue;
704 case ' ': flags |= F_BLANK; continue;
705 case '#': flags |= F_ALT; continue;
706 case '0': flags |= F_ZERO; continue;
707 }
708 break;
709 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200710
711 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800712 if (c == '*') {
713 v = getnextarg(args, arglen, &argidx);
714 if (v == NULL)
715 goto error;
716 if (!PyLong_Check(v)) {
717 PyErr_SetString(PyExc_TypeError,
718 "* wants int");
719 goto error;
720 }
721 width = PyLong_AsSsize_t(v);
722 if (width == -1 && PyErr_Occurred())
723 goto error;
724 if (width < 0) {
725 flags |= F_LJUST;
726 width = -width;
727 }
728 if (--fmtcnt >= 0)
729 c = *fmt++;
730 }
731 else if (c >= 0 && isdigit(c)) {
732 width = c - '0';
733 while (--fmtcnt >= 0) {
734 c = Py_CHARMASK(*fmt++);
735 if (!isdigit(c))
736 break;
737 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738 PyErr_SetString(
739 PyExc_ValueError,
740 "width too big");
741 goto error;
742 }
743 width = width*10 + (c - '0');
744 }
745 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200746
747 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800748 if (c == '.') {
749 prec = 0;
750 if (--fmtcnt >= 0)
751 c = *fmt++;
752 if (c == '*') {
753 v = getnextarg(args, arglen, &argidx);
754 if (v == NULL)
755 goto error;
756 if (!PyLong_Check(v)) {
757 PyErr_SetString(
758 PyExc_TypeError,
759 "* wants int");
760 goto error;
761 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200762 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800763 if (prec == -1 && PyErr_Occurred())
764 goto error;
765 if (prec < 0)
766 prec = 0;
767 if (--fmtcnt >= 0)
768 c = *fmt++;
769 }
770 else if (c >= 0 && isdigit(c)) {
771 prec = c - '0';
772 while (--fmtcnt >= 0) {
773 c = Py_CHARMASK(*fmt++);
774 if (!isdigit(c))
775 break;
776 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777 PyErr_SetString(
778 PyExc_ValueError,
779 "prec too big");
780 goto error;
781 }
782 prec = prec*10 + (c - '0');
783 }
784 }
785 } /* prec */
786 if (fmtcnt >= 0) {
787 if (c == 'h' || c == 'l' || c == 'L') {
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 }
792 if (fmtcnt < 0) {
793 PyErr_SetString(PyExc_ValueError,
794 "incomplete format");
795 goto error;
796 }
797 if (c != '%') {
798 v = getnextarg(args, arglen, &argidx);
799 if (v == NULL)
800 goto error;
801 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200802
803 if (fmtcnt < 0) {
804 /* last writer: disable writer overallocation */
805 writer.overallocate = 0;
806 }
807
Ethan Furmanb95b5612015-01-23 20:05:18 -0800808 sign = 0;
809 fill = ' ';
810 switch (c) {
811 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200812 *res++ = '%';
813 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200844 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200845 && width == -1 && prec == -1
846 && !(flags & (F_SIGN | F_BLANK))
847 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 {
849 /* Fast path */
850 int alternate = flags & F_ALT;
851 int base;
852
853 switch(c)
854 {
855 default:
856 assert(0 && "'type' not in [diuoxX]");
857 case 'd':
858 case 'i':
859 case 'u':
860 base = 10;
861 break;
862 case 'o':
863 base = 8;
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 break;
869 }
870
871 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200872 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200873 res = _PyLong_FormatBytesWriter(&writer, res,
874 v, base, alternate);
875 if (res == NULL)
876 goto error;
877 continue;
878 }
879
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300880 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200881 if (!temp)
882 goto error;
883 assert(PyUnicode_IS_ASCII(temp));
884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885 len = PyUnicode_GET_LENGTH(temp);
886 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 if (flags & F_ZERO)
888 fill = '0';
889 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200890
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 case 'e':
892 case 'E':
893 case 'f':
894 case 'F':
895 case 'g':
896 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897 if (width == -1 && prec == -1
898 && !(flags & (F_SIGN | F_BLANK)))
899 {
900 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200901 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200902 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (res == NULL)
904 goto error;
905 continue;
906 }
907
Victor Stinnerad771582015-10-09 12:38:53 +0200908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 goto error;
910 pbuf = PyBytes_AS_STRING(temp);
911 len = PyBytes_GET_SIZE(temp);
912 sign = 1;
913 if (flags & F_ZERO)
914 fill = '0';
915 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200918 pbuf = &onechar;
919 len = byte_converter(v, &onechar);
920 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200922 if (width == -1) {
923 /* Fast path */
924 *res++ = onechar;
925 continue;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 default:
930 PyErr_Format(PyExc_ValueError,
931 "unsupported format character '%c' (0x%x) "
932 "at index %zd",
933 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200934 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 goto error;
936 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200937
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if (sign) {
939 if (*pbuf == '-' || *pbuf == '+') {
940 sign = *pbuf++;
941 len--;
942 }
943 else if (flags & F_SIGN)
944 sign = '+';
945 else if (flags & F_BLANK)
946 sign = ' ';
947 else
948 sign = 0;
949 }
950 if (width < len)
951 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
953 alloc = width;
954 if (sign != 0 && len == width)
955 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200956 /* 2: size preallocated for %s */
957 if (alloc > 2) {
958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800961 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200962#ifdef Py_DEBUG
963 before = res;
964#endif
965
966 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 if (sign) {
968 if (fill != ' ')
969 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 if (width > len)
971 width--;
972 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Write the numeric prefix for "x", "X" and "o" formats
975 if the alternate form is used.
976 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200977 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800978 assert(pbuf[0] == '0');
979 assert(pbuf[1] == c);
980 if (fill != ' ') {
981 *res++ = *pbuf++;
982 *res++ = *pbuf++;
983 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 width -= 2;
985 if (width < 0)
986 width = 0;
987 len -= 2;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992 memset(res, fill, width - len);
993 res += (width - len);
994 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996
997 /* If padding with spaces: write sign if needed and/or numeric
998 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (fill == ' ') {
1000 if (sign)
1001 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001002 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 assert(pbuf[0] == '0');
1004 assert(pbuf[1] == c);
1005 *res++ = *pbuf++;
1006 *res++ = *pbuf++;
1007 }
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001011 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Pad right with the fill character if needed */
1015 if (width > len) {
1016 memset(res, ' ', width - len);
1017 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 if (dict && (argidx < arglen) && c != '%') {
1021 PyErr_SetString(PyExc_TypeError,
1022 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 Py_XDECREF(temp);
1024 goto error;
1025 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028#ifdef Py_DEBUG
1029 /* check that we computed the exact size for this write */
1030 assert((res - before) == alloc);
1031#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* If overallocation was disabled, ensure that it was the last
1035 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 if (argidx < arglen && !dict) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 goto error;
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (args_owned) {
1046 Py_DECREF(args);
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049
1050 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (args_owned) {
1053 Py_DECREF(args);
1054 }
1055 return NULL;
1056}
1057
1058/* =-= */
1059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064}
1065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066/* Unescape a backslash-escaped string. If unicode is non-zero,
1067 the string is a u-literal. If recode_encoding is non-zero,
1068 the string is UTF-8 encoded and should be re-encoded in the
1069 specified encoding. */
1070
Victor Stinner2ec80632015-10-14 13:32:13 +02001071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073 const char *errors, const char *recode_encoding,
1074 _PyBytesWriter *writer, char *p)
1075{
1076 PyObject *u, *w;
1077 const char* t;
1078
1079 t = *s;
1080 /* Decode non-ASCII bytes as UTF-8. */
1081 while (t < end && (*t & 0x80))
1082 t++;
1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084 if (u == NULL)
1085 return NULL;
1086
1087 /* Recode them in target encoding. */
1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089 Py_DECREF(u);
1090 if (w == NULL)
1091 return NULL;
1092 assert(PyBytes_Check(w));
1093
1094 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001095 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001096 p = _PyBytesWriter_WriteBytes(writer, p,
1097 PyBytes_AS_STRING(w),
1098 PyBytes_GET_SIZE(w));
1099 Py_DECREF(w);
1100 if (p == NULL)
1101 return NULL;
1102
1103 *s = t;
1104 return p;
1105}
1106
Eric V. Smith42454af2016-10-31 09:22:08 -04001107PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 Py_ssize_t len,
1109 const char *errors,
1110 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001111 const char *recode_encoding,
1112 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001115 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 _PyBytesWriter writer;
1118
1119 _PyBytesWriter_Init(&writer);
1120
1121 p = _PyBytesWriter_Alloc(&writer, len);
1122 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001124 writer.overallocate = 1;
1125
Eric V. Smith42454af2016-10-31 09:22:08 -04001126 *first_invalid_escape = NULL;
1127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 end = s + len;
1129 while (s < end) {
1130 if (*s != '\\') {
1131 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 *p++ = *s++;
1134 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001135 else {
1136 /* non-ASCII character and need to recode */
1137 p = _PyBytes_DecodeEscapeRecode(&s, end,
1138 errors, recode_encoding,
1139 &writer, p);
1140 if (p == NULL)
1141 goto failed;
1142 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 continue;
1144 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 PyErr_SetString(PyExc_ValueError,
1149 "Trailing \\ in string");
1150 goto failed;
1151 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 switch (*s++) {
1154 /* XXX This assumes ASCII! */
1155 case '\n': break;
1156 case '\\': *p++ = '\\'; break;
1157 case '\'': *p++ = '\''; break;
1158 case '\"': *p++ = '\"'; break;
1159 case 'b': *p++ = '\b'; break;
1160 case 'f': *p++ = '\014'; break; /* FF */
1161 case 't': *p++ = '\t'; break;
1162 case 'n': *p++ = '\n'; break;
1163 case 'r': *p++ = '\r'; break;
1164 case 'v': *p++ = '\013'; break; /* VT */
1165 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1166 case '0': case '1': case '2': case '3':
1167 case '4': case '5': case '6': case '7':
1168 c = s[-1] - '0';
1169 if (s < end && '0' <= *s && *s <= '7') {
1170 c = (c<<3) + *s++ - '0';
1171 if (s < end && '0' <= *s && *s <= '7')
1172 c = (c<<3) + *s++ - '0';
1173 }
1174 *p++ = c;
1175 break;
1176 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001177 if (s+1 < end) {
1178 int digit1, digit2;
1179 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1180 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1181 if (digit1 < 16 && digit2 < 16) {
1182 *p++ = (unsigned char)((digit1 << 4) + digit2);
1183 s += 2;
1184 break;
1185 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001187 /* invalid hexadecimal digits */
1188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001190 PyErr_Format(PyExc_ValueError,
1191 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001192 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 goto failed;
1194 }
1195 if (strcmp(errors, "replace") == 0) {
1196 *p++ = '?';
1197 } else if (strcmp(errors, "ignore") == 0)
1198 /* do nothing */;
1199 else {
1200 PyErr_Format(PyExc_ValueError,
1201 "decoding error; unknown "
1202 "error handling code: %.400s",
1203 errors);
1204 goto failed;
1205 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001206 /* skip \x */
1207 if (s < end && Py_ISXDIGIT(s[0]))
1208 s++; /* and a hexdigit */
1209 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001212 if (*first_invalid_escape == NULL) {
1213 *first_invalid_escape = s-1; /* Back up one char, since we've
1214 already incremented s. */
1215 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001217 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001218 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 UTF-8 bytes may follow. */
1220 }
1221 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001222
1223 return _PyBytesWriter_Finish(&writer, p);
1224
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001226 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228}
1229
Eric V. Smith42454af2016-10-31 09:22:08 -04001230PyObject *PyBytes_DecodeEscape(const char *s,
1231 Py_ssize_t len,
1232 const char *errors,
1233 Py_ssize_t unicode,
1234 const char *recode_encoding)
1235{
1236 const char* first_invalid_escape;
1237 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1238 recode_encoding,
1239 &first_invalid_escape);
1240 if (result == NULL)
1241 return NULL;
1242 if (first_invalid_escape != NULL) {
1243 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1244 "invalid escape sequence '\\%c'",
1245 *first_invalid_escape) < 0) {
1246 Py_DECREF(result);
1247 return NULL;
1248 }
1249 }
1250 return result;
1251
1252}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253/* -------------------------------------------------------------------- */
1254/* object api */
1255
1256Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001257PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 if (!PyBytes_Check(op)) {
1260 PyErr_Format(PyExc_TypeError,
1261 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1262 return -1;
1263 }
1264 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265}
1266
1267char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001268PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (!PyBytes_Check(op)) {
1271 PyErr_Format(PyExc_TypeError,
1272 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273 return NULL;
1274 }
1275 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276}
1277
1278int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279PyBytes_AsStringAndSize(PyObject *obj,
1280 char **s,
1281 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 if (s == NULL) {
1284 PyErr_BadInternalCall();
1285 return -1;
1286 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 if (!PyBytes_Check(obj)) {
1289 PyErr_Format(PyExc_TypeError,
1290 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1291 return -1;
1292 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 *s = PyBytes_AS_STRING(obj);
1295 if (len != NULL)
1296 *len = PyBytes_GET_SIZE(obj);
1297 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001298 PyErr_SetString(PyExc_ValueError,
1299 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 return -1;
1301 }
1302 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303}
Neal Norwitz6968b052007-02-27 19:02:19 +00001304
1305/* -------------------------------------------------------------------- */
1306/* Methods */
1307
Eric Smith0923d1d2009-04-16 20:16:10 +00001308#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001309
1310#include "stringlib/fastsearch.h"
1311#include "stringlib/count.h"
1312#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001313#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001314#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001315#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001316#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001317
Eric Smith0f78bff2009-11-30 01:01:42 +00001318#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001319
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320PyObject *
1321PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001322{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001323 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001325 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 unsigned char quote, *s, *p;
1328
1329 /* Compute size of output string */
1330 squotes = dquotes = 0;
1331 newsize = 3; /* b'' */
1332 s = (unsigned char*)op->ob_sval;
1333 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001334 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001336 case '\'': squotes++; break;
1337 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001339 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001340 default:
1341 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001342 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001343 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001344 if (newsize > PY_SSIZE_T_MAX - incr)
1345 goto overflow;
1346 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001347 }
1348 quote = '\'';
1349 if (smartquotes && squotes && !dquotes)
1350 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001351 if (squotes && quote == '\'') {
1352 if (newsize > PY_SSIZE_T_MAX - squotes)
1353 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356
1357 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (v == NULL) {
1359 return NULL;
1360 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001361 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001363 *p++ = 'b', *p++ = quote;
1364 for (i = 0; i < length; i++) {
1365 unsigned char c = op->ob_sval[i];
1366 if (c == quote || c == '\\')
1367 *p++ = '\\', *p++ = c;
1368 else if (c == '\t')
1369 *p++ = '\\', *p++ = 't';
1370 else if (c == '\n')
1371 *p++ = '\\', *p++ = 'n';
1372 else if (c == '\r')
1373 *p++ = '\\', *p++ = 'r';
1374 else if (c < ' ' || c >= 0x7f) {
1375 *p++ = '\\';
1376 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001377 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1378 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001380 else
1381 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001384 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001385 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001386
1387 overflow:
1388 PyErr_SetString(PyExc_OverflowError,
1389 "bytes object is too large to make repr");
1390 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001391}
1392
Neal Norwitz6968b052007-02-27 19:02:19 +00001393static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001394bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001397}
1398
Neal Norwitz6968b052007-02-27 19:02:19 +00001399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001400bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 if (Py_BytesWarningFlag) {
1403 if (PyErr_WarnEx(PyExc_BytesWarning,
1404 "str() on a bytes instance", 1))
1405 return NULL;
1406 }
1407 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001408}
1409
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001411bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414}
Neal Norwitz6968b052007-02-27 19:02:19 +00001415
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416/* This is also used by PyBytes_Concat() */
1417static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001418bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 Py_buffer va, vb;
1421 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 va.len = -1;
1424 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001425 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1426 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1428 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1429 goto done;
1430 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 /* Optimize end cases */
1433 if (va.len == 0 && PyBytes_CheckExact(b)) {
1434 result = b;
1435 Py_INCREF(result);
1436 goto done;
1437 }
1438 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1439 result = a;
1440 Py_INCREF(result);
1441 goto done;
1442 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001444 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 PyErr_NoMemory();
1446 goto done;
1447 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001449 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 if (result != NULL) {
1451 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1452 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1453 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001454
1455 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 if (va.len != -1)
1457 PyBuffer_Release(&va);
1458 if (vb.len != -1)
1459 PyBuffer_Release(&vb);
1460 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461}
Neal Norwitz6968b052007-02-27 19:02:19 +00001462
1463static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001464bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001465{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001466 Py_ssize_t i;
1467 Py_ssize_t j;
1468 Py_ssize_t size;
1469 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 size_t nbytes;
1471 if (n < 0)
1472 n = 0;
1473 /* watch out for overflows: the size can overflow int,
1474 * and the # of bytes needed can overflow size_t
1475 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001476 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 PyErr_SetString(PyExc_OverflowError,
1478 "repeated bytes are too long");
1479 return NULL;
1480 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001481 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483 Py_INCREF(a);
1484 return (PyObject *)a;
1485 }
1486 nbytes = (size_t)size;
1487 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488 PyErr_SetString(PyExc_OverflowError,
1489 "repeated bytes are too long");
1490 return NULL;
1491 }
1492 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1493 if (op == NULL)
1494 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001495 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 op->ob_shash = -1;
1497 op->ob_sval[size] = '\0';
1498 if (Py_SIZE(a) == 1 && n > 0) {
1499 memset(op->ob_sval, a->ob_sval[0] , n);
1500 return (PyObject *) op;
1501 }
1502 i = 0;
1503 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001504 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 i = Py_SIZE(a);
1506 }
1507 while (i < size) {
1508 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001509 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 i += j;
1511 }
1512 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001513}
1514
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001515static int
1516bytes_contains(PyObject *self, PyObject *arg)
1517{
1518 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1519}
1520
Neal Norwitz6968b052007-02-27 19:02:19 +00001521static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001522bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 if (i < 0 || i >= Py_SIZE(a)) {
1525 PyErr_SetString(PyExc_IndexError, "index out of range");
1526 return NULL;
1527 }
1528 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001529}
1530
Benjamin Peterson621b4302016-09-09 13:54:34 -07001531static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001532bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1533{
1534 int cmp;
1535 Py_ssize_t len;
1536
1537 len = Py_SIZE(a);
1538 if (Py_SIZE(b) != len)
1539 return 0;
1540
1541 if (a->ob_sval[0] != b->ob_sval[0])
1542 return 0;
1543
1544 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1545 return (cmp == 0);
1546}
1547
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001548static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001549bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001550{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 int c;
1552 Py_ssize_t len_a, len_b;
1553 Py_ssize_t min_len;
1554 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001555 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 /* Make sure both arguments are strings. */
1558 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001560 rc = PyObject_IsInstance((PyObject*)a,
1561 (PyObject*)&PyUnicode_Type);
1562 if (!rc)
1563 rc = PyObject_IsInstance((PyObject*)b,
1564 (PyObject*)&PyUnicode_Type);
1565 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001567 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001568 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001569 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001570 return NULL;
1571 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001572 else {
1573 rc = PyObject_IsInstance((PyObject*)a,
1574 (PyObject*)&PyLong_Type);
1575 if (!rc)
1576 rc = PyObject_IsInstance((PyObject*)b,
1577 (PyObject*)&PyLong_Type);
1578 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001579 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001580 if (rc) {
1581 if (PyErr_WarnEx(PyExc_BytesWarning,
1582 "Comparison between bytes and int", 1))
1583 return NULL;
1584 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001585 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 }
1587 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001589 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 case Py_EQ:
1592 case Py_LE:
1593 case Py_GE:
1594 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001597 case Py_NE:
1598 case Py_LT:
1599 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001601 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 default:
1603 PyErr_BadArgument();
1604 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 }
1606 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001607 else if (op == Py_EQ || op == Py_NE) {
1608 int eq = bytes_compare_eq(a, b);
1609 eq ^= (op == Py_NE);
1610 result = eq ? Py_True : Py_False;
1611 }
1612 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001613 len_a = Py_SIZE(a);
1614 len_b = Py_SIZE(b);
1615 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001616 if (min_len > 0) {
1617 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001618 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001619 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001621 else
1622 c = 0;
1623 if (c == 0)
1624 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1625 switch (op) {
1626 case Py_LT: c = c < 0; break;
1627 case Py_LE: c = c <= 0; break;
1628 case Py_GT: c = c > 0; break;
1629 case Py_GE: c = c >= 0; break;
1630 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001631 PyErr_BadArgument();
1632 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001633 }
1634 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 Py_INCREF(result);
1638 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001639}
1640
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001641static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001642bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001643{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001644 if (a->ob_shash == -1) {
1645 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001646 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001647 }
1648 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001649}
1650
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001652bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 if (PyIndex_Check(item)) {
1655 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1656 if (i == -1 && PyErr_Occurred())
1657 return NULL;
1658 if (i < 0)
1659 i += PyBytes_GET_SIZE(self);
1660 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1661 PyErr_SetString(PyExc_IndexError,
1662 "index out of range");
1663 return NULL;
1664 }
1665 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1666 }
1667 else if (PySlice_Check(item)) {
1668 Py_ssize_t start, stop, step, slicelength, cur, i;
1669 char* source_buf;
1670 char* result_buf;
1671 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001672
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001673 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 PyBytes_GET_SIZE(self),
1675 &start, &stop, &step, &slicelength) < 0) {
1676 return NULL;
1677 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 if (slicelength <= 0) {
1680 return PyBytes_FromStringAndSize("", 0);
1681 }
1682 else if (start == 0 && step == 1 &&
1683 slicelength == PyBytes_GET_SIZE(self) &&
1684 PyBytes_CheckExact(self)) {
1685 Py_INCREF(self);
1686 return (PyObject *)self;
1687 }
1688 else if (step == 1) {
1689 return PyBytes_FromStringAndSize(
1690 PyBytes_AS_STRING(self) + start,
1691 slicelength);
1692 }
1693 else {
1694 source_buf = PyBytes_AS_STRING(self);
1695 result = PyBytes_FromStringAndSize(NULL, slicelength);
1696 if (result == NULL)
1697 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 result_buf = PyBytes_AS_STRING(result);
1700 for (cur = start, i = 0; i < slicelength;
1701 cur += step, i++) {
1702 result_buf[i] = source_buf[cur];
1703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return result;
1706 }
1707 }
1708 else {
1709 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001710 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 Py_TYPE(item)->tp_name);
1712 return NULL;
1713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714}
1715
1716static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001717bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721}
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length, /*sq_length*/
1725 (binaryfunc)bytes_concat, /*sq_concat*/
1726 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727 (ssizeargfunc)bytes_item, /*sq_item*/
1728 0, /*sq_slice*/
1729 0, /*sq_ass_item*/
1730 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001731 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001734static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 (lenfunc)bytes_length,
1736 (binaryfunc)bytes_subscript,
1737 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738};
1739
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001740static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 (getbufferproc)bytes_buffer_getbuffer,
1742 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743};
1744
1745
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip(). */
#define LEFTSTRIP  0    /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP  2    /* strip both ends */
1749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750/*[clinic input]
1751bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753 sep: object = None
1754 The delimiter according which to split the bytes.
1755 None (the default value) means split on ASCII whitespace characters
1756 (space, tab, return, newline, formfeed, vertical tab).
1757 maxsplit: Py_ssize_t = -1
1758 Maximum number of splits to do.
1759 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761Return a list of the sections in the bytes, using sep as the delimiter.
1762[clinic start generated code]*/
1763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001765bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767{
1768 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 const char *s = PyBytes_AS_STRING(self), *sub;
1770 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 if (maxsplit < 0)
1774 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return NULL;
1779 sub = vsub.buf;
1780 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783 PyBuffer_Release(&vsub);
1784 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001785}
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787/*[clinic input]
1788bytes.partition
1789
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 /
1792
1793Partition the bytes into three parts using the given separator.
1794
1795This will search for the separator sep in the bytes. If the separator is found,
1796returns a 3-tuple containing the part before the separator, the separator
1797itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing the original bytes
1800object and two empty bytes objects.
1801[clinic start generated code]*/
1802
Neal Norwitz6968b052007-02-27 19:02:19 +00001803static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001804bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001805/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001806{
Neal Norwitz6968b052007-02-27 19:02:19 +00001807 return stringlib_partition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001811 );
1812}
1813
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001814/*[clinic input]
1815bytes.rpartition
1816
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001817 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818 /
1819
1820Partition the bytes into three parts using the given separator.
1821
1822This will search for the separator sep in the bytes, starting and the end. If
1823the separator is found, returns a 3-tuple containing the part before the
1824separator, the separator itself, and the part after it.
1825
1826If the separator is not found, returns a 3-tuple containing two empty bytes
1827objects and the original bytes object.
1828[clinic start generated code]*/
1829
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001830static PyObject *
1831bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001832/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001833{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return stringlib_rpartition(
1835 (PyObject*) self,
1836 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001837 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001839}
1840
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001841/*[clinic input]
1842bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846Splitting is done starting at the end of the bytes and working to the front.
1847[clinic start generated code]*/
1848
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001849static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001850bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001852{
1853 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 const char *s = PyBytes_AS_STRING(self), *sub;
1855 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001856 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 if (maxsplit < 0)
1859 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001860 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001862 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 return NULL;
1864 sub = vsub.buf;
1865 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868 PyBuffer_Release(&vsub);
1869 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001873/*[clinic input]
1874bytes.join
1875
1876 iterable_of_bytes: object
1877 /
1878
1879Concatenate any number of bytes objects.
1880
1881The bytes whose method is called is inserted in between each pair.
1882
1883The result is returned as a new bytes object.
1884
1885Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886[clinic start generated code]*/
1887
Neal Norwitz6968b052007-02-27 19:02:19 +00001888static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001889bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001891{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001892 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001893}
1894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895PyObject *
1896_PyBytes_Join(PyObject *sep, PyObject *x)
1897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 assert(sep != NULL && PyBytes_Check(sep));
1899 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001900 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901}
1902
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001903static PyObject *
1904bytes_find(PyBytesObject *self, PyObject *args)
1905{
1906 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907}
1908
1909static PyObject *
1910bytes_index(PyBytesObject *self, PyObject *args)
1911{
1912 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913}
1914
1915
1916static PyObject *
1917bytes_rfind(PyBytesObject *self, PyObject *args)
1918{
1919 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922
1923static PyObject *
1924bytes_rindex(PyBytesObject *self, PyObject *args)
1925{
1926 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927}
1928
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
1930Py_LOCAL_INLINE(PyObject *)
1931do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 Py_buffer vsep;
1934 char *s = PyBytes_AS_STRING(self);
1935 Py_ssize_t len = PyBytes_GET_SIZE(self);
1936 char *sep;
1937 Py_ssize_t seplen;
1938 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001940 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 return NULL;
1942 sep = vsep.buf;
1943 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 i = 0;
1946 if (striptype != RIGHTSTRIP) {
1947 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948 i++;
1949 }
1950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 j = len;
1953 if (striptype != LEFTSTRIP) {
1954 do {
1955 j--;
1956 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957 j++;
1958 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963 Py_INCREF(self);
1964 return (PyObject*)self;
1965 }
1966 else
1967 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968}
1969
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
1971Py_LOCAL_INLINE(PyObject *)
1972do_strip(PyBytesObject *self, int striptype)
1973{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 char *s = PyBytes_AS_STRING(self);
1975 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001979 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 i++;
1981 }
1982 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
David Malcolm96960882010-11-05 17:23:41 +00001988 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 j++;
1990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998}
1999
2000
2001Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004 if (bytes != NULL && bytes != Py_None) {
2005 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 }
2007 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008}
2009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010/*[clinic input]
2011bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013 bytes: object = None
2014 /
2015
2016Strip leading and trailing bytes contained in the argument.
2017
2018If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019[clinic start generated code]*/
2020
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002021static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002023/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002024{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026}
2027
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028/*[clinic input]
2029bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031 bytes: object = None
2032 /
2033
2034Strip leading bytes contained in the argument.
2035
2036If the argument is omitted or None, strip leading ASCII whitespace.
2037[clinic start generated code]*/
2038
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039static PyObject *
2040bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002041/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042{
2043 return do_argstrip(self, LEFTSTRIP, bytes);
2044}
2045
2046/*[clinic input]
2047bytes.rstrip
2048
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 bytes: object = None
2050 /
2051
2052Strip trailing bytes contained in the argument.
2053
2054If the argument is omitted or None, strip trailing ASCII whitespace.
2055[clinic start generated code]*/
2056
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057static PyObject *
2058bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002059/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002060{
2061 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002062}
Neal Norwitz6968b052007-02-27 19:02:19 +00002063
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002065static PyObject *
2066bytes_count(PyBytesObject *self, PyObject *args)
2067{
2068 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069}
2070
2071
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072/*[clinic input]
2073bytes.translate
2074
Victor Stinner049e5092014-08-17 22:20:00 +02002075 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002077 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002078 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079
2080Return a copy with each character mapped by the given translation table.
2081
Martin Panter1b6c6da2016-08-27 08:35:02 +00002082All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083The remaining characters are mapped through the given translation table.
2084[clinic start generated code]*/
2085
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002087bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002088 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002089/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002091 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 Py_buffer table_view = {NULL, NULL};
2093 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002094 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002095 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 Py_ssize_t inlen, tablen, dellen = 0;
2099 PyObject *result;
2100 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (PyBytes_Check(table)) {
2103 table_chars = PyBytes_AS_STRING(table);
2104 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002106 else if (table == Py_None) {
2107 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 tablen = 256;
2109 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002111 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002112 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002113 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 tablen = table_view.len;
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 if (tablen != 256) {
2118 PyErr_SetString(PyExc_ValueError,
2119 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return NULL;
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124 if (deletechars != NULL) {
2125 if (PyBytes_Check(deletechars)) {
2126 del_table_chars = PyBytes_AS_STRING(deletechars);
2127 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002129 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002130 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002131 PyBuffer_Release(&table_view);
2132 return NULL;
2133 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002134 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 dellen = del_table_view.len;
2136 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 }
2138 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 dellen = 0;
2141 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 inlen = PyBytes_GET_SIZE(input_obj);
2144 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002145 if (result == NULL) {
2146 PyBuffer_Release(&del_table_view);
2147 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002150 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 /* If no deletions are required, use faster code */
2155 for (i = inlen; --i >= 0; ) {
2156 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 changed = 1;
2159 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 if (!changed && PyBytes_CheckExact(input_obj)) {
2161 Py_INCREF(input_obj);
2162 Py_DECREF(result);
2163 result = input_obj;
2164 }
2165 PyBuffer_Release(&del_table_view);
2166 PyBuffer_Release(&table_view);
2167 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 for (i = 0; i < 256; i++)
2172 trans_table[i] = Py_CHARMASK(i);
2173 } else {
2174 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002177 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002181 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 for (i = inlen; --i >= 0; ) {
2184 c = Py_CHARMASK(*input++);
2185 if (trans_table[c] != -1)
2186 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187 continue;
2188 changed = 1;
2189 }
2190 if (!changed && PyBytes_CheckExact(input_obj)) {
2191 Py_DECREF(result);
2192 Py_INCREF(input_obj);
2193 return input_obj;
2194 }
2195 /* Fix the size of the resulting string */
2196 if (inlen > 0)
2197 _PyBytes_Resize(&result, output - output_start);
2198 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199}
2200
2201
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002202/*[clinic input]
2203
2204@staticmethod
2205bytes.maketrans
2206
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002207 frm: Py_buffer
2208 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209 /
2210
2211Return a translation table useable for the bytes or bytearray translate method.
2212
2213The returned table will be one where each byte in frm is mapped to the byte at
2214the same position in to.
2215
2216The bytes objects frm and to must be of the same length.
2217[clinic start generated code]*/
2218
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002220bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002221/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222{
2223 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002224}
2225
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226
2227/*[clinic input]
2228bytes.replace
2229
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002230 old: Py_buffer
2231 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232 count: Py_ssize_t = -1
2233 Maximum number of occurrences to replace.
2234 -1 (the default value) means replace all occurrences.
2235 /
2236
2237Return a copy with all occurrences of substring old replaced by new.
2238
2239If the optional argument count is given, only the first count occurrences are
2240replaced.
2241[clinic start generated code]*/
2242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002244bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002245 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002246/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002248 return stringlib_replace((PyObject *)self,
2249 (const char *)old->buf, old->len,
2250 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251}
2252
2253/** End DALKE **/
2254
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002256static PyObject *
2257bytes_startswith(PyBytesObject *self, PyObject *args)
2258{
2259 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260}
2261
2262static PyObject *
2263bytes_endswith(PyBytesObject *self, PyObject *args)
2264{
2265 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266}
2267
2268
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002269/*[clinic input]
2270bytes.decode
2271
2272 encoding: str(c_default="NULL") = 'utf-8'
2273 The encoding with which to decode the bytes.
2274 errors: str(c_default="NULL") = 'strict'
2275 The error handling scheme to use for the handling of decoding errors.
2276 The default is 'strict' meaning that decoding errors raise a
2277 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278 as well as any other name registered with codecs.register_error that
2279 can handle UnicodeDecodeErrors.
2280
2281Decode the bytes using the codec registered for encoding.
2282[clinic start generated code]*/
2283
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002284static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002285bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002286 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002287/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002288{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002289 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002290}
2291
Guido van Rossum20188312006-05-05 15:15:40 +00002292
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002293/*[clinic input]
2294bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002295
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002296 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297
2298Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300Line breaks are not included in the resulting list unless keepends is given and
2301true.
2302[clinic start generated code]*/
2303
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002305bytes_splitlines_impl(PyBytesObject *self, int keepends)
2306/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002307{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002309 (PyObject*) self, PyBytes_AS_STRING(self),
2310 PyBytes_GET_SIZE(self), keepends
2311 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312}
2313
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002314/*[clinic input]
2315@classmethod
2316bytes.fromhex
2317
2318 string: unicode
2319 /
2320
2321Create a bytes object from a string of hexadecimal numbers.
2322
2323Spaces between two numbers are accepted.
2324Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325[clinic start generated code]*/
2326
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002328bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002329/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002331 PyObject *result = _PyBytes_FromHex(string, 0);
2332 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002333 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002335 }
2336 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002337}
2338
2339PyObject*
2340_PyBytes_FromHex(PyObject *string, int use_bytearray)
2341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002343 Py_ssize_t hexlen, invalid_char;
2344 unsigned int top, bot;
2345 Py_UCS1 *str, *end;
2346 _PyBytesWriter writer;
2347
2348 _PyBytesWriter_Init(&writer);
2349 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002351 assert(PyUnicode_Check(string));
2352 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002354 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002355
Victor Stinner2bf89932015-10-14 11:25:33 +02002356 if (!PyUnicode_IS_ASCII(string)) {
2357 void *data = PyUnicode_DATA(string);
2358 unsigned int kind = PyUnicode_KIND(string);
2359 Py_ssize_t i;
2360
2361 /* search for the first non-ASCII character */
2362 for (i = 0; i < hexlen; i++) {
2363 if (PyUnicode_READ(kind, data, i) >= 128)
2364 break;
2365 }
2366 invalid_char = i;
2367 goto error;
2368 }
2369
2370 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371 str = PyUnicode_1BYTE_DATA(string);
2372
2373 /* This overestimates if there are spaces */
2374 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002377
2378 end = str + hexlen;
2379 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002381 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002382 do {
2383 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002384 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002385 if (str >= end)
2386 break;
2387 }
2388
2389 top = _PyLong_DigitValue[*str];
2390 if (top >= 16) {
2391 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 goto error;
2393 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002394 str++;
2395
2396 bot = _PyLong_DigitValue[*str];
2397 if (bot >= 16) {
2398 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399 goto error;
2400 }
2401 str++;
2402
2403 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002405
2406 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002407
2408 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002409 PyErr_Format(PyExc_ValueError,
2410 "non-hexadecimal number found in "
2411 "fromhex() arg at position %zd", invalid_char);
2412 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002414}
2415
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002416PyDoc_STRVAR(hex__doc__,
2417"B.hex() -> string\n\
2418\n\
2419Create a string of hexadecimal numbers from a bytes object.\n\
2420Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2421
2422static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002423bytes_hex(PyBytesObject *self)
2424{
2425 char* argbuf = PyBytes_AS_STRING(self);
2426 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2427 return _Py_strhex(argbuf, arglen);
2428}
2429
2430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002431bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002434}
2435
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002436
2437static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002438bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2440 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2441 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002442 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2443 _Py_center__doc__},
2444 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002445 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002446 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002447 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002448 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002449 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002450 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002451 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002452 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002453 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002454 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2455 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2457 _Py_isalnum__doc__},
2458 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2459 _Py_isalpha__doc__},
2460 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2461 _Py_isdigit__doc__},
2462 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2463 _Py_islower__doc__},
2464 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2465 _Py_isspace__doc__},
2466 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2467 _Py_istitle__doc__},
2468 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2469 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002470 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002471 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002473 BYTES_LSTRIP_METHODDEF
2474 BYTES_MAKETRANS_METHODDEF
2475 BYTES_PARTITION_METHODDEF
2476 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002477 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2478 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002479 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002480 BYTES_RPARTITION_METHODDEF
2481 BYTES_RSPLIT_METHODDEF
2482 BYTES_RSTRIP_METHODDEF
2483 BYTES_SPLIT_METHODDEF
2484 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002485 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002486 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002487 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2489 _Py_swapcase__doc__},
2490 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002491 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002493 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002495};
2496
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002498bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002499{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002500 if (!PyBytes_Check(self)) {
2501 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002502 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002503 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002504 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002505}
2506
2507static PyNumberMethods bytes_as_number = {
2508 0, /*nb_add*/
2509 0, /*nb_subtract*/
2510 0, /*nb_multiply*/
2511 bytes_mod, /*nb_remainder*/
2512};
2513
2514static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002515bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
2517static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002518bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 PyObject *x = NULL;
2521 const char *encoding = NULL;
2522 const char *errors = NULL;
2523 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002524 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002525 Py_ssize_t size;
2526 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002527 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002530 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2532 &encoding, &errors))
2533 return NULL;
2534 if (x == NULL) {
2535 if (encoding != NULL || errors != NULL) {
2536 PyErr_SetString(PyExc_TypeError,
2537 "encoding or errors without sequence "
2538 "argument");
2539 return NULL;
2540 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002541 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002544 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002546 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002548 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 return NULL;
2550 }
2551 new = PyUnicode_AsEncodedString(x, encoding, errors);
2552 if (new == NULL)
2553 return NULL;
2554 assert(PyBytes_Check(new));
2555 return new;
2556 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002557
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002558 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002559 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002560 PyUnicode_Check(x) ?
2561 "string argument without an encoding" :
2562 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002563 return NULL;
2564 }
2565
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002566 /* We'd like to call PyObject_Bytes here, but we need to check for an
2567 integer argument before deferring to PyBytes_FromObject, something
2568 PyObject_Bytes doesn't do. */
2569 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2570 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002571 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002572 Py_DECREF(func);
2573 if (new == NULL)
2574 return NULL;
2575 if (!PyBytes_Check(new)) {
2576 PyErr_Format(PyExc_TypeError,
2577 "__bytes__ returned non-bytes (type %.200s)",
2578 Py_TYPE(new)->tp_name);
2579 Py_DECREF(new);
2580 return NULL;
2581 }
2582 return new;
2583 }
2584 else if (PyErr_Occurred())
2585 return NULL;
2586
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002587 if (PyUnicode_Check(x)) {
2588 PyErr_SetString(PyExc_TypeError,
2589 "string argument without an encoding");
2590 return NULL;
2591 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002592 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002593 if (PyIndex_Check(x)) {
2594 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2595 if (size == -1 && PyErr_Occurred()) {
INADA Naokia634e232017-01-06 17:32:01 +09002596 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2597 return NULL;
2598 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002599 }
INADA Naokia634e232017-01-06 17:32:01 +09002600 else {
2601 if (size < 0) {
2602 PyErr_SetString(PyExc_ValueError, "negative count");
2603 return NULL;
2604 }
2605 new = _PyBytes_FromSize(size, 1);
2606 if (new == NULL)
2607 return NULL;
2608 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002609 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002611
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002612 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002613}
2614
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002615static PyObject*
2616_PyBytes_FromBuffer(PyObject *x)
2617{
2618 PyObject *new;
2619 Py_buffer view;
2620
2621 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2622 return NULL;
2623
2624 new = PyBytes_FromStringAndSize(NULL, view.len);
2625 if (!new)
2626 goto fail;
2627 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2628 &view, view.len, 'C') < 0)
2629 goto fail;
2630 PyBuffer_Release(&view);
2631 return new;
2632
2633fail:
2634 Py_XDECREF(new);
2635 PyBuffer_Release(&view);
2636 return NULL;
2637}
2638
Victor Stinner3c50ce32015-10-14 13:50:40 +02002639#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2640 do { \
2641 PyObject *bytes; \
2642 Py_ssize_t i; \
2643 Py_ssize_t value; \
2644 char *str; \
2645 PyObject *item; \
2646 \
2647 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2648 if (bytes == NULL) \
2649 return NULL; \
2650 str = ((PyBytesObject *)bytes)->ob_sval; \
2651 \
2652 for (i = 0; i < Py_SIZE(x); i++) { \
2653 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002654 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002655 if (value == -1 && PyErr_Occurred()) \
2656 goto error; \
2657 \
2658 if (value < 0 || value >= 256) { \
2659 PyErr_SetString(PyExc_ValueError, \
2660 "bytes must be in range(0, 256)"); \
2661 goto error; \
2662 } \
2663 *str++ = (char) value; \
2664 } \
2665 return bytes; \
2666 \
2667 error: \
2668 Py_DECREF(bytes); \
2669 return NULL; \
2670 } while (0)
2671
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002672static PyObject*
2673_PyBytes_FromList(PyObject *x)
2674{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002675 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002676}
2677
2678static PyObject*
2679_PyBytes_FromTuple(PyObject *x)
2680{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002681 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002682}
2683
2684static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002685_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002686{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002687 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002689 _PyBytesWriter writer;
2690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002692 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 if (size == -1 && PyErr_Occurred())
2694 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002695
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002696 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002697 str = _PyBytesWriter_Alloc(&writer, size);
2698 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002700 writer.overallocate = 1;
2701 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* Run the iterator to exhaustion */
2704 for (i = 0; ; i++) {
2705 PyObject *item;
2706 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Get the next item */
2709 item = PyIter_Next(it);
2710 if (item == NULL) {
2711 if (PyErr_Occurred())
2712 goto error;
2713 break;
2714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002717 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 Py_DECREF(item);
2719 if (value == -1 && PyErr_Occurred())
2720 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 /* Range check */
2723 if (value < 0 || value >= 256) {
2724 PyErr_SetString(PyExc_ValueError,
2725 "bytes must be in range(0, 256)");
2726 goto error;
2727 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002729 /* Append the byte */
2730 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002731 str = _PyBytesWriter_Resize(&writer, str, size+1);
2732 if (str == NULL)
2733 return NULL;
2734 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002736 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002737 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002738
2739 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740
2741 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002742 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744}
2745
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002746PyObject *
2747PyBytes_FromObject(PyObject *x)
2748{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002749 PyObject *it, *result;
2750
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002751 if (x == NULL) {
2752 PyErr_BadInternalCall();
2753 return NULL;
2754 }
2755
2756 if (PyBytes_CheckExact(x)) {
2757 Py_INCREF(x);
2758 return x;
2759 }
2760
2761 /* Use the modern buffer interface */
2762 if (PyObject_CheckBuffer(x))
2763 return _PyBytes_FromBuffer(x);
2764
2765 if (PyList_CheckExact(x))
2766 return _PyBytes_FromList(x);
2767
2768 if (PyTuple_CheckExact(x))
2769 return _PyBytes_FromTuple(x);
2770
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002771 if (!PyUnicode_Check(x)) {
2772 it = PyObject_GetIter(x);
2773 if (it != NULL) {
2774 result = _PyBytes_FromIterator(it, x);
2775 Py_DECREF(it);
2776 return result;
2777 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002778 }
2779
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002780 PyErr_Format(PyExc_TypeError,
2781 "cannot convert '%.200s' object to bytes",
2782 x->ob_type->tp_name);
2783 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002784}
2785
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002786static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002787bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 PyObject *tmp, *pnew;
2790 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002792 assert(PyType_IsSubtype(type, &PyBytes_Type));
2793 tmp = bytes_new(&PyBytes_Type, args, kwds);
2794 if (tmp == NULL)
2795 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002796 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 n = PyBytes_GET_SIZE(tmp);
2798 pnew = type->tp_alloc(type, n);
2799 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002800 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002801 PyBytes_AS_STRING(tmp), n+1);
2802 ((PyBytesObject *)pnew)->ob_shash =
2803 ((PyBytesObject *)tmp)->ob_shash;
2804 }
2805 Py_DECREF(tmp);
2806 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807}
2808
/* Docstring of the bytes type (used as tp_doc in PyBytes_Type): lists the
   accepted constructor call forms and the kinds of source objects. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002809PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002810"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002811bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002812bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002813bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2814bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002815\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002817 - an iterable yielding integers in range(256)\n\
2818 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002819 - any object implementing the buffer API.\n\
2820 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002821
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002822static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002823
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2826 "bytes",
2827 PyBytesObject_SIZE,
2828 sizeof(char),
2829 bytes_dealloc, /* tp_dealloc */
2830 0, /* tp_print */
2831 0, /* tp_getattr */
2832 0, /* tp_setattr */
2833 0, /* tp_reserved */
2834 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002835 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 &bytes_as_sequence, /* tp_as_sequence */
2837 &bytes_as_mapping, /* tp_as_mapping */
2838 (hashfunc)bytes_hash, /* tp_hash */
2839 0, /* tp_call */
2840 bytes_str, /* tp_str */
2841 PyObject_GenericGetAttr, /* tp_getattro */
2842 0, /* tp_setattro */
2843 &bytes_as_buffer, /* tp_as_buffer */
2844 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2845 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2846 bytes_doc, /* tp_doc */
2847 0, /* tp_traverse */
2848 0, /* tp_clear */
2849 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2850 0, /* tp_weaklistoffset */
2851 bytes_iter, /* tp_iter */
2852 0, /* tp_iternext */
2853 bytes_methods, /* tp_methods */
2854 0, /* tp_members */
2855 0, /* tp_getset */
2856 &PyBaseObject_Type, /* tp_base */
2857 0, /* tp_dict */
2858 0, /* tp_descr_get */
2859 0, /* tp_descr_set */
2860 0, /* tp_dictoffset */
2861 0, /* tp_init */
2862 0, /* tp_alloc */
2863 bytes_new, /* tp_new */
2864 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002865};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002868PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002870 assert(pv != NULL);
2871 if (*pv == NULL)
2872 return;
2873 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002874 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002875 return;
2876 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002877
2878 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2879 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002880 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002881 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002882
Antoine Pitrou161d6952014-05-01 14:36:20 +02002883 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002884 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002885 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2886 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2887 Py_CLEAR(*pv);
2888 return;
2889 }
2890
2891 oldsize = PyBytes_GET_SIZE(*pv);
2892 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2893 PyErr_NoMemory();
2894 goto error;
2895 }
2896 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2897 goto error;
2898
2899 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2900 PyBuffer_Release(&wb);
2901 return;
2902
2903 error:
2904 PyBuffer_Release(&wb);
2905 Py_CLEAR(*pv);
2906 return;
2907 }
2908
2909 else {
2910 /* Multiple references, need to create new object */
2911 PyObject *v;
2912 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002913 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002914 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915}
2916
2917void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002918PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 PyBytes_Concat(pv, w);
2921 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922}
2923
2924
Ethan Furmanb95b5612015-01-23 20:05:18 -08002925/* The following function breaks the notion that bytes are immutable:
2926 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002928 as creating a new bytes object and destroying the old one, only
2929 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002931 Note that if there's not enough memory to resize the bytes object, the
2932 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002933 memory" exception is set, and -1 is returned. Else (on success) 0 is
2934 returned, and the value in *pv may or may not be the same as on input.
2935 As always, an extra byte is allocated for a trailing \0 byte (newsize
2936 does *not* include that), and a trailing \0 byte is stored.
2937*/
2938
2939int
2940_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2941{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002942 PyObject *v;
2943 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002945 if (!PyBytes_Check(v) || newsize < 0) {
2946 goto error;
2947 }
2948 if (Py_SIZE(v) == newsize) {
2949 /* return early if newsize equals to v->ob_size */
2950 return 0;
2951 }
2952 if (Py_REFCNT(v) != 1) {
2953 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 }
2955 /* XXX UNREF/NEWREF interface should be more symmetrical */
2956 _Py_DEC_REFTOTAL;
2957 _Py_ForgetReference(v);
2958 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002959 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 if (*pv == NULL) {
2961 PyObject_Del(v);
2962 PyErr_NoMemory();
2963 return -1;
2964 }
2965 _Py_NewReference(*pv);
2966 sv = (PyBytesObject *) *pv;
2967 Py_SIZE(sv) = newsize;
2968 sv->ob_sval[newsize] = '\0';
2969 sv->ob_shash = -1; /* invalidate cached hash value */
2970 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002971error:
2972 *pv = 0;
2973 Py_DECREF(v);
2974 PyErr_BadInternalCall();
2975 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976}
2977
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978void
2979PyBytes_Fini(void)
2980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002982 for (i = 0; i < UCHAR_MAX + 1; i++)
2983 Py_CLEAR(characters[i]);
2984 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985}
2986
Benjamin Peterson4116f362008-05-27 00:36:20 +00002987/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002988
/* State of an iterator over a bytes object. */
2989typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 PyObject_HEAD
/* index into it_seq of the next byte that striter_next() will return */
2991 Py_ssize_t it_index;
2992 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002994
2995static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 _PyObject_GC_UNTRACK(it);
2999 Py_XDECREF(it->it_seq);
3000 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001}
3002
3003static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 Py_VISIT(it->it_seq);
3007 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008}
3009
3010static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 PyBytesObject *seq;
3014 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 assert(it != NULL);
3017 seq = it->it_seq;
3018 if (seq == NULL)
3019 return NULL;
3020 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023 item = PyLong_FromLong(
3024 (unsigned char)seq->ob_sval[it->it_index]);
3025 if (item != NULL)
3026 ++it->it_index;
3027 return item;
3028 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003031 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033}
3034
3035static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003036striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 Py_ssize_t len = 0;
3039 if (it->it_seq)
3040 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003042}
3043
3044PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003047static PyObject *
3048striter_reduce(striterobject *it)
3049{
3050 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003051 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003052 it->it_seq, it->it_index);
3053 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003054 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003055 }
3056}
3057
3058PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3059
3060static PyObject *
3061striter_setstate(striterobject *it, PyObject *state)
3062{
3063 Py_ssize_t index = PyLong_AsSsize_t(state);
3064 if (index == -1 && PyErr_Occurred())
3065 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003066 if (it->it_seq != NULL) {
3067 if (index < 0)
3068 index = 0;
3069 else if (index > PyBytes_GET_SIZE(it->it_seq))
3070 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3071 it->it_index = index;
3072 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003073 Py_RETURN_NONE;
3074}
3075
3076PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3080 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003081 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3082 reduce_doc},
3083 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3084 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003085 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003086};
3087
/* Type object of the iterator created by bytes_iter().  Instances are
   tracked by the garbage collector (Py_TPFLAGS_HAVE_GC) and are their own
   iterator (PyObject_SelfIter).  Slots after the last initializer are
   implicitly zero. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3120
3121static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003122bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 if (!PyBytes_Check(seq)) {
3127 PyErr_BadInternalCall();
3128 return NULL;
3129 }
3130 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3131 if (it == NULL)
3132 return NULL;
3133 it->it_index = 0;
3134 Py_INCREF(seq);
3135 it->it_seq = (PyBytesObject *)seq;
3136 _PyObject_GC_TRACK(it);
3137 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003138}
Victor Stinner00165072015-10-09 01:53:21 +02003139
3140
/* _PyBytesWriter API */

/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the writer requests size + size / OVERALLOCATE_FACTOR bytes instead of
   just size, to limit the number of reallocations. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3150
3151void
3152_PyBytesWriter_Init(_PyBytesWriter *writer)
3153{
Victor Stinner661aacc2015-10-14 09:41:48 +02003154 /* Set all attributes before small_buffer to 0 */
3155 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003156#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003157 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003158#endif
3159}
3160
/* Release the buffer object owned by the writer, if any.  This is also the
   cleanup performed by the writer functions in this file on their error
   paths. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    /* Py_CLEAR() accepts NULL (no buffer object exists while the small
       buffer is in use) and resets the field to NULL. */
    Py_CLEAR(writer->buffer);
}
3166
3167Py_LOCAL_INLINE(char*)
3168_PyBytesWriter_AsString(_PyBytesWriter *writer)
3169{
Victor Stinner661aacc2015-10-14 09:41:48 +02003170 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003171 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003172 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003173 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003174 else if (writer->use_bytearray) {
3175 assert(writer->buffer != NULL);
3176 return PyByteArray_AS_STRING(writer->buffer);
3177 }
3178 else {
3179 assert(writer->buffer != NULL);
3180 return PyBytes_AS_STRING(writer->buffer);
3181 }
Victor Stinner00165072015-10-09 01:53:21 +02003182}
3183
3184Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003185_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003186{
3187 char *start = _PyBytesWriter_AsString(writer);
3188 assert(str != NULL);
3189 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003190 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003191 return str - start;
3192}
3193
3194Py_LOCAL_INLINE(void)
3195_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3196{
3197#ifdef Py_DEBUG
3198 char *start, *end;
3199
Victor Stinner661aacc2015-10-14 09:41:48 +02003200 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003201 assert(writer->buffer == NULL);
3202 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003203 else {
3204 assert(writer->buffer != NULL);
3205 if (writer->use_bytearray)
3206 assert(PyByteArray_CheckExact(writer->buffer));
3207 else
3208 assert(PyBytes_CheckExact(writer->buffer));
3209 assert(Py_REFCNT(writer->buffer) == 1);
3210 }
Victor Stinner00165072015-10-09 01:53:21 +02003211
Victor Stinner661aacc2015-10-14 09:41:48 +02003212 if (writer->use_bytearray) {
3213 /* bytearray has its own overallocation algorithm,
3214 writer overallocation must be disabled */
3215 assert(!writer->overallocate);
3216 }
3217
3218 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003219 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003220 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003221 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003222 assert(start[writer->allocated] == 0);
3223
3224 end = start + writer->allocated;
3225 assert(str != NULL);
3226 assert(start <= str && str <= end);
3227#endif
3228}
3229
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003230void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003231_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003232{
3233 Py_ssize_t allocated, pos;
3234
3235 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003236 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003237
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003238 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003239 if (writer->overallocate
3240 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3241 /* overallocate to limit the number of realloc() */
3242 allocated += allocated / OVERALLOCATE_FACTOR;
3243 }
3244
Victor Stinner2bf89932015-10-14 11:25:33 +02003245 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003246 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003247 if (writer->use_bytearray) {
3248 if (PyByteArray_Resize(writer->buffer, allocated))
3249 goto error;
3250 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3251 but we cannot use ob_alloc because bytes may need to be moved
3252 to use the whole buffer. bytearray uses an internal optimization
3253 to avoid moving or copying bytes when bytes are removed at the
3254 beginning (ex: del bytearray[:1]). */
3255 }
3256 else {
3257 if (_PyBytes_Resize(&writer->buffer, allocated))
3258 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003259 }
3260 }
3261 else {
3262 /* convert from stack buffer to bytes object buffer */
3263 assert(writer->buffer == NULL);
3264
Victor Stinner661aacc2015-10-14 09:41:48 +02003265 if (writer->use_bytearray)
3266 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3267 else
3268 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003269 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003270 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003271
3272 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003273 char *dest;
3274 if (writer->use_bytearray)
3275 dest = PyByteArray_AS_STRING(writer->buffer);
3276 else
3277 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003278 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003279 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003280 pos);
3281 }
3282
Victor Stinnerb3653a32015-10-09 03:38:24 +02003283 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003284#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003285 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003286#endif
Victor Stinner00165072015-10-09 01:53:21 +02003287 }
3288 writer->allocated = allocated;
3289
3290 str = _PyBytesWriter_AsString(writer) + pos;
3291 _PyBytesWriter_CheckConsistency(writer, str);
3292 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003293
3294error:
3295 _PyBytesWriter_Dealloc(writer);
3296 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003297}
3298
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003299void*
3300_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3301{
3302 Py_ssize_t new_min_size;
3303
3304 _PyBytesWriter_CheckConsistency(writer, str);
3305 assert(size >= 0);
3306
3307 if (size == 0) {
3308 /* nothing to do */
3309 return str;
3310 }
3311
3312 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3313 PyErr_NoMemory();
3314 _PyBytesWriter_Dealloc(writer);
3315 return NULL;
3316 }
3317 new_min_size = writer->min_size + size;
3318
3319 if (new_min_size > writer->allocated)
3320 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3321
3322 writer->min_size = new_min_size;
3323 return str;
3324}
3325
Victor Stinner00165072015-10-09 01:53:21 +02003326/* Allocate the buffer to write size bytes.
3327 Return the pointer to the beginning of buffer data.
3328 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003329void*
Victor Stinner00165072015-10-09 01:53:21 +02003330_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3331{
3332 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003333 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003334 assert(size >= 0);
3335
Victor Stinnerb3653a32015-10-09 03:38:24 +02003336 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003337#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003338 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003339 /* In debug mode, don't use the full small buffer because it is less
3340 efficient than bytes and bytearray objects to detect buffer underflow
3341 and buffer overflow. Use 10 bytes of the small buffer to test also
3342 code using the smaller buffer in debug mode.
3343
3344 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3345 in debug mode to also be able to detect stack overflow when running
3346 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3347 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3348 stack overflow. */
3349 writer->allocated = Py_MIN(writer->allocated, 10);
3350 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3351 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003352 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003353#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003354 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003355#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003356 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003357}
3358
3359PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003360_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003361{
Victor Stinner2bf89932015-10-14 11:25:33 +02003362 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003363 PyObject *result;
3364
3365 _PyBytesWriter_CheckConsistency(writer, str);
3366
Victor Stinner2bf89932015-10-14 11:25:33 +02003367 size = _PyBytesWriter_GetSize(writer, str);
3368 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003369 Py_CLEAR(writer->buffer);
3370 /* Get the empty byte string singleton */
3371 result = PyBytes_FromStringAndSize(NULL, 0);
3372 }
3373 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003374 if (writer->use_bytearray) {
3375 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3376 }
3377 else {
3378 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3379 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003380 }
3381 else {
3382 result = writer->buffer;
3383 writer->buffer = NULL;
3384
Victor Stinner2bf89932015-10-14 11:25:33 +02003385 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003386 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003387 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003388 Py_DECREF(result);
3389 return NULL;
3390 }
3391 }
3392 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003393 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003394 assert(result == NULL);
3395 return NULL;
3396 }
Victor Stinner00165072015-10-09 01:53:21 +02003397 }
3398 }
Victor Stinner00165072015-10-09 01:53:21 +02003399 }
Victor Stinner00165072015-10-09 01:53:21 +02003400 return result;
3401}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003402
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003403void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003404_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003405 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003406{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003407 char *str = (char *)ptr;
3408
Victor Stinnerce179bf2015-10-09 12:57:22 +02003409 str = _PyBytesWriter_Prepare(writer, str, size);
3410 if (str == NULL)
3411 return NULL;
3412
Christian Heimesf051e432016-09-13 20:22:02 +02003413 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003414 str += size;
3415
3416 return str;
3417}