blob: 5fd92f72a536ad9cff5f61490db89ab143774f97 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
26static PyBytesObject *nullstring;
27
Hai Shi46874c22020-01-30 17:20:25 -060028_Py_IDENTIFIER(__bytes__);
29
Mark Dickinsonfd24b322008-12-06 15:33:31 +000030/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
31 for a string of length n should request PyBytesObject_SIZE + n bytes.
32
33 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
34 3 bytes per string allocation on a typical system.
35*/
36#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
37
Victor Stinner2bf89932015-10-14 11:25:33 +020038/* Forward declaration */
39Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
40 char *str);
41
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the new object is the length of that string, not
   counting the null terminating character.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020064static PyObject *
65_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000066{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020067 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020068 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000072 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000073#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 Py_INCREF(op);
75 return (PyObject *)op;
76 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077
Victor Stinner049e5092014-08-17 22:20:00 +020078 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 PyErr_SetString(PyExc_OverflowError,
80 "byte string is too large");
81 return NULL;
82 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020085 if (use_calloc)
86 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
87 else
88 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (op == NULL)
90 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010091 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020093 if (!use_calloc)
94 op->ob_sval[size] = '\0';
95 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 if (size == 0) {
97 nullstring = op;
98 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020099 }
100 return (PyObject *) op;
101}
102
103PyObject *
104PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
105{
106 PyBytesObject *op;
107 if (size < 0) {
108 PyErr_SetString(PyExc_SystemError,
109 "Negative size passed to PyBytes_FromStringAndSize");
110 return NULL;
111 }
112 if (size == 1 && str != NULL &&
113 (op = characters[*str & UCHAR_MAX]) != NULL)
114 {
115#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000116 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200117#endif
118 Py_INCREF(op);
119 return (PyObject *)op;
120 }
121
122 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
123 if (op == NULL)
124 return NULL;
125 if (str == NULL)
126 return (PyObject *) op;
127
Christian Heimesf051e432016-09-13 20:22:02 +0200128 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200129 /* share short strings */
130 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 characters[*str & UCHAR_MAX] = op;
132 Py_INCREF(op);
133 }
134 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000135}
136
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137PyObject *
138PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200140 size_t size;
141 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 assert(str != NULL);
144 size = strlen(str);
145 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
146 PyErr_SetString(PyExc_OverflowError,
147 "byte string is too long");
148 return NULL;
149 }
150 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000152 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 Py_INCREF(op);
155 return (PyObject *)op;
156 }
157 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000159 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000160#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 Py_INCREF(op);
162 return (PyObject *)op;
163 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 /* Inline PyObject_NewVar */
166 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
167 if (op == NULL)
168 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100169 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200171 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 /* share short strings */
173 if (size == 0) {
174 nullstring = op;
175 Py_INCREF(op);
176 } else if (size == 1) {
177 characters[*str & UCHAR_MAX] = op;
178 Py_INCREF(op);
179 }
180 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000182
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183PyObject *
184PyBytes_FromFormatV(const char *format, va_list vargs)
185{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200187 const char *f;
188 const char *p;
189 Py_ssize_t prec;
190 int longflag;
191 int size_tflag;
192 /* Longest 64-bit formatted numbers:
193 - "18446744073709551615\0" (21 bytes)
194 - "-9223372036854775808\0" (21 bytes)
195 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 Longest 64-bit pointer representation:
198 "0xffffffffffffffff\0" (19 bytes). */
199 char buffer[21];
200 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204 s = _PyBytesWriter_Alloc(&writer, strlen(format));
205 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200207 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000208
Victor Stinner03dab782015-10-14 00:21:35 +0200209#define WRITE_BYTES(str) \
210 do { \
211 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212 if (s == NULL) \
213 goto error; \
214 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200217 if (*f != '%') {
218 *s++ = *f;
219 continue;
220 }
221
222 p = f++;
223
224 /* ignore the width (ex: 10 in "%10s") */
225 while (Py_ISDIGIT(*f))
226 f++;
227
228 /* parse the precision (ex: 10 in "%.10s") */
229 prec = 0;
230 if (*f == '.') {
231 f++;
232 for (; Py_ISDIGIT(*f); f++) {
233 prec = (prec * 10) + (*f - '0');
234 }
235 }
236
237 while (*f && *f != '%' && !Py_ISALPHA(*f))
238 f++;
239
240 /* handle the long flag ('l'), but only for %ld and %lu.
241 others can be added when necessary. */
242 longflag = 0;
243 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244 longflag = 1;
245 ++f;
246 }
247
248 /* handle the size_t flag ('z'). */
249 size_tflag = 0;
250 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251 size_tflag = 1;
252 ++f;
253 }
254
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700255 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200256 (ex: 2 for "%s") */
257 writer.min_size -= (f - p + 1);
258
259 switch (*f) {
260 case 'c':
261 {
262 int c = va_arg(vargs, int);
263 if (c < 0 || c > 255) {
264 PyErr_SetString(PyExc_OverflowError,
265 "PyBytes_FromFormatV(): %c format "
266 "expects an integer in range [0; 255]");
267 goto error;
268 }
269 writer.min_size++;
270 *s++ = (unsigned char)c;
271 break;
272 }
273
274 case 'd':
275 if (longflag)
276 sprintf(buffer, "%ld", va_arg(vargs, long));
277 else if (size_tflag)
278 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
279 va_arg(vargs, Py_ssize_t));
280 else
281 sprintf(buffer, "%d", va_arg(vargs, int));
282 assert(strlen(buffer) < sizeof(buffer));
283 WRITE_BYTES(buffer);
284 break;
285
286 case 'u':
287 if (longflag)
288 sprintf(buffer, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(buffer, "%u",
295 va_arg(vargs, unsigned int));
296 assert(strlen(buffer) < sizeof(buffer));
297 WRITE_BYTES(buffer);
298 break;
299
300 case 'i':
301 sprintf(buffer, "%i", va_arg(vargs, int));
302 assert(strlen(buffer) < sizeof(buffer));
303 WRITE_BYTES(buffer);
304 break;
305
306 case 'x':
307 sprintf(buffer, "%x", va_arg(vargs, int));
308 assert(strlen(buffer) < sizeof(buffer));
309 WRITE_BYTES(buffer);
310 break;
311
312 case 's':
313 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200315
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200316 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200317 if (prec <= 0) {
318 i = strlen(p);
319 }
320 else {
321 i = 0;
322 while (i < prec && p[i]) {
323 i++;
324 }
325 }
Victor Stinner03dab782015-10-14 00:21:35 +0200326 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
327 if (s == NULL)
328 goto error;
329 break;
330 }
331
332 case 'p':
333 sprintf(buffer, "%p", va_arg(vargs, void*));
334 assert(strlen(buffer) < sizeof(buffer));
335 /* %p is ill-defined: ensure leading 0x. */
336 if (buffer[1] == 'X')
337 buffer[1] = 'x';
338 else if (buffer[1] != 'x') {
339 memmove(buffer+2, buffer, strlen(buffer)+1);
340 buffer[0] = '0';
341 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 }
Victor Stinner03dab782015-10-14 00:21:35 +0200343 WRITE_BYTES(buffer);
344 break;
345
346 case '%':
347 writer.min_size++;
348 *s++ = '%';
349 break;
350
351 default:
352 if (*f == 0) {
353 /* fix min_size if we reached the end of the format string */
354 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000356
Victor Stinner03dab782015-10-14 00:21:35 +0200357 /* invalid format string: copy unformatted string and exit */
358 WRITE_BYTES(p);
359 return _PyBytesWriter_Finish(&writer, s);
360 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362
Victor Stinner03dab782015-10-14 00:21:35 +0200363#undef WRITE_BYTES
364
365 return _PyBytesWriter_Finish(&writer, s);
366
367 error:
368 _PyBytesWriter_Dealloc(&writer);
369 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370}
371
372PyObject *
373PyBytes_FromFormat(const char *format, ...)
374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 PyObject* ret;
376 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000377
378#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000382#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 ret = PyBytes_FromFormatV(format, vargs);
384 va_end(vargs);
385 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000386}
387
Ethan Furmanb95b5612015-01-23 20:05:18 -0800388/* Helpers for formatstring */
389
390Py_LOCAL_INLINE(PyObject *)
391getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
392{
393 Py_ssize_t argidx = *p_argidx;
394 if (argidx < arglen) {
395 (*p_argidx)++;
396 if (arglen < 0)
397 return args;
398 else
399 return PyTuple_GetItem(args, argidx);
400 }
401 PyErr_SetString(PyExc_TypeError,
402 "not enough arguments for format string");
403 return NULL;
404}
405
/* Bit flags recording which printf-style flag characters were seen in a
 * conversion specification:
 *
 *   flag      character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
418
419/* Returns a new reference to a PyBytes object, or NULL on failure. */
420
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200421static char*
422formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200423 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800424{
425 char *p;
426 PyObject *result;
427 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200428 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800429
430 x = PyFloat_AsDouble(v);
431 if (x == -1.0 && PyErr_Occurred()) {
432 PyErr_Format(PyExc_TypeError, "float argument required, "
433 "not %.200s", Py_TYPE(v)->tp_name);
434 return NULL;
435 }
436
437 if (prec < 0)
438 prec = 6;
439
440 p = PyOS_double_to_string(x, type, prec,
441 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
442
443 if (p == NULL)
444 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200445
446 len = strlen(p);
447 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200448 str = _PyBytesWriter_Prepare(writer, str, len);
449 if (str == NULL)
450 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200451 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200452 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200453 str += len;
454 return str;
455 }
456
457 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800458 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200459 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600460 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800461}
462
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300463static PyObject *
464formatlong(PyObject *v, int flags, int prec, int type)
465{
466 PyObject *result, *iobj;
467 if (type == 'i')
468 type = 'd';
469 if (PyLong_Check(v))
470 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
471 if (PyNumber_Check(v)) {
472 /* make sure number is a type of integer for o, x, and X */
473 if (type == 'o' || type == 'x' || type == 'X')
474 iobj = PyNumber_Index(v);
475 else
476 iobj = PyNumber_Long(v);
477 if (iobj == NULL) {
478 if (!PyErr_ExceptionMatches(PyExc_TypeError))
479 return NULL;
480 }
481 else if (!PyLong_Check(iobj))
482 Py_CLEAR(iobj);
483 if (iobj != NULL) {
484 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
485 Py_DECREF(iobj);
486 return result;
487 }
488 }
489 PyErr_Format(PyExc_TypeError,
490 "%%%c format: %s is required, not %.200s", type,
491 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
492 : "a number",
493 Py_TYPE(v)->tp_name);
494 return NULL;
495}
496
497static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200498byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800499{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300500 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200501 *p = PyBytes_AS_STRING(arg)[0];
502 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800503 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300504 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200505 *p = PyByteArray_AS_STRING(arg)[0];
506 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800507 }
508 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300509 PyObject *iobj;
510 long ival;
511 int overflow;
512 /* make sure number is a type of integer */
513 if (PyLong_Check(arg)) {
514 ival = PyLong_AsLongAndOverflow(arg, &overflow);
515 }
516 else {
517 iobj = PyNumber_Index(arg);
518 if (iobj == NULL) {
519 if (!PyErr_ExceptionMatches(PyExc_TypeError))
520 return 0;
521 goto onError;
522 }
523 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
524 Py_DECREF(iobj);
525 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300526 if (!overflow && ival == -1 && PyErr_Occurred())
527 goto onError;
528 if (overflow || !(0 <= ival && ival <= 255)) {
529 PyErr_SetString(PyExc_OverflowError,
530 "%c arg not in range(256)");
531 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300533 *p = (char)ival;
534 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300536 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 PyErr_SetString(PyExc_TypeError,
538 "%c requires an integer in range(256) or a single byte");
539 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540}
541
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800542static PyObject *_PyBytes_FromBuffer(PyObject *x);
543
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200547 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 /* is it a bytes object? */
549 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(v);
551 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200553 return v;
554 }
555 if (PyByteArray_Check(v)) {
556 *pbuf = PyByteArray_AS_STRING(v);
557 *plen = PyByteArray_GET_SIZE(v);
558 Py_INCREF(v);
559 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800560 }
561 /* does it support __bytes__? */
562 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
563 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100564 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 Py_DECREF(func);
566 if (result == NULL)
567 return NULL;
568 if (!PyBytes_Check(result)) {
569 PyErr_Format(PyExc_TypeError,
570 "__bytes__ returned non-bytes (type %.200s)",
571 Py_TYPE(result)->tp_name);
572 Py_DECREF(result);
573 return NULL;
574 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200575 *pbuf = PyBytes_AS_STRING(result);
576 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 return result;
578 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800579 /* does it support buffer protocol? */
580 if (PyObject_CheckBuffer(v)) {
581 /* maybe we can avoid making a copy of the buffer object here? */
582 result = _PyBytes_FromBuffer(v);
583 if (result == NULL)
584 return NULL;
585 *pbuf = PyBytes_AS_STRING(result);
586 *plen = PyBytes_GET_SIZE(result);
587 return result;
588 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800590 "%%b requires a bytes-like object, "
591 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 Py_TYPE(v)->tp_name);
593 return NULL;
594}
595
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597
598PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200599_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
600 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800601{
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 const char *fmt;
603 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800604 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200605 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800607 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200608 _PyBytesWriter writer;
609
Victor Stinner772b2b02015-10-14 09:56:53 +0200610 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800611 PyErr_BadInternalCall();
612 return NULL;
613 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200614 fmt = format;
615 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
617 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200618 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619
620 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
621 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200623 if (!use_bytearray)
624 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200625
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 if (PyTuple_Check(args)) {
627 arglen = PyTuple_GET_SIZE(args);
628 argidx = 0;
629 }
630 else {
631 arglen = -1;
632 argidx = -2;
633 }
634 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
635 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
636 !PyByteArray_Check(args)) {
637 dict = args;
638 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200639
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 while (--fmtcnt >= 0) {
641 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642 Py_ssize_t len;
643 char *pos;
644
Xiang Zhangb76ad512017-03-06 17:17:05 +0800645 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 if (pos != NULL)
647 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200648 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800649 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200650 assert(len != 0);
651
Christian Heimesf051e432016-09-13 20:22:02 +0200652 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200653 res += len;
654 fmt += len;
655 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 }
657 else {
658 /* Got a format specifier */
659 int flags = 0;
660 Py_ssize_t width = -1;
661 int prec = -1;
662 int c = '\0';
663 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800664 PyObject *v = NULL;
665 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200666 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200668 Py_ssize_t len = 0;
669 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200670 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800671
Ethan Furmanb95b5612015-01-23 20:05:18 -0800672 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200673 if (*fmt == '%') {
674 *res++ = '%';
675 fmt++;
676 fmtcnt--;
677 continue;
678 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800679 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200680 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800681 Py_ssize_t keylen;
682 PyObject *key;
683 int pcount = 1;
684
685 if (dict == NULL) {
686 PyErr_SetString(PyExc_TypeError,
687 "format requires a mapping");
688 goto error;
689 }
690 ++fmt;
691 --fmtcnt;
692 keystart = fmt;
693 /* Skip over balanced parentheses */
694 while (pcount > 0 && --fmtcnt >= 0) {
695 if (*fmt == ')')
696 --pcount;
697 else if (*fmt == '(')
698 ++pcount;
699 fmt++;
700 }
701 keylen = fmt - keystart - 1;
702 if (fmtcnt < 0 || pcount > 0) {
703 PyErr_SetString(PyExc_ValueError,
704 "incomplete format key");
705 goto error;
706 }
707 key = PyBytes_FromStringAndSize(keystart,
708 keylen);
709 if (key == NULL)
710 goto error;
711 if (args_owned) {
712 Py_DECREF(args);
713 args_owned = 0;
714 }
715 args = PyObject_GetItem(dict, key);
716 Py_DECREF(key);
717 if (args == NULL) {
718 goto error;
719 }
720 args_owned = 1;
721 arglen = -1;
722 argidx = -2;
723 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200724
725 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800726 while (--fmtcnt >= 0) {
727 switch (c = *fmt++) {
728 case '-': flags |= F_LJUST; continue;
729 case '+': flags |= F_SIGN; continue;
730 case ' ': flags |= F_BLANK; continue;
731 case '#': flags |= F_ALT; continue;
732 case '0': flags |= F_ZERO; continue;
733 }
734 break;
735 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200736
737 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800738 if (c == '*') {
739 v = getnextarg(args, arglen, &argidx);
740 if (v == NULL)
741 goto error;
742 if (!PyLong_Check(v)) {
743 PyErr_SetString(PyExc_TypeError,
744 "* wants int");
745 goto error;
746 }
747 width = PyLong_AsSsize_t(v);
748 if (width == -1 && PyErr_Occurred())
749 goto error;
750 if (width < 0) {
751 flags |= F_LJUST;
752 width = -width;
753 }
754 if (--fmtcnt >= 0)
755 c = *fmt++;
756 }
757 else if (c >= 0 && isdigit(c)) {
758 width = c - '0';
759 while (--fmtcnt >= 0) {
760 c = Py_CHARMASK(*fmt++);
761 if (!isdigit(c))
762 break;
763 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
764 PyErr_SetString(
765 PyExc_ValueError,
766 "width too big");
767 goto error;
768 }
769 width = width*10 + (c - '0');
770 }
771 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200772
773 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800774 if (c == '.') {
775 prec = 0;
776 if (--fmtcnt >= 0)
777 c = *fmt++;
778 if (c == '*') {
779 v = getnextarg(args, arglen, &argidx);
780 if (v == NULL)
781 goto error;
782 if (!PyLong_Check(v)) {
783 PyErr_SetString(
784 PyExc_TypeError,
785 "* wants int");
786 goto error;
787 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200788 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800789 if (prec == -1 && PyErr_Occurred())
790 goto error;
791 if (prec < 0)
792 prec = 0;
793 if (--fmtcnt >= 0)
794 c = *fmt++;
795 }
796 else if (c >= 0 && isdigit(c)) {
797 prec = c - '0';
798 while (--fmtcnt >= 0) {
799 c = Py_CHARMASK(*fmt++);
800 if (!isdigit(c))
801 break;
802 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
803 PyErr_SetString(
804 PyExc_ValueError,
805 "prec too big");
806 goto error;
807 }
808 prec = prec*10 + (c - '0');
809 }
810 }
811 } /* prec */
812 if (fmtcnt >= 0) {
813 if (c == 'h' || c == 'l' || c == 'L') {
814 if (--fmtcnt >= 0)
815 c = *fmt++;
816 }
817 }
818 if (fmtcnt < 0) {
819 PyErr_SetString(PyExc_ValueError,
820 "incomplete format");
821 goto error;
822 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200823 v = getnextarg(args, arglen, &argidx);
824 if (v == NULL)
825 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300827 if (fmtcnt == 0) {
828 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200829 writer.overallocate = 0;
830 }
831
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 sign = 0;
833 fill = ' ';
834 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700835 case 'r':
836 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200838 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (temp == NULL)
840 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200841 assert(PyUnicode_IS_ASCII(temp));
842 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
843 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800844 if (prec >= 0 && len > prec)
845 len = prec;
846 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200847
Ethan Furmanb95b5612015-01-23 20:05:18 -0800848 case 's':
849 // %s is only for 2/3 code; 3 only code should use %b
850 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200851 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800852 if (temp == NULL)
853 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (prec >= 0 && len > prec)
855 len = prec;
856 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200857
Ethan Furmanb95b5612015-01-23 20:05:18 -0800858 case 'i':
859 case 'd':
860 case 'u':
861 case 'o':
862 case 'x':
863 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200864 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200865 && width == -1 && prec == -1
866 && !(flags & (F_SIGN | F_BLANK))
867 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200868 {
869 /* Fast path */
870 int alternate = flags & F_ALT;
871 int base;
872
873 switch(c)
874 {
875 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700876 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200877 case 'd':
878 case 'i':
879 case 'u':
880 base = 10;
881 break;
882 case 'o':
883 base = 8;
884 break;
885 case 'x':
886 case 'X':
887 base = 16;
888 break;
889 }
890
891 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200892 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200893 res = _PyLong_FormatBytesWriter(&writer, res,
894 v, base, alternate);
895 if (res == NULL)
896 goto error;
897 continue;
898 }
899
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300900 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200901 if (!temp)
902 goto error;
903 assert(PyUnicode_IS_ASCII(temp));
904 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
905 len = PyUnicode_GET_LENGTH(temp);
906 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800907 if (flags & F_ZERO)
908 fill = '0';
909 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200910
Ethan Furmanb95b5612015-01-23 20:05:18 -0800911 case 'e':
912 case 'E':
913 case 'f':
914 case 'F':
915 case 'g':
916 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200917 if (width == -1 && prec == -1
918 && !(flags & (F_SIGN | F_BLANK)))
919 {
920 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200922 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200923 if (res == NULL)
924 goto error;
925 continue;
926 }
927
Victor Stinnerad771582015-10-09 12:38:53 +0200928 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 goto error;
930 pbuf = PyBytes_AS_STRING(temp);
931 len = PyBytes_GET_SIZE(temp);
932 sign = 1;
933 if (flags & F_ZERO)
934 fill = '0';
935 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200938 pbuf = &onechar;
939 len = byte_converter(v, &onechar);
940 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800941 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200942 if (width == -1) {
943 /* Fast path */
944 *res++ = onechar;
945 continue;
946 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800947 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200948
Ethan Furmanb95b5612015-01-23 20:05:18 -0800949 default:
950 PyErr_Format(PyExc_ValueError,
951 "unsupported format character '%c' (0x%x) "
952 "at index %zd",
953 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200954 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800955 goto error;
956 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957
Ethan Furmanb95b5612015-01-23 20:05:18 -0800958 if (sign) {
959 if (*pbuf == '-' || *pbuf == '+') {
960 sign = *pbuf++;
961 len--;
962 }
963 else if (flags & F_SIGN)
964 sign = '+';
965 else if (flags & F_BLANK)
966 sign = ' ';
967 else
968 sign = 0;
969 }
970 if (width < len)
971 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
973 alloc = width;
974 if (sign != 0 && len == width)
975 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200976 /* 2: size preallocated for %s */
977 if (alloc > 2) {
978 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200979 if (res == NULL)
980 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800981 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200982#ifndef NDEBUG
983 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984#endif
985
986 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800987 if (sign) {
988 if (fill != ' ')
989 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 if (width > len)
991 width--;
992 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
994 /* Write the numeric prefix for "x", "X" and "o" formats
995 if the alternate form is used.
996 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200997 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 assert(pbuf[0] == '0');
999 assert(pbuf[1] == c);
1000 if (fill != ' ') {
1001 *res++ = *pbuf++;
1002 *res++ = *pbuf++;
1003 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 width -= 2;
1005 if (width < 0)
1006 width = 0;
1007 len -= 2;
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001012 memset(res, fill, width - len);
1013 res += (width - len);
1014 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001015 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001016
1017 /* If padding with spaces: write sign if needed and/or numeric
1018 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 if (fill == ' ') {
1020 if (sign)
1021 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001022 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 assert(pbuf[0] == '0');
1024 assert(pbuf[1] == c);
1025 *res++ = *pbuf++;
1026 *res++ = *pbuf++;
1027 }
1028 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001029
1030 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001031 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* Pad right with the fill character if needed */
1035 if (width > len) {
1036 memset(res, ' ', width - len);
1037 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001040 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 Py_XDECREF(temp);
1044 goto error;
1045 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001047
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001048#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049 /* check that we computed the exact size for this write */
1050 assert((res - before) == alloc);
1051#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053
1054 /* If overallocation was disabled, ensure that it was the last
1055 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001056 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001058
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 if (argidx < arglen && !dict) {
1060 PyErr_SetString(PyExc_TypeError,
1061 "not all arguments converted during bytes formatting");
1062 goto error;
1063 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001064
Ethan Furmanb95b5612015-01-23 20:05:18 -08001065 if (args_owned) {
1066 Py_DECREF(args);
1067 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001068 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001069
1070 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001071 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001072 if (args_owned) {
1073 Py_DECREF(args);
1074 }
1075 return NULL;
1076}
1077
Greg Price3a4f6672019-09-12 11:12:22 -07001078/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001079PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 Py_ssize_t len,
1081 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001082 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001085 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001087 _PyBytesWriter writer;
1088
1089 _PyBytesWriter_Init(&writer);
1090
1091 p = _PyBytesWriter_Alloc(&writer, len);
1092 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001094 writer.overallocate = 1;
1095
Eric V. Smith42454af2016-10-31 09:22:08 -04001096 *first_invalid_escape = NULL;
1097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 end = s + len;
1099 while (s < end) {
1100 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001101 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 continue;
1103 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001106 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 PyErr_SetString(PyExc_ValueError,
1108 "Trailing \\ in string");
1109 goto failed;
1110 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 switch (*s++) {
1113 /* XXX This assumes ASCII! */
1114 case '\n': break;
1115 case '\\': *p++ = '\\'; break;
1116 case '\'': *p++ = '\''; break;
1117 case '\"': *p++ = '\"'; break;
1118 case 'b': *p++ = '\b'; break;
1119 case 'f': *p++ = '\014'; break; /* FF */
1120 case 't': *p++ = '\t'; break;
1121 case 'n': *p++ = '\n'; break;
1122 case 'r': *p++ = '\r'; break;
1123 case 'v': *p++ = '\013'; break; /* VT */
1124 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1125 case '0': case '1': case '2': case '3':
1126 case '4': case '5': case '6': case '7':
1127 c = s[-1] - '0';
1128 if (s < end && '0' <= *s && *s <= '7') {
1129 c = (c<<3) + *s++ - '0';
1130 if (s < end && '0' <= *s && *s <= '7')
1131 c = (c<<3) + *s++ - '0';
1132 }
1133 *p++ = c;
1134 break;
1135 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001136 if (s+1 < end) {
1137 int digit1, digit2;
1138 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1139 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1140 if (digit1 < 16 && digit2 < 16) {
1141 *p++ = (unsigned char)((digit1 << 4) + digit2);
1142 s += 2;
1143 break;
1144 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001146 /* invalid hexadecimal digits */
1147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001149 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001150 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001151 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 goto failed;
1153 }
1154 if (strcmp(errors, "replace") == 0) {
1155 *p++ = '?';
1156 } else if (strcmp(errors, "ignore") == 0)
1157 /* do nothing */;
1158 else {
1159 PyErr_Format(PyExc_ValueError,
1160 "decoding error; unknown "
1161 "error handling code: %.400s",
1162 errors);
1163 goto failed;
1164 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001165 /* skip \x */
1166 if (s < end && Py_ISXDIGIT(s[0]))
1167 s++; /* and a hexdigit */
1168 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001171 if (*first_invalid_escape == NULL) {
1172 *first_invalid_escape = s-1; /* Back up one char, since we've
1173 already incremented s. */
1174 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001176 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 }
1178 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001179
1180 return _PyBytesWriter_Finish(&writer, p);
1181
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001182 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001183 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001185}
1186
Eric V. Smith42454af2016-10-31 09:22:08 -04001187PyObject *PyBytes_DecodeEscape(const char *s,
1188 Py_ssize_t len,
1189 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001190 Py_ssize_t Py_UNUSED(unicode),
1191 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001192{
1193 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001194 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001195 &first_invalid_escape);
1196 if (result == NULL)
1197 return NULL;
1198 if (first_invalid_escape != NULL) {
1199 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1200 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001201 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001202 Py_DECREF(result);
1203 return NULL;
1204 }
1205 }
1206 return result;
1207
1208}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209/* -------------------------------------------------------------------- */
1210/* object api */
1211
1212Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001213PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 if (!PyBytes_Check(op)) {
1216 PyErr_Format(PyExc_TypeError,
1217 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1218 return -1;
1219 }
1220 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001224PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return NULL;
1230 }
1231 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232}
1233
1234int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001235PyBytes_AsStringAndSize(PyObject *obj,
1236 char **s,
1237 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 if (s == NULL) {
1240 PyErr_BadInternalCall();
1241 return -1;
1242 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 if (!PyBytes_Check(obj)) {
1245 PyErr_Format(PyExc_TypeError,
1246 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1247 return -1;
1248 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 *s = PyBytes_AS_STRING(obj);
1251 if (len != NULL)
1252 *len = PyBytes_GET_SIZE(obj);
1253 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001254 PyErr_SetString(PyExc_ValueError,
1255 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 return -1;
1257 }
1258 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259}
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
1261/* -------------------------------------------------------------------- */
1262/* Methods */
1263
Eric Smith0923d1d2009-04-16 20:16:10 +00001264#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001265
1266#include "stringlib/fastsearch.h"
1267#include "stringlib/count.h"
1268#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001269#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001270#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001271#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001272#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Eric Smith0f78bff2009-11-30 01:01:42 +00001274#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276PyObject *
1277PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001278{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001281 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 unsigned char quote, *s, *p;
1284
1285 /* Compute size of output string */
1286 squotes = dquotes = 0;
1287 newsize = 3; /* b'' */
1288 s = (unsigned char*)op->ob_sval;
1289 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001290 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001292 case '\'': squotes++; break;
1293 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001295 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 default:
1297 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001298 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001300 if (newsize > PY_SSIZE_T_MAX - incr)
1301 goto overflow;
1302 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 }
1304 quote = '\'';
1305 if (smartquotes && squotes && !dquotes)
1306 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 if (squotes && quote == '\'') {
1308 if (newsize > PY_SSIZE_T_MAX - squotes)
1309 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312
1313 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 if (v == NULL) {
1315 return NULL;
1316 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 *p++ = 'b', *p++ = quote;
1320 for (i = 0; i < length; i++) {
1321 unsigned char c = op->ob_sval[i];
1322 if (c == quote || c == '\\')
1323 *p++ = '\\', *p++ = c;
1324 else if (c == '\t')
1325 *p++ = '\\', *p++ = 't';
1326 else if (c == '\n')
1327 *p++ = '\\', *p++ = 'n';
1328 else if (c == '\r')
1329 *p++ = '\\', *p++ = 'r';
1330 else if (c < ' ' || c >= 0x7f) {
1331 *p++ = '\\';
1332 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001333 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1334 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 else
1337 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001340 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001342
1343 overflow:
1344 PyErr_SetString(PyExc_OverflowError,
1345 "bytes object is too large to make repr");
1346 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001347}
1348
Neal Norwitz6968b052007-02-27 19:02:19 +00001349static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001350bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001353}
1354
Neal Norwitz6968b052007-02-27 19:02:19 +00001355static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001356bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001357{
Victor Stinner331a6a52019-05-27 16:39:22 +02001358 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001359 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001361 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001363 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 }
1365 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001366}
1367
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001369bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372}
Neal Norwitz6968b052007-02-27 19:02:19 +00001373
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374/* This is also used by PyBytes_Concat() */
1375static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001376bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 Py_buffer va, vb;
1379 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 va.len = -1;
1382 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001383 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1384 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001386 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 goto done;
1388 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 /* Optimize end cases */
1391 if (va.len == 0 && PyBytes_CheckExact(b)) {
1392 result = b;
1393 Py_INCREF(result);
1394 goto done;
1395 }
1396 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1397 result = a;
1398 Py_INCREF(result);
1399 goto done;
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001402 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 PyErr_NoMemory();
1404 goto done;
1405 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001407 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 if (result != NULL) {
1409 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1410 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1411 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412
1413 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (va.len != -1)
1415 PyBuffer_Release(&va);
1416 if (vb.len != -1)
1417 PyBuffer_Release(&vb);
1418 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419}
Neal Norwitz6968b052007-02-27 19:02:19 +00001420
1421static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001422bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001423{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001424 Py_ssize_t i;
1425 Py_ssize_t j;
1426 Py_ssize_t size;
1427 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 size_t nbytes;
1429 if (n < 0)
1430 n = 0;
1431 /* watch out for overflows: the size can overflow int,
1432 * and the # of bytes needed can overflow size_t
1433 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001434 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 PyErr_SetString(PyExc_OverflowError,
1436 "repeated bytes are too long");
1437 return NULL;
1438 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001439 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1441 Py_INCREF(a);
1442 return (PyObject *)a;
1443 }
1444 nbytes = (size_t)size;
1445 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1446 PyErr_SetString(PyExc_OverflowError,
1447 "repeated bytes are too long");
1448 return NULL;
1449 }
1450 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1451 if (op == NULL)
1452 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001453 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 op->ob_shash = -1;
1455 op->ob_sval[size] = '\0';
1456 if (Py_SIZE(a) == 1 && n > 0) {
1457 memset(op->ob_sval, a->ob_sval[0] , n);
1458 return (PyObject *) op;
1459 }
1460 i = 0;
1461 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001462 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 i = Py_SIZE(a);
1464 }
1465 while (i < size) {
1466 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001467 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 i += j;
1469 }
1470 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001471}
1472
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001473static int
1474bytes_contains(PyObject *self, PyObject *arg)
1475{
1476 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1477}
1478
Neal Norwitz6968b052007-02-27 19:02:19 +00001479static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001480bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 if (i < 0 || i >= Py_SIZE(a)) {
1483 PyErr_SetString(PyExc_IndexError, "index out of range");
1484 return NULL;
1485 }
1486 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001487}
1488
Benjamin Peterson621b4302016-09-09 13:54:34 -07001489static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001490bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1491{
1492 int cmp;
1493 Py_ssize_t len;
1494
1495 len = Py_SIZE(a);
1496 if (Py_SIZE(b) != len)
1497 return 0;
1498
1499 if (a->ob_sval[0] != b->ob_sval[0])
1500 return 0;
1501
1502 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1503 return (cmp == 0);
1504}
1505
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 int c;
1510 Py_ssize_t len_a, len_b;
1511 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001512 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 /* Make sure both arguments are strings. */
1515 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001516 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001517 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001518 rc = PyObject_IsInstance((PyObject*)a,
1519 (PyObject*)&PyUnicode_Type);
1520 if (!rc)
1521 rc = PyObject_IsInstance((PyObject*)b,
1522 (PyObject*)&PyUnicode_Type);
1523 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001525 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001526 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001527 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001528 return NULL;
1529 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001530 else {
1531 rc = PyObject_IsInstance((PyObject*)a,
1532 (PyObject*)&PyLong_Type);
1533 if (!rc)
1534 rc = PyObject_IsInstance((PyObject*)b,
1535 (PyObject*)&PyLong_Type);
1536 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001537 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001538 if (rc) {
1539 if (PyErr_WarnEx(PyExc_BytesWarning,
1540 "Comparison between bytes and int", 1))
1541 return NULL;
1542 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001543 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 }
stratakise8b19652017-11-02 11:32:54 +01001545 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001549 case Py_EQ:
1550 case Py_LE:
1551 case Py_GE:
1552 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001553 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001554 case Py_NE:
1555 case Py_LT:
1556 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001557 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001558 default:
1559 PyErr_BadArgument();
1560 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 }
1562 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001563 else if (op == Py_EQ || op == Py_NE) {
1564 int eq = bytes_compare_eq(a, b);
1565 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001566 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001567 }
1568 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001569 len_a = Py_SIZE(a);
1570 len_b = Py_SIZE(b);
1571 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001572 if (min_len > 0) {
1573 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001574 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001575 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001577 else
1578 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001579 if (c != 0)
1580 Py_RETURN_RICHCOMPARE(c, 0, op);
1581 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001583}
1584
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001585static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001586bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001587{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001588 if (a->ob_shash == -1) {
1589 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001590 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001591 }
1592 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001593}
1594
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001596bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 if (PyIndex_Check(item)) {
1599 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1600 if (i == -1 && PyErr_Occurred())
1601 return NULL;
1602 if (i < 0)
1603 i += PyBytes_GET_SIZE(self);
1604 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1605 PyErr_SetString(PyExc_IndexError,
1606 "index out of range");
1607 return NULL;
1608 }
1609 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1610 }
1611 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001612 Py_ssize_t start, stop, step, slicelength, i;
1613 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 char* source_buf;
1615 char* result_buf;
1616 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001617
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001618 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 return NULL;
1620 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001621 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1622 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 if (slicelength <= 0) {
1625 return PyBytes_FromStringAndSize("", 0);
1626 }
1627 else if (start == 0 && step == 1 &&
1628 slicelength == PyBytes_GET_SIZE(self) &&
1629 PyBytes_CheckExact(self)) {
1630 Py_INCREF(self);
1631 return (PyObject *)self;
1632 }
1633 else if (step == 1) {
1634 return PyBytes_FromStringAndSize(
1635 PyBytes_AS_STRING(self) + start,
1636 slicelength);
1637 }
1638 else {
1639 source_buf = PyBytes_AS_STRING(self);
1640 result = PyBytes_FromStringAndSize(NULL, slicelength);
1641 if (result == NULL)
1642 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 result_buf = PyBytes_AS_STRING(result);
1645 for (cur = start, i = 0; i < slicelength;
1646 cur += step, i++) {
1647 result_buf[i] = source_buf[cur];
1648 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 return result;
1651 }
1652 }
1653 else {
1654 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001655 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 Py_TYPE(item)->tp_name);
1657 return NULL;
1658 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659}
1660
1661static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001662bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1665 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666}
1667
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001668static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 (lenfunc)bytes_length, /*sq_length*/
1670 (binaryfunc)bytes_concat, /*sq_concat*/
1671 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1672 (ssizeargfunc)bytes_item, /*sq_item*/
1673 0, /*sq_slice*/
1674 0, /*sq_ass_item*/
1675 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001676 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677};
1678
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001679static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 (lenfunc)bytes_length,
1681 (binaryfunc)bytes_subscript,
1682 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683};
1684
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001685static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 (getbufferproc)bytes_buffer_getbuffer,
1687 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688};
1689
1690
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip().  Those helpers skip the leading scan when given
   RIGHTSTRIP and skip the trailing scan when given LEFTSTRIP, so BOTHSTRIP
   scans both ends. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1694
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001695/*[clinic input]
1696bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698 sep: object = None
1699 The delimiter according which to split the bytes.
1700 None (the default value) means split on ASCII whitespace characters
1701 (space, tab, return, newline, formfeed, vertical tab).
1702 maxsplit: Py_ssize_t = -1
1703 Maximum number of splits to do.
1704 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001706Return a list of the sections in the bytes, using sep as the delimiter.
1707[clinic start generated code]*/
1708
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001709static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001710bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1711/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001712{
1713 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 const char *s = PyBytes_AS_STRING(self), *sub;
1715 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 if (maxsplit < 0)
1719 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001722 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 return NULL;
1724 sub = vsub.buf;
1725 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1728 PyBuffer_Release(&vsub);
1729 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001730}
1731
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001732/*[clinic input]
1733bytes.partition
1734
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001735 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001736 /
1737
1738Partition the bytes into three parts using the given separator.
1739
1740This will search for the separator sep in the bytes. If the separator is found,
1741returns a 3-tuple containing the part before the separator, the separator
1742itself, and the part after it.
1743
1744If the separator is not found, returns a 3-tuple containing the original bytes
1745object and two empty bytes objects.
1746[clinic start generated code]*/
1747
Neal Norwitz6968b052007-02-27 19:02:19 +00001748static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001749bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001750/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001751{
Neal Norwitz6968b052007-02-27 19:02:19 +00001752 return stringlib_partition(
1753 (PyObject*) self,
1754 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001755 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001756 );
1757}
1758
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001759/*[clinic input]
1760bytes.rpartition
1761
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763 /
1764
1765Partition the bytes into three parts using the given separator.
1766
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001767This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001768the separator is found, returns a 3-tuple containing the part before the
1769separator, the separator itself, and the part after it.
1770
1771If the separator is not found, returns a 3-tuple containing two empty bytes
1772objects and the original bytes object.
1773[clinic start generated code]*/
1774
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001775static PyObject *
1776bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001777/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001778{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 return stringlib_rpartition(
1780 (PyObject*) self,
1781 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001782 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001784}
1785
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001786/*[clinic input]
1787bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001788
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789Return a list of the sections in the bytes, using sep as the delimiter.
1790
1791Splitting is done starting at the end of the bytes and working to the front.
1792[clinic start generated code]*/
1793
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001795bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1796/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001797{
1798 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 const char *s = PyBytes_AS_STRING(self), *sub;
1800 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001801 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 if (maxsplit < 0)
1804 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001805 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001807 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 return NULL;
1809 sub = vsub.buf;
1810 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1813 PyBuffer_Release(&vsub);
1814 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001815}
1816
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818/*[clinic input]
1819bytes.join
1820
1821 iterable_of_bytes: object
1822 /
1823
1824Concatenate any number of bytes objects.
1825
1826The bytes whose method is called is inserted in between each pair.
1827
1828The result is returned as a new bytes object.
1829
1830Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1831[clinic start generated code]*/
1832
Neal Norwitz6968b052007-02-27 19:02:19 +00001833static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001834bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1835/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001836{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001837 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001838}
1839
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840PyObject *
1841_PyBytes_Join(PyObject *sep, PyObject *x)
1842{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 assert(sep != NULL && PyBytes_Check(sep));
1844 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001845 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846}
1847
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001848static PyObject *
1849bytes_find(PyBytesObject *self, PyObject *args)
1850{
1851 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1852}
1853
1854static PyObject *
1855bytes_index(PyBytesObject *self, PyObject *args)
1856{
1857 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1858}
1859
1860
1861static PyObject *
1862bytes_rfind(PyBytesObject *self, PyObject *args)
1863{
1864 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1865}
1866
1867
1868static PyObject *
1869bytes_rindex(PyBytesObject *self, PyObject *args)
1870{
1871 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1872}
1873
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
1875Py_LOCAL_INLINE(PyObject *)
1876do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 Py_buffer vsep;
1879 char *s = PyBytes_AS_STRING(self);
1880 Py_ssize_t len = PyBytes_GET_SIZE(self);
1881 char *sep;
1882 Py_ssize_t seplen;
1883 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001885 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 return NULL;
1887 sep = vsep.buf;
1888 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 i = 0;
1891 if (striptype != RIGHTSTRIP) {
1892 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1893 i++;
1894 }
1895 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 j = len;
1898 if (striptype != LEFTSTRIP) {
1899 do {
1900 j--;
1901 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1902 j++;
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1908 Py_INCREF(self);
1909 return (PyObject*)self;
1910 }
1911 else
1912 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001913}
1914
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
1916Py_LOCAL_INLINE(PyObject *)
1917do_strip(PyBytesObject *self, int striptype)
1918{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 char *s = PyBytes_AS_STRING(self);
1920 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 i = 0;
1923 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001924 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 i++;
1926 }
1927 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 j = len;
1930 if (striptype != LEFTSTRIP) {
1931 do {
1932 j--;
David Malcolm96960882010-11-05 17:23:41 +00001933 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 j++;
1935 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1938 Py_INCREF(self);
1939 return (PyObject*)self;
1940 }
1941 else
1942 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943}
1944
1945
1946Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001947do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001949 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001950 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 }
1952 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953}
1954
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001955/*[clinic input]
1956bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001958 bytes: object = None
1959 /
1960
1961Strip leading and trailing bytes contained in the argument.
1962
1963If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1964[clinic start generated code]*/
1965
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001966static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001967bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001968/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001969{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001970 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001971}
1972
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001973/*[clinic input]
1974bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001976 bytes: object = None
1977 /
1978
1979Strip leading bytes contained in the argument.
1980
1981If the argument is omitted or None, strip leading ASCII whitespace.
1982[clinic start generated code]*/
1983
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001984static PyObject *
1985bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001986/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001987{
1988 return do_argstrip(self, LEFTSTRIP, bytes);
1989}
1990
1991/*[clinic input]
1992bytes.rstrip
1993
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001994 bytes: object = None
1995 /
1996
1997Strip trailing bytes contained in the argument.
1998
1999If the argument is omitted or None, strip trailing ASCII whitespace.
2000[clinic start generated code]*/
2001
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002static PyObject *
2003bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002004/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005{
2006 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007}
Neal Norwitz6968b052007-02-27 19:02:19 +00002008
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002010static PyObject *
2011bytes_count(PyBytesObject *self, PyObject *args)
2012{
2013 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2014}
2015
2016
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017/*[clinic input]
2018bytes.translate
2019
Victor Stinner049e5092014-08-17 22:20:00 +02002020 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002023 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024
2025Return a copy with each character mapped by the given translation table.
2026
Martin Panter1b6c6da2016-08-27 08:35:02 +00002027All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028The remaining characters are mapped through the given translation table.
2029[clinic start generated code]*/
2030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002032bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002033 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002034/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002035{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002036 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002037 Py_buffer table_view = {NULL, NULL};
2038 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002040 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 Py_ssize_t inlen, tablen, dellen = 0;
2044 PyObject *result;
2045 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002047 if (PyBytes_Check(table)) {
2048 table_chars = PyBytes_AS_STRING(table);
2049 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002051 else if (table == Py_None) {
2052 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 tablen = 256;
2054 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002055 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002056 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002057 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002058 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 tablen = table_view.len;
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (tablen != 256) {
2063 PyErr_SetString(PyExc_ValueError,
2064 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 return NULL;
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 if (deletechars != NULL) {
2070 if (PyBytes_Check(deletechars)) {
2071 del_table_chars = PyBytes_AS_STRING(deletechars);
2072 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002075 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002076 PyBuffer_Release(&table_view);
2077 return NULL;
2078 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002079 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002080 dellen = del_table_view.len;
2081 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 }
2083 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002084 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 dellen = 0;
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 inlen = PyBytes_GET_SIZE(input_obj);
2089 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002090 if (result == NULL) {
2091 PyBuffer_Release(&del_table_view);
2092 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002094 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002095 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 /* If no deletions are required, use faster code */
2100 for (i = inlen; --i >= 0; ) {
2101 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 changed = 1;
2104 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002105 if (!changed && PyBytes_CheckExact(input_obj)) {
2106 Py_INCREF(input_obj);
2107 Py_DECREF(result);
2108 result = input_obj;
2109 }
2110 PyBuffer_Release(&del_table_view);
2111 PyBuffer_Release(&table_view);
2112 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 for (i = 0; i < 256; i++)
2117 trans_table[i] = Py_CHARMASK(i);
2118 } else {
2119 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002120 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002125 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002126 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 for (i = inlen; --i >= 0; ) {
2129 c = Py_CHARMASK(*input++);
2130 if (trans_table[c] != -1)
2131 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2132 continue;
2133 changed = 1;
2134 }
2135 if (!changed && PyBytes_CheckExact(input_obj)) {
2136 Py_DECREF(result);
2137 Py_INCREF(input_obj);
2138 return input_obj;
2139 }
2140 /* Fix the size of the resulting string */
2141 if (inlen > 0)
2142 _PyBytes_Resize(&result, output - output_start);
2143 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144}
2145
2146
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147/*[clinic input]
2148
2149@staticmethod
2150bytes.maketrans
2151
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002152 frm: Py_buffer
2153 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154 /
2155
2156Return a translation table useable for the bytes or bytearray translate method.
2157
2158The returned table will be one where each byte in frm is mapped to the byte at
2159the same position in to.
2160
2161The bytes objects frm and to must be of the same length.
2162[clinic start generated code]*/
2163
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002165bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002166/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002167{
2168 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002169}
2170
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002171
2172/*[clinic input]
2173bytes.replace
2174
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002175 old: Py_buffer
2176 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177 count: Py_ssize_t = -1
2178 Maximum number of occurrences to replace.
2179 -1 (the default value) means replace all occurrences.
2180 /
2181
2182Return a copy with all occurrences of substring old replaced by new.
2183
2184If the optional argument count is given, only the first count occurrences are
2185replaced.
2186[clinic start generated code]*/
2187
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002188static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002189bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002190 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002191/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002192{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002193 return stringlib_replace((PyObject *)self,
2194 (const char *)old->buf, old->len,
2195 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196}
2197
2198/** End DALKE **/
2199
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002200
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002201static PyObject *
2202bytes_startswith(PyBytesObject *self, PyObject *args)
2203{
2204 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2205}
2206
2207static PyObject *
2208bytes_endswith(PyBytesObject *self, PyObject *args)
2209{
2210 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2211}
2212
2213
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214/*[clinic input]
2215bytes.decode
2216
2217 encoding: str(c_default="NULL") = 'utf-8'
2218 The encoding with which to decode the bytes.
2219 errors: str(c_default="NULL") = 'strict'
2220 The error handling scheme to use for the handling of decoding errors.
2221 The default is 'strict' meaning that decoding errors raise a
2222 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2223 as well as any other name registered with codecs.register_error that
2224 can handle UnicodeDecodeErrors.
2225
2226Decode the bytes using the codec registered for encoding.
2227[clinic start generated code]*/
2228
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002229static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002230bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002231 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002232/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002233{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002234 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002235}
2236
Guido van Rossum20188312006-05-05 15:15:40 +00002237
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002238/*[clinic input]
2239bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002240
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002241 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002242
2243Return a list of the lines in the bytes, breaking at line boundaries.
2244
2245Line breaks are not included in the resulting list unless keepends is given and
2246true.
2247[clinic start generated code]*/
2248
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002250bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002251/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002252{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002253 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002254 (PyObject*) self, PyBytes_AS_STRING(self),
2255 PyBytes_GET_SIZE(self), keepends
2256 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002257}
2258
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259/*[clinic input]
2260@classmethod
2261bytes.fromhex
2262
2263 string: unicode
2264 /
2265
2266Create a bytes object from a string of hexadecimal numbers.
2267
2268Spaces between two numbers are accepted.
2269Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2270[clinic start generated code]*/
2271
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002272static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002273bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002274/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002275{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002276 PyObject *result = _PyBytes_FromHex(string, 0);
2277 if (type != &PyBytes_Type && result != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002278 Py_SETREF(result, _PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002279 }
2280 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002281}
2282
2283PyObject*
2284_PyBytes_FromHex(PyObject *string, int use_bytearray)
2285{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002287 Py_ssize_t hexlen, invalid_char;
2288 unsigned int top, bot;
2289 Py_UCS1 *str, *end;
2290 _PyBytesWriter writer;
2291
2292 _PyBytesWriter_Init(&writer);
2293 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002294
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002295 assert(PyUnicode_Check(string));
2296 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002298 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002299
Victor Stinner2bf89932015-10-14 11:25:33 +02002300 if (!PyUnicode_IS_ASCII(string)) {
2301 void *data = PyUnicode_DATA(string);
2302 unsigned int kind = PyUnicode_KIND(string);
2303 Py_ssize_t i;
2304
2305 /* search for the first non-ASCII character */
2306 for (i = 0; i < hexlen; i++) {
2307 if (PyUnicode_READ(kind, data, i) >= 128)
2308 break;
2309 }
2310 invalid_char = i;
2311 goto error;
2312 }
2313
2314 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2315 str = PyUnicode_1BYTE_DATA(string);
2316
2317 /* This overestimates if there are spaces */
2318 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2319 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002321
2322 end = str + hexlen;
2323 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002325 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002326 do {
2327 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002328 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002329 if (str >= end)
2330 break;
2331 }
2332
2333 top = _PyLong_DigitValue[*str];
2334 if (top >= 16) {
2335 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 goto error;
2337 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002338 str++;
2339
2340 bot = _PyLong_DigitValue[*str];
2341 if (bot >= 16) {
2342 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2343 goto error;
2344 }
2345 str++;
2346
2347 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002349
2350 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002351
2352 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002353 PyErr_Format(PyExc_ValueError,
2354 "non-hexadecimal number found in "
2355 "fromhex() arg at position %zd", invalid_char);
2356 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002358}
2359
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002360/*[clinic input]
2361bytes.hex
2362
2363 sep: object = NULL
2364 An optional single character or byte to separate hex bytes.
2365 bytes_per_sep: int = 1
2366 How many bytes between separators. Positive values count from the
2367 right, negative values count from the left.
2368
2369Create a str of hexadecimal numbers from a bytes object.
2370
2371Example:
2372>>> value = b'\xb9\x01\xef'
2373>>> value.hex()
2374'b901ef'
2375>>> value.hex(':')
2376'b9:01:ef'
2377>>> value.hex(':', 2)
2378'b9:01ef'
2379>>> value.hex(':', -2)
2380'b901:ef'
2381[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002382
2383static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002384bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2385/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002386{
2387 char* argbuf = PyBytes_AS_STRING(self);
2388 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002389 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002390}
2391
2392static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302393bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002394{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002396}
2397
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002398
2399static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002400bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302402 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002404 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002405 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002406 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002407 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002408 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002409 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002410 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002411 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002412 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002413 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002414 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002415 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302416 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302418 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302420 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002421 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302422 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302424 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302426 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302428 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302430 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002432 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002433 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302434 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002435 BYTES_LSTRIP_METHODDEF
2436 BYTES_MAKETRANS_METHODDEF
2437 BYTES_PARTITION_METHODDEF
2438 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002439 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2440 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002441 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002442 BYTES_RPARTITION_METHODDEF
2443 BYTES_RSPLIT_METHODDEF
2444 BYTES_RSTRIP_METHODDEF
2445 BYTES_SPLIT_METHODDEF
2446 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002447 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002448 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002449 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302450 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302452 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002453 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302454 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002455 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002457};
2458
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002460bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002461{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002462 if (!PyBytes_Check(self)) {
2463 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002464 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002465 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002466 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002467}
2468
2469static PyNumberMethods bytes_as_number = {
2470 0, /*nb_add*/
2471 0, /*nb_subtract*/
2472 0, /*nb_multiply*/
2473 bytes_mod, /*nb_remainder*/
2474};
2475
2476static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002477bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478
2479static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002480bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 PyObject *x = NULL;
2483 const char *encoding = NULL;
2484 const char *errors = NULL;
2485 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002486 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 Py_ssize_t size;
2488 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002491 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2493 &encoding, &errors))
2494 return NULL;
2495 if (x == NULL) {
2496 if (encoding != NULL || errors != NULL) {
2497 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002498 encoding != NULL ?
2499 "encoding without a string argument" :
2500 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 return NULL;
2502 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002503 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002506 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002508 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002510 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 return NULL;
2512 }
2513 new = PyUnicode_AsEncodedString(x, encoding, errors);
2514 if (new == NULL)
2515 return NULL;
2516 assert(PyBytes_Check(new));
2517 return new;
2518 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002519
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002520 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002521 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002522 PyUnicode_Check(x) ?
2523 "string argument without an encoding" :
2524 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002525 return NULL;
2526 }
2527
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002528 /* We'd like to call PyObject_Bytes here, but we need to check for an
2529 integer argument before deferring to PyBytes_FromObject, something
2530 PyObject_Bytes doesn't do. */
2531 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2532 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002533 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002534 Py_DECREF(func);
2535 if (new == NULL)
2536 return NULL;
2537 if (!PyBytes_Check(new)) {
2538 PyErr_Format(PyExc_TypeError,
2539 "__bytes__ returned non-bytes (type %.200s)",
2540 Py_TYPE(new)->tp_name);
2541 Py_DECREF(new);
2542 return NULL;
2543 }
2544 return new;
2545 }
2546 else if (PyErr_Occurred())
2547 return NULL;
2548
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002549 if (PyUnicode_Check(x)) {
2550 PyErr_SetString(PyExc_TypeError,
2551 "string argument without an encoding");
2552 return NULL;
2553 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002555 if (PyIndex_Check(x)) {
2556 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2557 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002558 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002559 return NULL;
2560 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002561 }
INADA Naokia634e232017-01-06 17:32:01 +09002562 else {
2563 if (size < 0) {
2564 PyErr_SetString(PyExc_ValueError, "negative count");
2565 return NULL;
2566 }
2567 new = _PyBytes_FromSize(size, 1);
2568 if (new == NULL)
2569 return NULL;
2570 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002571 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002574 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002575}
2576
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002577static PyObject*
2578_PyBytes_FromBuffer(PyObject *x)
2579{
2580 PyObject *new;
2581 Py_buffer view;
2582
2583 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2584 return NULL;
2585
2586 new = PyBytes_FromStringAndSize(NULL, view.len);
2587 if (!new)
2588 goto fail;
2589 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2590 &view, view.len, 'C') < 0)
2591 goto fail;
2592 PyBuffer_Release(&view);
2593 return new;
2594
2595fail:
2596 Py_XDECREF(new);
2597 PyBuffer_Release(&view);
2598 return NULL;
2599}
2600
2601static PyObject*
2602_PyBytes_FromList(PyObject *x)
2603{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002604 Py_ssize_t i, size = PyList_GET_SIZE(x);
2605 Py_ssize_t value;
2606 char *str;
2607 PyObject *item;
2608 _PyBytesWriter writer;
2609
2610 _PyBytesWriter_Init(&writer);
2611 str = _PyBytesWriter_Alloc(&writer, size);
2612 if (str == NULL)
2613 return NULL;
2614 writer.overallocate = 1;
2615 size = writer.allocated;
2616
2617 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2618 item = PyList_GET_ITEM(x, i);
2619 Py_INCREF(item);
2620 value = PyNumber_AsSsize_t(item, NULL);
2621 Py_DECREF(item);
2622 if (value == -1 && PyErr_Occurred())
2623 goto error;
2624
2625 if (value < 0 || value >= 256) {
2626 PyErr_SetString(PyExc_ValueError,
2627 "bytes must be in range(0, 256)");
2628 goto error;
2629 }
2630
2631 if (i >= size) {
2632 str = _PyBytesWriter_Resize(&writer, str, size+1);
2633 if (str == NULL)
2634 return NULL;
2635 size = writer.allocated;
2636 }
2637 *str++ = (char) value;
2638 }
2639 return _PyBytesWriter_Finish(&writer, str);
2640
2641 error:
2642 _PyBytesWriter_Dealloc(&writer);
2643 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002644}
2645
2646static PyObject*
2647_PyBytes_FromTuple(PyObject *x)
2648{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002649 PyObject *bytes;
2650 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2651 Py_ssize_t value;
2652 char *str;
2653 PyObject *item;
2654
2655 bytes = PyBytes_FromStringAndSize(NULL, size);
2656 if (bytes == NULL)
2657 return NULL;
2658 str = ((PyBytesObject *)bytes)->ob_sval;
2659
2660 for (i = 0; i < size; i++) {
2661 item = PyTuple_GET_ITEM(x, i);
2662 value = PyNumber_AsSsize_t(item, NULL);
2663 if (value == -1 && PyErr_Occurred())
2664 goto error;
2665
2666 if (value < 0 || value >= 256) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "bytes must be in range(0, 256)");
2669 goto error;
2670 }
2671 *str++ = (char) value;
2672 }
2673 return bytes;
2674
2675 error:
2676 Py_DECREF(bytes);
2677 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002678}
2679
2680static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002681_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002682{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002683 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002685 _PyBytesWriter writer;
2686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002688 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 if (size == -1 && PyErr_Occurred())
2690 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002691
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002692 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002693 str = _PyBytesWriter_Alloc(&writer, size);
2694 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002696 writer.overallocate = 1;
2697 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* Run the iterator to exhaustion */
2700 for (i = 0; ; i++) {
2701 PyObject *item;
2702 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Get the next item */
2705 item = PyIter_Next(it);
2706 if (item == NULL) {
2707 if (PyErr_Occurred())
2708 goto error;
2709 break;
2710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002713 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 Py_DECREF(item);
2715 if (value == -1 && PyErr_Occurred())
2716 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 /* Range check */
2719 if (value < 0 || value >= 256) {
2720 PyErr_SetString(PyExc_ValueError,
2721 "bytes must be in range(0, 256)");
2722 goto error;
2723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 /* Append the byte */
2726 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002727 str = _PyBytesWriter_Resize(&writer, str, size+1);
2728 if (str == NULL)
2729 return NULL;
2730 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002732 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002734
2735 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
2737 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002738 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740}
2741
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002742PyObject *
2743PyBytes_FromObject(PyObject *x)
2744{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002745 PyObject *it, *result;
2746
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002747 if (x == NULL) {
2748 PyErr_BadInternalCall();
2749 return NULL;
2750 }
2751
2752 if (PyBytes_CheckExact(x)) {
2753 Py_INCREF(x);
2754 return x;
2755 }
2756
2757 /* Use the modern buffer interface */
2758 if (PyObject_CheckBuffer(x))
2759 return _PyBytes_FromBuffer(x);
2760
2761 if (PyList_CheckExact(x))
2762 return _PyBytes_FromList(x);
2763
2764 if (PyTuple_CheckExact(x))
2765 return _PyBytes_FromTuple(x);
2766
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 if (!PyUnicode_Check(x)) {
2768 it = PyObject_GetIter(x);
2769 if (it != NULL) {
2770 result = _PyBytes_FromIterator(it, x);
2771 Py_DECREF(it);
2772 return result;
2773 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002774 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2775 return NULL;
2776 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002777 }
2778
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002779 PyErr_Format(PyExc_TypeError,
2780 "cannot convert '%.200s' object to bytes",
2781 x->ob_type->tp_name);
2782 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002783}
2784
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002786bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 PyObject *tmp, *pnew;
2789 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 assert(PyType_IsSubtype(type, &PyBytes_Type));
2792 tmp = bytes_new(&PyBytes_Type, args, kwds);
2793 if (tmp == NULL)
2794 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002795 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 n = PyBytes_GET_SIZE(tmp);
2797 pnew = type->tp_alloc(type, n);
2798 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002799 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 PyBytes_AS_STRING(tmp), n+1);
2801 ((PyBytesObject *)pnew)->ob_shash =
2802 ((PyBytesObject *)tmp)->ob_shash;
2803 }
2804 Py_DECREF(tmp);
2805 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806}
2807
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002808PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002809"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002811bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002812bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2813bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002814\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002816 - an iterable yielding integers in range(256)\n\
2817 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002818 - any object implementing the buffer API.\n\
2819 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002820
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002821static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002822
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2825 "bytes",
2826 PyBytesObject_SIZE,
2827 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002828 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002829 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 0, /* tp_getattr */
2831 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002832 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002834 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 &bytes_as_sequence, /* tp_as_sequence */
2836 &bytes_as_mapping, /* tp_as_mapping */
2837 (hashfunc)bytes_hash, /* tp_hash */
2838 0, /* tp_call */
2839 bytes_str, /* tp_str */
2840 PyObject_GenericGetAttr, /* tp_getattro */
2841 0, /* tp_setattro */
2842 &bytes_as_buffer, /* tp_as_buffer */
2843 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2844 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2845 bytes_doc, /* tp_doc */
2846 0, /* tp_traverse */
2847 0, /* tp_clear */
2848 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2849 0, /* tp_weaklistoffset */
2850 bytes_iter, /* tp_iter */
2851 0, /* tp_iternext */
2852 bytes_methods, /* tp_methods */
2853 0, /* tp_members */
2854 0, /* tp_getset */
2855 &PyBaseObject_Type, /* tp_base */
2856 0, /* tp_dict */
2857 0, /* tp_descr_get */
2858 0, /* tp_descr_set */
2859 0, /* tp_dictoffset */
2860 0, /* tp_init */
2861 0, /* tp_alloc */
2862 bytes_new, /* tp_new */
2863 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002864};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002865
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002867PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 assert(pv != NULL);
2870 if (*pv == NULL)
2871 return;
2872 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002873 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 return;
2875 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002876
2877 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2878 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002879 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002880 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002881
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002882 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002883 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2884 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2885 Py_CLEAR(*pv);
2886 return;
2887 }
2888
2889 oldsize = PyBytes_GET_SIZE(*pv);
2890 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2891 PyErr_NoMemory();
2892 goto error;
2893 }
2894 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2895 goto error;
2896
2897 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2898 PyBuffer_Release(&wb);
2899 return;
2900
2901 error:
2902 PyBuffer_Release(&wb);
2903 Py_CLEAR(*pv);
2904 return;
2905 }
2906
2907 else {
2908 /* Multiple references, need to create new object */
2909 PyObject *v;
2910 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002911 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002912 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913}
2914
2915void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002916PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 PyBytes_Concat(pv, w);
2919 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920}
2921
2922
Ethan Furmanb95b5612015-01-23 20:05:18 -08002923/* The following function breaks the notion that bytes are immutable:
2924 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002926 as creating a new bytes object and destroying the old one, only
2927 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002929 Note that if there's not enough memory to resize the bytes object, the
2930 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002931 memory" exception is set, and -1 is returned. Else (on success) 0 is
2932 returned, and the value in *pv may or may not be the same as on input.
2933 As always, an extra byte is allocated for a trailing \0 byte (newsize
2934 does *not* include that), and a trailing \0 byte is stored.
2935*/
2936
2937int
2938_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2939{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002940 PyObject *v;
2941 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002943 if (!PyBytes_Check(v) || newsize < 0) {
2944 goto error;
2945 }
2946 if (Py_SIZE(v) == newsize) {
2947 /* return early if newsize equals to v->ob_size */
2948 return 0;
2949 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002950 if (Py_SIZE(v) == 0) {
2951 if (newsize == 0) {
2952 return 0;
2953 }
2954 *pv = _PyBytes_FromSize(newsize, 0);
2955 Py_DECREF(v);
2956 return (*pv == NULL) ? -1 : 0;
2957 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002958 if (Py_REFCNT(v) != 1) {
2959 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002961 if (newsize == 0) {
2962 *pv = _PyBytes_FromSize(0, 0);
2963 Py_DECREF(v);
2964 return (*pv == NULL) ? -1 : 0;
2965 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 /* XXX UNREF/NEWREF interface should be more symmetrical */
2967 _Py_DEC_REFTOTAL;
2968 _Py_ForgetReference(v);
2969 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002970 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 if (*pv == NULL) {
2972 PyObject_Del(v);
2973 PyErr_NoMemory();
2974 return -1;
2975 }
2976 _Py_NewReference(*pv);
2977 sv = (PyBytesObject *) *pv;
2978 Py_SIZE(sv) = newsize;
2979 sv->ob_sval[newsize] = '\0';
2980 sv->ob_shash = -1; /* invalidate cached hash value */
2981 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002982error:
2983 *pv = 0;
2984 Py_DECREF(v);
2985 PyErr_BadInternalCall();
2986 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987}
2988
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989void
Victor Stinnerbed48172019-08-27 00:12:32 +02002990_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002993 for (i = 0; i < UCHAR_MAX + 1; i++)
2994 Py_CLEAR(characters[i]);
2995 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996}
2997
Benjamin Peterson4116f362008-05-27 00:36:20 +00002998/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002999
3000typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 PyObject_HEAD
3002 Py_ssize_t it_index;
3003 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005
3006static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 _PyObject_GC_UNTRACK(it);
3010 Py_XDECREF(it->it_seq);
3011 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012}
3013
3014static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 Py_VISIT(it->it_seq);
3018 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019}
3020
3021static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 PyBytesObject *seq;
3025 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 assert(it != NULL);
3028 seq = it->it_seq;
3029 if (seq == NULL)
3030 return NULL;
3031 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3034 item = PyLong_FromLong(
3035 (unsigned char)seq->ob_sval[it->it_index]);
3036 if (item != NULL)
3037 ++it->it_index;
3038 return item;
3039 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003042 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044}
3045
3046static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303047striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 Py_ssize_t len = 0;
3050 if (it->it_seq)
3051 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3052 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003053}
3054
3055PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003056 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003058static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303059striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003060{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003061 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003062 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003063 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003064 it->it_seq, it->it_index);
3065 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003066 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003067 }
3068}
3069
3070PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3071
3072static PyObject *
3073striter_setstate(striterobject *it, PyObject *state)
3074{
3075 Py_ssize_t index = PyLong_AsSsize_t(state);
3076 if (index == -1 && PyErr_Occurred())
3077 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003078 if (it->it_seq != NULL) {
3079 if (index < 0)
3080 index = 0;
3081 else if (index > PyBytes_GET_SIZE(it->it_seq))
3082 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3083 it->it_index = index;
3084 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003085 Py_RETURN_NONE;
3086}
3087
3088PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3089
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003090static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3092 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003093 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3094 reduce_doc},
3095 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3096 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098};
3099
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003100PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3102 "bytes_iterator", /* tp_name */
3103 sizeof(striterobject), /* tp_basicsize */
3104 0, /* tp_itemsize */
3105 /* methods */
3106 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003107 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003108 0, /* tp_getattr */
3109 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003110 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 0, /* tp_repr */
3112 0, /* tp_as_number */
3113 0, /* tp_as_sequence */
3114 0, /* tp_as_mapping */
3115 0, /* tp_hash */
3116 0, /* tp_call */
3117 0, /* tp_str */
3118 PyObject_GenericGetAttr, /* tp_getattro */
3119 0, /* tp_setattro */
3120 0, /* tp_as_buffer */
3121 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3122 0, /* tp_doc */
3123 (traverseproc)striter_traverse, /* tp_traverse */
3124 0, /* tp_clear */
3125 0, /* tp_richcompare */
3126 0, /* tp_weaklistoffset */
3127 PyObject_SelfIter, /* tp_iter */
3128 (iternextfunc)striter_next, /* tp_iternext */
3129 striter_methods, /* tp_methods */
3130 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131};
3132
3133static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003134bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003138 if (!PyBytes_Check(seq)) {
3139 PyErr_BadInternalCall();
3140 return NULL;
3141 }
3142 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3143 if (it == NULL)
3144 return NULL;
3145 it->it_index = 0;
3146 Py_INCREF(seq);
3147 it->it_seq = (PyBytesObject *)seq;
3148 _PyObject_GC_TRACK(it);
3149 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003150}
Victor Stinner00165072015-10-09 01:53:21 +02003151
3152
3153/* _PyBytesWriter API */
3154
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled, the
   requested size is increased by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3162
3163void
3164_PyBytesWriter_Init(_PyBytesWriter *writer)
3165{
Victor Stinner661aacc2015-10-14 09:41:48 +02003166 /* Set all attributes before small_buffer to 0 */
3167 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003168#ifndef NDEBUG
3169 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3170 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003171#endif
3172}
3173
3174void
3175_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3176{
3177 Py_CLEAR(writer->buffer);
3178}
3179
3180Py_LOCAL_INLINE(char*)
3181_PyBytesWriter_AsString(_PyBytesWriter *writer)
3182{
Victor Stinner661aacc2015-10-14 09:41:48 +02003183 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003184 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003185 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003186 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003187 else if (writer->use_bytearray) {
3188 assert(writer->buffer != NULL);
3189 return PyByteArray_AS_STRING(writer->buffer);
3190 }
3191 else {
3192 assert(writer->buffer != NULL);
3193 return PyBytes_AS_STRING(writer->buffer);
3194 }
Victor Stinner00165072015-10-09 01:53:21 +02003195}
3196
3197Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003198_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003199{
3200 char *start = _PyBytesWriter_AsString(writer);
3201 assert(str != NULL);
3202 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003203 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003204 return str - start;
3205}
3206
Victor Stinner68762572019-10-07 18:42:01 +02003207#ifndef NDEBUG
3208Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003209_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3210{
Victor Stinner00165072015-10-09 01:53:21 +02003211 char *start, *end;
3212
Victor Stinner661aacc2015-10-14 09:41:48 +02003213 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003214 assert(writer->buffer == NULL);
3215 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 else {
3217 assert(writer->buffer != NULL);
3218 if (writer->use_bytearray)
3219 assert(PyByteArray_CheckExact(writer->buffer));
3220 else
3221 assert(PyBytes_CheckExact(writer->buffer));
3222 assert(Py_REFCNT(writer->buffer) == 1);
3223 }
Victor Stinner00165072015-10-09 01:53:21 +02003224
Victor Stinner661aacc2015-10-14 09:41:48 +02003225 if (writer->use_bytearray) {
3226 /* bytearray has its own overallocation algorithm,
3227 writer overallocation must be disabled */
3228 assert(!writer->overallocate);
3229 }
3230
3231 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003232 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003233 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003234 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003235 assert(start[writer->allocated] == 0);
3236
3237 end = start + writer->allocated;
3238 assert(str != NULL);
3239 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003240 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003241}
Victor Stinner68762572019-10-07 18:42:01 +02003242#endif
Victor Stinner00165072015-10-09 01:53:21 +02003243
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003244void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003245_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003246{
3247 Py_ssize_t allocated, pos;
3248
Victor Stinner68762572019-10-07 18:42:01 +02003249 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003250 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003251
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003252 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003253 if (writer->overallocate
3254 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3255 /* overallocate to limit the number of realloc() */
3256 allocated += allocated / OVERALLOCATE_FACTOR;
3257 }
3258
Victor Stinner2bf89932015-10-14 11:25:33 +02003259 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003260 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003261 if (writer->use_bytearray) {
3262 if (PyByteArray_Resize(writer->buffer, allocated))
3263 goto error;
3264 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3265 but we cannot use ob_alloc because bytes may need to be moved
3266 to use the whole buffer. bytearray uses an internal optimization
3267 to avoid moving or copying bytes when bytes are removed at the
3268 beginning (ex: del bytearray[:1]). */
3269 }
3270 else {
3271 if (_PyBytes_Resize(&writer->buffer, allocated))
3272 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003273 }
3274 }
3275 else {
3276 /* convert from stack buffer to bytes object buffer */
3277 assert(writer->buffer == NULL);
3278
Victor Stinner661aacc2015-10-14 09:41:48 +02003279 if (writer->use_bytearray)
3280 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3281 else
3282 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003283 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003284 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003285
3286 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003287 char *dest;
3288 if (writer->use_bytearray)
3289 dest = PyByteArray_AS_STRING(writer->buffer);
3290 else
3291 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003292 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003293 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003294 pos);
3295 }
3296
Victor Stinnerb3653a32015-10-09 03:38:24 +02003297 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003298#ifndef NDEBUG
3299 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3300 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003301#endif
Victor Stinner00165072015-10-09 01:53:21 +02003302 }
3303 writer->allocated = allocated;
3304
3305 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003306 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003307 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003308
3309error:
3310 _PyBytesWriter_Dealloc(writer);
3311 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003312}
3313
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003314void*
3315_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316{
3317 Py_ssize_t new_min_size;
3318
Victor Stinner68762572019-10-07 18:42:01 +02003319 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003320 assert(size >= 0);
3321
3322 if (size == 0) {
3323 /* nothing to do */
3324 return str;
3325 }
3326
3327 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328 PyErr_NoMemory();
3329 _PyBytesWriter_Dealloc(writer);
3330 return NULL;
3331 }
3332 new_min_size = writer->min_size + size;
3333
3334 if (new_min_size > writer->allocated)
3335 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336
3337 writer->min_size = new_min_size;
3338 return str;
3339}
3340
Victor Stinner00165072015-10-09 01:53:21 +02003341/* Allocate the buffer to write size bytes.
3342 Return the pointer to the beginning of buffer data.
3343 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003344void*
Victor Stinner00165072015-10-09 01:53:21 +02003345_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346{
3347 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003348 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003349 assert(size >= 0);
3350
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003352#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003353 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003354 /* In debug mode, don't use the full small buffer because it is less
3355 efficient than bytes and bytearray objects to detect buffer underflow
3356 and buffer overflow. Use 10 bytes of the small buffer to test also
3357 code using the smaller buffer in debug mode.
3358
3359 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360 in debug mode to also be able to detect stack overflow when running
3361 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363 stack overflow. */
3364 writer->allocated = Py_MIN(writer->allocated, 10);
3365 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003367 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003368#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003369 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003370#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003371 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003372}
3373
3374PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003375_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003376{
Victor Stinner2bf89932015-10-14 11:25:33 +02003377 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003378 PyObject *result;
3379
Victor Stinner68762572019-10-07 18:42:01 +02003380 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003381
Victor Stinner2bf89932015-10-14 11:25:33 +02003382 size = _PyBytesWriter_GetSize(writer, str);
3383 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 Py_CLEAR(writer->buffer);
3385 /* Get the empty byte string singleton */
3386 result = PyBytes_FromStringAndSize(NULL, 0);
3387 }
3388 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003389 if (writer->use_bytearray) {
3390 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391 }
3392 else {
3393 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003395 }
3396 else {
3397 result = writer->buffer;
3398 writer->buffer = NULL;
3399
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003402 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 }
3407 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003408 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003409 assert(result == NULL);
3410 return NULL;
3411 }
Victor Stinner00165072015-10-09 01:53:21 +02003412 }
3413 }
Victor Stinner00165072015-10-09 01:53:21 +02003414 }
Victor Stinner00165072015-10-09 01:53:21 +02003415 return result;
3416}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003417
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003418void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003419_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003420 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003422 char *str = (char *)ptr;
3423
Victor Stinnerce179bf2015-10-09 12:57:22 +02003424 str = _PyBytesWriter_Prepare(writer, str, size);
3425 if (str == NULL)
3426 return NULL;
3427
Christian Heimesf051e432016-09-13 20:22:02 +02003428 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003429 str += size;
3430
3431 return str;
3432}