blob: ccabbdca1d5624ab92916849bd092c2c7db84401 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner91698d82020-06-25 14:07:40 +02007#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02008#include "pycore_format.h" // F_LJUST
Victor Stinner91698d82020-06-25 14:07:40 +02009#include "pycore_initconfig.h" // _PyStatus_OK()
10#include "pycore_object.h" // _PyObject_GC_TRACK
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +020011#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000012
Gregory P. Smith8cb65692015-04-25 23:22:26 +000013#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000014#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030019/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020020
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030021#include "clinic/bytesobject.c.h"
22
/* Static identifier for the "__bytes__" special method name; format_obj()
   looks the method up through PyId___bytes__. */
_Py_IDENTIFIER(__bytes__);
24
/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
   for a bytes object of length n should request PyBytesObject_SIZE + n bytes.

   The value is the offset of the ob_sval member plus one; the extra byte
   holds the terminating null byte that the constructors in this file
   store at ob_sval[n].

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 or 7 bytes per bytes object allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/* Forward declaration: takes a writer and a `str' pointer and returns a
   Py_ssize_t.  The definition is not part of this section of the file. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
36
Victor Stinnerc41eed12020-06-23 15:54:35 +020037
38static struct _Py_bytes_state*
39get_bytes_state(void)
40{
41 PyInterpreterState *interp = _PyInterpreterState_GET();
42 return &interp->bytes;
43}
44
45
Victor Stinner91698d82020-06-25 14:07:40 +020046// Return a borrowed reference to the empty bytes string singleton.
47static inline PyObject* bytes_get_empty(void)
48{
49 struct _Py_bytes_state *state = get_bytes_state();
50 // bytes_get_empty() must not be called before _PyBytes_Init()
51 // or after _PyBytes_Fini()
52 assert(state->empty_string != NULL);
53 return state->empty_string;
54}
55
56
57// Return a strong reference to the empty bytes string singleton.
58static inline PyObject* bytes_new_empty(void)
59{
60 PyObject *empty = bytes_get_empty();
61 Py_INCREF(empty);
62 return (PyObject *)empty;
63}
64
65
66static int
67bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
68{
69 // Create the empty bytes string singleton
70 PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
71 if (op == NULL) {
72 return -1;
73 }
74 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
75 op->ob_shash = -1;
76 op->ob_sval[0] = '\0';
77
78 assert(state->empty_string == NULL);
79 state->empty_string = (PyObject *)op;
80 return 0;
81}
82
83
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the new object contains the strlen(str) bytes that precede the
   terminator.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +0200106static PyObject *
107_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +0000108{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200109 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200110 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +0200111
Victor Stinnerc41eed12020-06-23 15:54:35 +0200112 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200113 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000115
Victor Stinner049e5092014-08-17 22:20:00 +0200116 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 PyErr_SetString(PyExc_OverflowError,
118 "byte string is too large");
119 return NULL;
120 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200123 if (use_calloc)
124 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
125 else
126 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200127 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200129 }
130 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 op->ob_shash = -1;
Victor Stinner91698d82020-06-25 14:07:40 +0200132 if (!use_calloc) {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200133 op->ob_sval[size] = '\0';
Victor Stinnerdb067af2014-05-02 22:31:14 +0200134 }
135 return (PyObject *) op;
136}
137
138PyObject *
139PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
140{
141 PyBytesObject *op;
142 if (size < 0) {
143 PyErr_SetString(PyExc_SystemError,
144 "Negative size passed to PyBytes_FromStringAndSize");
145 return NULL;
146 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200147 if (size == 1 && str != NULL) {
148 struct _Py_bytes_state *state = get_bytes_state();
149 op = state->characters[*str & UCHAR_MAX];
150 if (op != NULL) {
151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200154 }
Victor Stinner91698d82020-06-25 14:07:40 +0200155 if (size == 0) {
156 return bytes_new_empty();
157 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200158
159 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
160 if (op == NULL)
161 return NULL;
162 if (str == NULL)
163 return (PyObject *) op;
164
Christian Heimesf051e432016-09-13 20:22:02 +0200165 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200166 /* share short strings */
167 if (size == 1) {
Victor Stinnerc41eed12020-06-23 15:54:35 +0200168 struct _Py_bytes_state *state = get_bytes_state();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200170 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 }
172 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000173}
174
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000175PyObject *
176PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000177{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200178 size_t size;
179 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 assert(str != NULL);
182 size = strlen(str);
183 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
184 PyErr_SetString(PyExc_OverflowError,
185 "byte string is too long");
186 return NULL;
187 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200188
189 struct _Py_bytes_state *state = get_bytes_state();
190 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200191 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200193 else if (size == 1) {
194 op = state->characters[*str & UCHAR_MAX];
195 if (op != NULL) {
196 Py_INCREF(op);
197 return (PyObject *)op;
198 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 /* Inline PyObject_NewVar */
Victor Stinner32bd68c2020-12-01 10:37:39 +0100202 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200203 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200205 }
206 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200208 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 /* share short strings */
Victor Stinner91698d82020-06-25 14:07:40 +0200210 if (size == 1) {
211 assert(state->characters[*str & UCHAR_MAX] == NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200213 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 }
215 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000216}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000217
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000218PyObject *
219PyBytes_FromFormatV(const char *format, va_list vargs)
220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200222 const char *f;
223 const char *p;
224 Py_ssize_t prec;
225 int longflag;
226 int size_tflag;
227 /* Longest 64-bit formatted numbers:
228 - "18446744073709551615\0" (21 bytes)
229 - "-9223372036854775808\0" (21 bytes)
230 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000231
Victor Stinner03dab782015-10-14 00:21:35 +0200232 Longest 64-bit pointer representation:
233 "0xffffffffffffffff\0" (19 bytes). */
234 char buffer[21];
235 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000236
Victor Stinner03dab782015-10-14 00:21:35 +0200237 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Victor Stinner03dab782015-10-14 00:21:35 +0200239 s = _PyBytesWriter_Alloc(&writer, strlen(format));
240 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200242 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Victor Stinner03dab782015-10-14 00:21:35 +0200244#define WRITE_BYTES(str) \
245 do { \
246 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
247 if (s == NULL) \
248 goto error; \
249 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200252 if (*f != '%') {
253 *s++ = *f;
254 continue;
255 }
256
257 p = f++;
258
259 /* ignore the width (ex: 10 in "%10s") */
260 while (Py_ISDIGIT(*f))
261 f++;
262
263 /* parse the precision (ex: 10 in "%.10s") */
264 prec = 0;
265 if (*f == '.') {
266 f++;
267 for (; Py_ISDIGIT(*f); f++) {
268 prec = (prec * 10) + (*f - '0');
269 }
270 }
271
272 while (*f && *f != '%' && !Py_ISALPHA(*f))
273 f++;
274
275 /* handle the long flag ('l'), but only for %ld and %lu.
276 others can be added when necessary. */
277 longflag = 0;
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282
283 /* handle the size_t flag ('z'). */
284 size_tflag = 0;
285 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
286 size_tflag = 1;
287 ++f;
288 }
289
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700290 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200291 (ex: 2 for "%s") */
292 writer.min_size -= (f - p + 1);
293
294 switch (*f) {
295 case 'c':
296 {
297 int c = va_arg(vargs, int);
298 if (c < 0 || c > 255) {
299 PyErr_SetString(PyExc_OverflowError,
300 "PyBytes_FromFormatV(): %c format "
301 "expects an integer in range [0; 255]");
302 goto error;
303 }
304 writer.min_size++;
305 *s++ = (unsigned char)c;
306 break;
307 }
308
309 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200310 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200311 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200312 }
313 else if (size_tflag) {
314 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
315 }
316 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200317 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200318 }
Victor Stinner03dab782015-10-14 00:21:35 +0200319 assert(strlen(buffer) < sizeof(buffer));
320 WRITE_BYTES(buffer);
321 break;
322
323 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200324 if (longflag) {
325 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
326 }
327 else if (size_tflag) {
328 sprintf(buffer, "%zu", va_arg(vargs, size_t));
329 }
330 else {
331 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
332 }
Victor Stinner03dab782015-10-14 00:21:35 +0200333 assert(strlen(buffer) < sizeof(buffer));
334 WRITE_BYTES(buffer);
335 break;
336
337 case 'i':
338 sprintf(buffer, "%i", va_arg(vargs, int));
339 assert(strlen(buffer) < sizeof(buffer));
340 WRITE_BYTES(buffer);
341 break;
342
343 case 'x':
344 sprintf(buffer, "%x", va_arg(vargs, int));
345 assert(strlen(buffer) < sizeof(buffer));
346 WRITE_BYTES(buffer);
347 break;
348
349 case 's':
350 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200352
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200353 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200354 if (prec <= 0) {
355 i = strlen(p);
356 }
357 else {
358 i = 0;
359 while (i < prec && p[i]) {
360 i++;
361 }
362 }
Victor Stinner03dab782015-10-14 00:21:35 +0200363 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
364 if (s == NULL)
365 goto error;
366 break;
367 }
368
369 case 'p':
370 sprintf(buffer, "%p", va_arg(vargs, void*));
371 assert(strlen(buffer) < sizeof(buffer));
372 /* %p is ill-defined: ensure leading 0x. */
373 if (buffer[1] == 'X')
374 buffer[1] = 'x';
375 else if (buffer[1] != 'x') {
376 memmove(buffer+2, buffer, strlen(buffer)+1);
377 buffer[0] = '0';
378 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 }
Victor Stinner03dab782015-10-14 00:21:35 +0200380 WRITE_BYTES(buffer);
381 break;
382
383 case '%':
384 writer.min_size++;
385 *s++ = '%';
386 break;
387
388 default:
389 if (*f == 0) {
390 /* fix min_size if we reached the end of the format string */
391 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393
Victor Stinner03dab782015-10-14 00:21:35 +0200394 /* invalid format string: copy unformatted string and exit */
395 WRITE_BYTES(p);
396 return _PyBytesWriter_Finish(&writer, s);
397 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Victor Stinner03dab782015-10-14 00:21:35 +0200400#undef WRITE_BYTES
401
402 return _PyBytesWriter_Finish(&writer, s);
403
404 error:
405 _PyBytesWriter_Dealloc(&writer);
406 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407}
408
409PyObject *
410PyBytes_FromFormat(const char *format, ...)
411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 PyObject* ret;
413 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000414
415#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000417#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000419#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 ret = PyBytes_FromFormatV(format, vargs);
421 va_end(vargs);
422 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000423}
424
Ethan Furmanb95b5612015-01-23 20:05:18 -0800425/* Helpers for formatstring */
426
427Py_LOCAL_INLINE(PyObject *)
428getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
429{
430 Py_ssize_t argidx = *p_argidx;
431 if (argidx < arglen) {
432 (*p_argidx)++;
433 if (arglen < 0)
434 return args;
435 else
436 return PyTuple_GetItem(args, argidx);
437 }
438 PyErr_SetString(PyExc_TypeError,
439 "not enough arguments for format string");
440 return NULL;
441}
442
Ethan Furmanb95b5612015-01-23 20:05:18 -0800443/* Returns a new reference to a PyBytes object, or NULL on failure. */
444
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200445static char*
446formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200447 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800448{
449 char *p;
450 PyObject *result;
451 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200452 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800453
454 x = PyFloat_AsDouble(v);
455 if (x == -1.0 && PyErr_Occurred()) {
456 PyErr_Format(PyExc_TypeError, "float argument required, "
457 "not %.200s", Py_TYPE(v)->tp_name);
458 return NULL;
459 }
460
461 if (prec < 0)
462 prec = 6;
463
464 p = PyOS_double_to_string(x, type, prec,
465 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
466
467 if (p == NULL)
468 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200469
470 len = strlen(p);
471 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200472 str = _PyBytesWriter_Prepare(writer, str, len);
473 if (str == NULL)
474 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200475 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200476 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200477 str += len;
478 return str;
479 }
480
481 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800482 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200483 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600484 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800485}
486
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300487static PyObject *
488formatlong(PyObject *v, int flags, int prec, int type)
489{
490 PyObject *result, *iobj;
491 if (type == 'i')
492 type = 'd';
493 if (PyLong_Check(v))
494 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
495 if (PyNumber_Check(v)) {
496 /* make sure number is a type of integer for o, x, and X */
497 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300498 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499 else
500 iobj = PyNumber_Long(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300501 if (iobj != NULL) {
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300502 assert(PyLong_Check(iobj));
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300503 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
504 Py_DECREF(iobj);
505 return result;
506 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return NULL;
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300509 }
510 PyErr_Format(PyExc_TypeError,
511 "%%%c format: %s is required, not %.200s", type,
512 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
Serhiy Storchakae2ec0b22020-10-09 14:14:37 +0300513 : "a real number",
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300514 Py_TYPE(v)->tp_name);
515 return NULL;
516}
517
518static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300521 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200522 *p = PyBytes_AS_STRING(arg)[0];
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300525 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 *p = PyByteArray_AS_STRING(arg)[0];
527 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528 }
529 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300530 int overflow;
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300531 long ival = PyLong_AsLongAndOverflow(arg, &overflow);
532 if (ival == -1 && PyErr_Occurred()) {
533 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 goto onError;
535 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300536 return 0;
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300537 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300538 if (!(0 <= ival && ival <= 255)) {
539 /* this includes an overflow in converting to C long */
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300540 PyErr_SetString(PyExc_OverflowError,
541 "%c arg not in range(256)");
542 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800543 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300544 *p = (char)ival;
545 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300547 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200548 PyErr_SetString(PyExc_TypeError,
549 "%c requires an integer in range(256) or a single byte");
550 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551}
552
/* Forward declaration: format_obj() calls this to copy an object that
   supports the buffer protocol into a new bytes object. */
static PyObject *_PyBytes_FromBuffer(PyObject *x);
554
Ethan Furmanb95b5612015-01-23 20:05:18 -0800555static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200556format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800557{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200558 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 /* is it a bytes object? */
560 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200561 *pbuf = PyBytes_AS_STRING(v);
562 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800563 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 return v;
565 }
566 if (PyByteArray_Check(v)) {
567 *pbuf = PyByteArray_AS_STRING(v);
568 *plen = PyByteArray_GET_SIZE(v);
569 Py_INCREF(v);
570 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571 }
572 /* does it support __bytes__? */
573 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
574 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100575 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_DECREF(func);
577 if (result == NULL)
578 return NULL;
579 if (!PyBytes_Check(result)) {
580 PyErr_Format(PyExc_TypeError,
581 "__bytes__ returned non-bytes (type %.200s)",
582 Py_TYPE(result)->tp_name);
583 Py_DECREF(result);
584 return NULL;
585 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200586 *pbuf = PyBytes_AS_STRING(result);
587 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 return result;
589 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800590 /* does it support buffer protocol? */
591 if (PyObject_CheckBuffer(v)) {
592 /* maybe we can avoid making a copy of the buffer object here? */
593 result = _PyBytes_FromBuffer(v);
594 if (result == NULL)
595 return NULL;
596 *pbuf = PyBytes_AS_STRING(result);
597 *plen = PyBytes_GET_SIZE(result);
598 return result;
599 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800601 "%%b requires a bytes-like object, "
602 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_TYPE(v)->tp_name);
604 return NULL;
605}
606
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800608
609PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200610_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
611 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612{
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 const char *fmt;
614 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800615 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 _PyBytesWriter writer;
620
Victor Stinner772b2b02015-10-14 09:56:53 +0200621 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 PyErr_BadInternalCall();
623 return NULL;
624 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 fmt = format;
626 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627
628 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200629 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630
631 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
632 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200634 if (!use_bytearray)
635 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200636
Ethan Furmanb95b5612015-01-23 20:05:18 -0800637 if (PyTuple_Check(args)) {
638 arglen = PyTuple_GET_SIZE(args);
639 argidx = 0;
640 }
641 else {
642 arglen = -1;
643 argidx = -2;
644 }
645 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
646 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
647 !PyByteArray_Check(args)) {
648 dict = args;
649 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 while (--fmtcnt >= 0) {
652 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200653 Py_ssize_t len;
654 char *pos;
655
Xiang Zhangb76ad512017-03-06 17:17:05 +0800656 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200657 if (pos != NULL)
658 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200659 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800660 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200661 assert(len != 0);
662
Christian Heimesf051e432016-09-13 20:22:02 +0200663 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200664 res += len;
665 fmt += len;
666 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 }
668 else {
669 /* Got a format specifier */
670 int flags = 0;
671 Py_ssize_t width = -1;
672 int prec = -1;
673 int c = '\0';
674 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800675 PyObject *v = NULL;
676 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200677 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800678 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200679 Py_ssize_t len = 0;
680 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200681 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800682
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200684 if (*fmt == '%') {
685 *res++ = '%';
686 fmt++;
687 fmtcnt--;
688 continue;
689 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800690 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200691 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800692 Py_ssize_t keylen;
693 PyObject *key;
694 int pcount = 1;
695
696 if (dict == NULL) {
697 PyErr_SetString(PyExc_TypeError,
698 "format requires a mapping");
699 goto error;
700 }
701 ++fmt;
702 --fmtcnt;
703 keystart = fmt;
704 /* Skip over balanced parentheses */
705 while (pcount > 0 && --fmtcnt >= 0) {
706 if (*fmt == ')')
707 --pcount;
708 else if (*fmt == '(')
709 ++pcount;
710 fmt++;
711 }
712 keylen = fmt - keystart - 1;
713 if (fmtcnt < 0 || pcount > 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "incomplete format key");
716 goto error;
717 }
718 key = PyBytes_FromStringAndSize(keystart,
719 keylen);
720 if (key == NULL)
721 goto error;
722 if (args_owned) {
723 Py_DECREF(args);
724 args_owned = 0;
725 }
726 args = PyObject_GetItem(dict, key);
727 Py_DECREF(key);
728 if (args == NULL) {
729 goto error;
730 }
731 args_owned = 1;
732 arglen = -1;
733 argidx = -2;
734 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200735
736 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800737 while (--fmtcnt >= 0) {
738 switch (c = *fmt++) {
739 case '-': flags |= F_LJUST; continue;
740 case '+': flags |= F_SIGN; continue;
741 case ' ': flags |= F_BLANK; continue;
742 case '#': flags |= F_ALT; continue;
743 case '0': flags |= F_ZERO; continue;
744 }
745 break;
746 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200747
748 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800749 if (c == '*') {
750 v = getnextarg(args, arglen, &argidx);
751 if (v == NULL)
752 goto error;
753 if (!PyLong_Check(v)) {
754 PyErr_SetString(PyExc_TypeError,
755 "* wants int");
756 goto error;
757 }
758 width = PyLong_AsSsize_t(v);
759 if (width == -1 && PyErr_Occurred())
760 goto error;
761 if (width < 0) {
762 flags |= F_LJUST;
763 width = -width;
764 }
765 if (--fmtcnt >= 0)
766 c = *fmt++;
767 }
768 else if (c >= 0 && isdigit(c)) {
769 width = c - '0';
770 while (--fmtcnt >= 0) {
771 c = Py_CHARMASK(*fmt++);
772 if (!isdigit(c))
773 break;
774 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
775 PyErr_SetString(
776 PyExc_ValueError,
777 "width too big");
778 goto error;
779 }
780 width = width*10 + (c - '0');
781 }
782 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200783
784 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (c == '.') {
786 prec = 0;
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 if (c == '*') {
790 v = getnextarg(args, arglen, &argidx);
791 if (v == NULL)
792 goto error;
793 if (!PyLong_Check(v)) {
794 PyErr_SetString(
795 PyExc_TypeError,
796 "* wants int");
797 goto error;
798 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200799 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800800 if (prec == -1 && PyErr_Occurred())
801 goto error;
802 if (prec < 0)
803 prec = 0;
804 if (--fmtcnt >= 0)
805 c = *fmt++;
806 }
807 else if (c >= 0 && isdigit(c)) {
808 prec = c - '0';
809 while (--fmtcnt >= 0) {
810 c = Py_CHARMASK(*fmt++);
811 if (!isdigit(c))
812 break;
813 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
814 PyErr_SetString(
815 PyExc_ValueError,
816 "prec too big");
817 goto error;
818 }
819 prec = prec*10 + (c - '0');
820 }
821 }
822 } /* prec */
823 if (fmtcnt >= 0) {
824 if (c == 'h' || c == 'l' || c == 'L') {
825 if (--fmtcnt >= 0)
826 c = *fmt++;
827 }
828 }
829 if (fmtcnt < 0) {
830 PyErr_SetString(PyExc_ValueError,
831 "incomplete format");
832 goto error;
833 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200834 v = getnextarg(args, arglen, &argidx);
835 if (v == NULL)
836 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300838 if (fmtcnt == 0) {
839 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200840 writer.overallocate = 0;
841 }
842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 sign = 0;
844 fill = ' ';
845 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700846 case 'r':
847 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800848 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200849 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 if (temp == NULL)
851 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200852 assert(PyUnicode_IS_ASCII(temp));
853 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
854 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800855 if (prec >= 0 && len > prec)
856 len = prec;
857 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200858
Ethan Furmanb95b5612015-01-23 20:05:18 -0800859 case 's':
860 // %s is only for 2/3 code; 3 only code should use %b
861 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200862 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800863 if (temp == NULL)
864 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800865 if (prec >= 0 && len > prec)
866 len = prec;
867 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200868
Ethan Furmanb95b5612015-01-23 20:05:18 -0800869 case 'i':
870 case 'd':
871 case 'u':
872 case 'o':
873 case 'x':
874 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200875 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200876 && width == -1 && prec == -1
877 && !(flags & (F_SIGN | F_BLANK))
878 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 {
880 /* Fast path */
881 int alternate = flags & F_ALT;
882 int base;
883
884 switch(c)
885 {
886 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700887 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200888 case 'd':
889 case 'i':
890 case 'u':
891 base = 10;
892 break;
893 case 'o':
894 base = 8;
895 break;
896 case 'x':
897 case 'X':
898 base = 16;
899 break;
900 }
901
902 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200903 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200904 res = _PyLong_FormatBytesWriter(&writer, res,
905 v, base, alternate);
906 if (res == NULL)
907 goto error;
908 continue;
909 }
910
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300911 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200912 if (!temp)
913 goto error;
914 assert(PyUnicode_IS_ASCII(temp));
915 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
916 len = PyUnicode_GET_LENGTH(temp);
917 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800918 if (flags & F_ZERO)
919 fill = '0';
920 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 case 'e':
923 case 'E':
924 case 'f':
925 case 'F':
926 case 'g':
927 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928 if (width == -1 && prec == -1
929 && !(flags & (F_SIGN | F_BLANK)))
930 {
931 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200932 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200933 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200934 if (res == NULL)
935 goto error;
936 continue;
937 }
938
Victor Stinnerad771582015-10-09 12:38:53 +0200939 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
941 pbuf = PyBytes_AS_STRING(temp);
942 len = PyBytes_GET_SIZE(temp);
943 sign = 1;
944 if (flags & F_ZERO)
945 fill = '0';
946 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200947
Ethan Furmanb95b5612015-01-23 20:05:18 -0800948 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200949 pbuf = &onechar;
950 len = byte_converter(v, &onechar);
951 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800952 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200953 if (width == -1) {
954 /* Fast path */
955 *res++ = onechar;
956 continue;
957 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800958 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 default:
961 PyErr_Format(PyExc_ValueError,
962 "unsupported format character '%c' (0x%x) "
963 "at index %zd",
964 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200965 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 goto error;
967 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (sign) {
970 if (*pbuf == '-' || *pbuf == '+') {
971 sign = *pbuf++;
972 len--;
973 }
974 else if (flags & F_SIGN)
975 sign = '+';
976 else if (flags & F_BLANK)
977 sign = ' ';
978 else
979 sign = 0;
980 }
981 if (width < len)
982 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200983
984 alloc = width;
985 if (sign != 0 && len == width)
986 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200987 /* 2: size preallocated for %s */
988 if (alloc > 2) {
989 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200990 if (res == NULL)
991 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200993#ifndef NDEBUG
994 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995#endif
996
997 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (sign) {
999 if (fill != ' ')
1000 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001001 if (width > len)
1002 width--;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* Write the numeric prefix for "x", "X" and "o" formats
1006 if the alternate form is used.
1007 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001008 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001009 assert(pbuf[0] == '0');
1010 assert(pbuf[1] == c);
1011 if (fill != ' ') {
1012 *res++ = *pbuf++;
1013 *res++ = *pbuf++;
1014 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001015 width -= 2;
1016 if (width < 0)
1017 width = 0;
1018 len -= 2;
1019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
1021 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023 memset(res, fill, width - len);
1024 res += (width - len);
1025 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028 /* If padding with spaces: write sign if needed and/or numeric
1029 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001030 if (fill == ' ') {
1031 if (sign)
1032 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001033 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 assert(pbuf[0] == '0');
1035 assert(pbuf[1] == c);
1036 *res++ = *pbuf++;
1037 *res++ = *pbuf++;
1038 }
1039 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001040
1041 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001042 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
1045 /* Pad right with the fill character if needed */
1046 if (width > len) {
1047 memset(res, ' ', width - len);
1048 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001050
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001051 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 PyErr_SetString(PyExc_TypeError,
1053 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 Py_XDECREF(temp);
1055 goto error;
1056 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001058
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001059#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060 /* check that we computed the exact size for this write */
1061 assert((res - before) == alloc);
1062#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001063 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001064
1065 /* If overallocation was disabled, ensure that it was the last
1066 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001067 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001069
Ethan Furmanb95b5612015-01-23 20:05:18 -08001070 if (argidx < arglen && !dict) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "not all arguments converted during bytes formatting");
1073 goto error;
1074 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001075
Ethan Furmanb95b5612015-01-23 20:05:18 -08001076 if (args_owned) {
1077 Py_DECREF(args);
1078 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001079 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001080
1081 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001082 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001083 if (args_owned) {
1084 Py_DECREF(args);
1085 }
1086 return NULL;
1087}
1088
Greg Price3a4f6672019-09-12 11:12:22 -07001089/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001090PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 Py_ssize_t len,
1092 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001093 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001096 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001098 _PyBytesWriter writer;
1099
1100 _PyBytesWriter_Init(&writer);
1101
1102 p = _PyBytesWriter_Alloc(&writer, len);
1103 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001105 writer.overallocate = 1;
1106
Eric V. Smith42454af2016-10-31 09:22:08 -04001107 *first_invalid_escape = NULL;
1108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 end = s + len;
1110 while (s < end) {
1111 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001112 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 continue;
1114 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 PyErr_SetString(PyExc_ValueError,
1119 "Trailing \\ in string");
1120 goto failed;
1121 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 switch (*s++) {
1124 /* XXX This assumes ASCII! */
1125 case '\n': break;
1126 case '\\': *p++ = '\\'; break;
1127 case '\'': *p++ = '\''; break;
1128 case '\"': *p++ = '\"'; break;
1129 case 'b': *p++ = '\b'; break;
1130 case 'f': *p++ = '\014'; break; /* FF */
1131 case 't': *p++ = '\t'; break;
1132 case 'n': *p++ = '\n'; break;
1133 case 'r': *p++ = '\r'; break;
1134 case 'v': *p++ = '\013'; break; /* VT */
1135 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1136 case '0': case '1': case '2': case '3':
1137 case '4': case '5': case '6': case '7':
1138 c = s[-1] - '0';
1139 if (s < end && '0' <= *s && *s <= '7') {
1140 c = (c<<3) + *s++ - '0';
1141 if (s < end && '0' <= *s && *s <= '7')
1142 c = (c<<3) + *s++ - '0';
1143 }
1144 *p++ = c;
1145 break;
1146 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (s+1 < end) {
1148 int digit1, digit2;
1149 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1150 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1151 if (digit1 < 16 && digit2 < 16) {
1152 *p++ = (unsigned char)((digit1 << 4) + digit2);
1153 s += 2;
1154 break;
1155 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001157 /* invalid hexadecimal digits */
1158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001160 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001161 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001162 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 goto failed;
1164 }
1165 if (strcmp(errors, "replace") == 0) {
1166 *p++ = '?';
1167 } else if (strcmp(errors, "ignore") == 0)
1168 /* do nothing */;
1169 else {
1170 PyErr_Format(PyExc_ValueError,
1171 "decoding error; unknown "
1172 "error handling code: %.400s",
1173 errors);
1174 goto failed;
1175 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001176 /* skip \x */
1177 if (s < end && Py_ISXDIGIT(s[0]))
1178 s++; /* and a hexdigit */
1179 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001182 if (*first_invalid_escape == NULL) {
1183 *first_invalid_escape = s-1; /* Back up one char, since we've
1184 already incremented s. */
1185 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001187 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 }
1189 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001190
1191 return _PyBytesWriter_Finish(&writer, p);
1192
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001194 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001196}
1197
Eric V. Smith42454af2016-10-31 09:22:08 -04001198PyObject *PyBytes_DecodeEscape(const char *s,
1199 Py_ssize_t len,
1200 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001201 Py_ssize_t Py_UNUSED(unicode),
1202 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001203{
1204 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001205 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001206 &first_invalid_escape);
1207 if (result == NULL)
1208 return NULL;
1209 if (first_invalid_escape != NULL) {
1210 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1211 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001212 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001213 Py_DECREF(result);
1214 return NULL;
1215 }
1216 }
1217 return result;
1218
1219}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220/* -------------------------------------------------------------------- */
1221/* object api */
1222
1223Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001224PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return -1;
1230 }
1231 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232}
1233
1234char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001235PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 if (!PyBytes_Check(op)) {
1238 PyErr_Format(PyExc_TypeError,
1239 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1240 return NULL;
1241 }
1242 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
1244
1245int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001246PyBytes_AsStringAndSize(PyObject *obj,
1247 char **s,
1248 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (s == NULL) {
1251 PyErr_BadInternalCall();
1252 return -1;
1253 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 if (!PyBytes_Check(obj)) {
1256 PyErr_Format(PyExc_TypeError,
1257 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1258 return -1;
1259 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 *s = PyBytes_AS_STRING(obj);
1262 if (len != NULL)
1263 *len = PyBytes_GET_SIZE(obj);
1264 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001265 PyErr_SetString(PyExc_ValueError,
1266 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 return -1;
1268 }
1269 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270}
Neal Norwitz6968b052007-02-27 19:02:19 +00001271
1272/* -------------------------------------------------------------------- */
1273/* Methods */
1274
Victor Stinner91698d82020-06-25 14:07:40 +02001275#define STRINGLIB_GET_EMPTY() bytes_get_empty()
Victor Stinnerc41eed12020-06-23 15:54:35 +02001276
Eric Smith0923d1d2009-04-16 20:16:10 +00001277#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
1279#include "stringlib/fastsearch.h"
1280#include "stringlib/count.h"
1281#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001282#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001283#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001284#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001285#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001286
Eric Smith0f78bff2009-11-30 01:01:42 +00001287#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001288
Victor Stinnerc41eed12020-06-23 15:54:35 +02001289#undef STRINGLIB_GET_EMPTY
1290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291PyObject *
1292PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001296 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001298 unsigned char quote;
1299 const unsigned char *s;
1300 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301
1302 /* Compute size of output string */
1303 squotes = dquotes = 0;
1304 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001305 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 case '\'': squotes++; break;
1310 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 default:
1314 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001315 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001316 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001317 if (newsize > PY_SSIZE_T_MAX - incr)
1318 goto overflow;
1319 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320 }
1321 quote = '\'';
1322 if (smartquotes && squotes && !dquotes)
1323 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001324 if (squotes && quote == '\'') {
1325 if (newsize > PY_SSIZE_T_MAX - squotes)
1326 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329
1330 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (v == NULL) {
1332 return NULL;
1333 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 *p++ = 'b', *p++ = quote;
1337 for (i = 0; i < length; i++) {
1338 unsigned char c = op->ob_sval[i];
1339 if (c == quote || c == '\\')
1340 *p++ = '\\', *p++ = c;
1341 else if (c == '\t')
1342 *p++ = '\\', *p++ = 't';
1343 else if (c == '\n')
1344 *p++ = '\\', *p++ = 'n';
1345 else if (c == '\r')
1346 *p++ = '\\', *p++ = 'r';
1347 else if (c < ' ' || c >= 0x7f) {
1348 *p++ = '\\';
1349 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001350 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1351 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 else
1354 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001357 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001359
1360 overflow:
1361 PyErr_SetString(PyExc_OverflowError,
1362 "bytes object is too large to make repr");
1363 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001364}
1365
Neal Norwitz6968b052007-02-27 19:02:19 +00001366static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001367bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001370}
1371
Neal Norwitz6968b052007-02-27 19:02:19 +00001372static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001373bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001374{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001375 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001377 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001379 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 }
1381 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001382}
1383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001385bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388}
Neal Norwitz6968b052007-02-27 19:02:19 +00001389
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390/* This is also used by PyBytes_Concat() */
1391static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001392bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 Py_buffer va, vb;
1395 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 va.len = -1;
1398 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001399 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1400 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001402 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 goto done;
1404 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 /* Optimize end cases */
1407 if (va.len == 0 && PyBytes_CheckExact(b)) {
1408 result = b;
1409 Py_INCREF(result);
1410 goto done;
1411 }
1412 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1413 result = a;
1414 Py_INCREF(result);
1415 goto done;
1416 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001418 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 PyErr_NoMemory();
1420 goto done;
1421 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001423 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 if (result != NULL) {
1425 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1426 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
1429 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (va.len != -1)
1431 PyBuffer_Release(&va);
1432 if (vb.len != -1)
1433 PyBuffer_Release(&vb);
1434 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435}
Neal Norwitz6968b052007-02-27 19:02:19 +00001436
1437static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001439{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001440 Py_ssize_t i;
1441 Py_ssize_t j;
1442 Py_ssize_t size;
1443 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 size_t nbytes;
1445 if (n < 0)
1446 n = 0;
1447 /* watch out for overflows: the size can overflow int,
1448 * and the # of bytes needed can overflow size_t
1449 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001450 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 PyErr_SetString(PyExc_OverflowError,
1452 "repeated bytes are too long");
1453 return NULL;
1454 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001455 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1457 Py_INCREF(a);
1458 return (PyObject *)a;
1459 }
1460 nbytes = (size_t)size;
1461 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1462 PyErr_SetString(PyExc_OverflowError,
1463 "repeated bytes are too long");
1464 return NULL;
1465 }
Victor Stinner32bd68c2020-12-01 10:37:39 +01001466 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
Victor Stinner04fc4f22020-06-16 01:28:07 +02001467 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +02001469 }
1470 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 op->ob_shash = -1;
1472 op->ob_sval[size] = '\0';
1473 if (Py_SIZE(a) == 1 && n > 0) {
1474 memset(op->ob_sval, a->ob_sval[0] , n);
1475 return (PyObject *) op;
1476 }
1477 i = 0;
1478 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001479 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 i = Py_SIZE(a);
1481 }
1482 while (i < size) {
1483 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001484 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 i += j;
1486 }
1487 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001488}
1489
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001490static int
1491bytes_contains(PyObject *self, PyObject *arg)
1492{
1493 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1494}
1495
Neal Norwitz6968b052007-02-27 19:02:19 +00001496static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001497bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 if (i < 0 || i >= Py_SIZE(a)) {
1500 PyErr_SetString(PyExc_IndexError, "index out of range");
1501 return NULL;
1502 }
1503 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Benjamin Peterson621b4302016-09-09 13:54:34 -07001506static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001507bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1508{
1509 int cmp;
1510 Py_ssize_t len;
1511
1512 len = Py_SIZE(a);
1513 if (Py_SIZE(b) != len)
1514 return 0;
1515
1516 if (a->ob_sval[0] != b->ob_sval[0])
1517 return 0;
1518
1519 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520 return (cmp == 0);
1521}
1522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001524bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001525{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 int c;
1527 Py_ssize_t len_a, len_b;
1528 Py_ssize_t min_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 /* Make sure both arguments are strings. */
1531 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001532 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchaka313467e2020-11-22 22:00:53 +02001533 if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001534 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001535 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001536 return NULL;
1537 }
Serhiy Storchaka313467e2020-11-22 22:00:53 +02001538 if (PyLong_Check(a) || PyLong_Check(b)) {
1539 if (PyErr_WarnEx(PyExc_BytesWarning,
1540 "Comparison between bytes and int", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001541 return NULL;
1542 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 }
stratakise8b19652017-11-02 11:32:54 +01001544 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001546 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001548 case Py_EQ:
1549 case Py_LE:
1550 case Py_GE:
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02001551 /* a byte string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001552 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001553 case Py_NE:
1554 case Py_LT:
1555 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001556 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001557 default:
1558 PyErr_BadArgument();
1559 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
1561 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 else if (op == Py_EQ || op == Py_NE) {
1563 int eq = bytes_compare_eq(a, b);
1564 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001565 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001566 }
1567 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001568 len_a = Py_SIZE(a);
1569 len_b = Py_SIZE(b);
1570 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001571 if (min_len > 0) {
1572 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001573 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001574 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001576 else
1577 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001578 if (c != 0)
1579 Py_RETURN_RICHCOMPARE(c, 0, op);
1580 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001582}
1583
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001584static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001585bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001586{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001587 if (a->ob_shash == -1) {
1588 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001589 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001590 }
1591 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001592}
1593
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001595bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596{
Victor Stinnera15e2602020-04-08 02:01:56 +02001597 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1599 if (i == -1 && PyErr_Occurred())
1600 return NULL;
1601 if (i < 0)
1602 i += PyBytes_GET_SIZE(self);
1603 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1604 PyErr_SetString(PyExc_IndexError,
1605 "index out of range");
1606 return NULL;
1607 }
1608 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1609 }
1610 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001611 Py_ssize_t start, stop, step, slicelength, i;
1612 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001613 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 char* result_buf;
1615 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001616
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001617 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 return NULL;
1619 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001620 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1621 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 if (slicelength <= 0) {
1624 return PyBytes_FromStringAndSize("", 0);
1625 }
1626 else if (start == 0 && step == 1 &&
1627 slicelength == PyBytes_GET_SIZE(self) &&
1628 PyBytes_CheckExact(self)) {
1629 Py_INCREF(self);
1630 return (PyObject *)self;
1631 }
1632 else if (step == 1) {
1633 return PyBytes_FromStringAndSize(
1634 PyBytes_AS_STRING(self) + start,
1635 slicelength);
1636 }
1637 else {
1638 source_buf = PyBytes_AS_STRING(self);
1639 result = PyBytes_FromStringAndSize(NULL, slicelength);
1640 if (result == NULL)
1641 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 result_buf = PyBytes_AS_STRING(result);
1644 for (cur = start, i = 0; i < slicelength;
1645 cur += step, i++) {
1646 result_buf[i] = source_buf[cur];
1647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 return result;
1650 }
1651 }
1652 else {
1653 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001654 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 Py_TYPE(item)->tp_name);
1656 return NULL;
1657 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658}
1659
1660static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001661bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1664 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665}
1666
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001667static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 (lenfunc)bytes_length, /*sq_length*/
1669 (binaryfunc)bytes_concat, /*sq_concat*/
1670 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1671 (ssizeargfunc)bytes_item, /*sq_item*/
1672 0, /*sq_slice*/
1673 0, /*sq_ass_item*/
1674 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001675 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676};
1677
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001678static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 (lenfunc)bytes_length,
1680 (binaryfunc)bytes_subscript,
1681 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682};
1683
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001684static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 (getbufferproc)bytes_buffer_getbuffer,
1686 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687};
1688
1689
/* Strip-mode selectors passed as `striptype` to do_strip(), do_xstrip()
   and do_argstrip(). */
#define LEFTSTRIP 0   /* trim the leading end only */
#define RIGHTSTRIP 1  /* trim the trailing end only */
#define BOTHSTRIP 2   /* trim both ends */
1693
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001694/*[clinic input]
1695bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001697 sep: object = None
1698 The delimiter according which to split the bytes.
1699 None (the default value) means split on ASCII whitespace characters
1700 (space, tab, return, newline, formfeed, vertical tab).
1701 maxsplit: Py_ssize_t = -1
1702 Maximum number of splits to do.
1703 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705Return a list of the sections in the bytes, using sep as the delimiter.
1706[clinic start generated code]*/
1707
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001708static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001709bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1710/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711{
1712 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 const char *s = PyBytes_AS_STRING(self), *sub;
1714 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001715 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 if (maxsplit < 0)
1718 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001719 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001721 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 return NULL;
1723 sub = vsub.buf;
1724 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1727 PyBuffer_Release(&vsub);
1728 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001729}
1730
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001731/*[clinic input]
1732bytes.partition
1733
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001734 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001735 /
1736
1737Partition the bytes into three parts using the given separator.
1738
1739This will search for the separator sep in the bytes. If the separator is found,
1740returns a 3-tuple containing the part before the separator, the separator
1741itself, and the part after it.
1742
1743If the separator is not found, returns a 3-tuple containing the original bytes
1744object and two empty bytes objects.
1745[clinic start generated code]*/
1746
Neal Norwitz6968b052007-02-27 19:02:19 +00001747static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001748bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001749/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001750{
Neal Norwitz6968b052007-02-27 19:02:19 +00001751 return stringlib_partition(
1752 (PyObject*) self,
1753 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001754 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001755 );
1756}
1757
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001758/*[clinic input]
1759bytes.rpartition
1760
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762 /
1763
1764Partition the bytes into three parts using the given separator.
1765
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001766This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767the separator is found, returns a 3-tuple containing the part before the
1768separator, the separator itself, and the part after it.
1769
1770If the separator is not found, returns a 3-tuple containing two empty bytes
1771objects and the original bytes object.
1772[clinic start generated code]*/
1773
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001774static PyObject *
1775bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001776/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return stringlib_rpartition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001783}
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785/*[clinic input]
1786bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001787
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788Return a list of the sections in the bytes, using sep as the delimiter.
1789
1790Splitting is done starting at the end of the bytes and working to the front.
1791[clinic start generated code]*/
1792
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001793static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001794bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1795/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796{
1797 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 const char *s = PyBytes_AS_STRING(self), *sub;
1799 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 if (maxsplit < 0)
1803 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 return NULL;
1808 sub = vsub.buf;
1809 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1812 PyBuffer_Release(&vsub);
1813 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001814}
1815
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817/*[clinic input]
1818bytes.join
1819
1820 iterable_of_bytes: object
1821 /
1822
1823Concatenate any number of bytes objects.
1824
1825The bytes whose method is called is inserted in between each pair.
1826
1827The result is returned as a new bytes object.
1828
1829Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1830[clinic start generated code]*/
1831
Neal Norwitz6968b052007-02-27 19:02:19 +00001832static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001833bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1834/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001835{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001836 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001837}
1838
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839PyObject *
1840_PyBytes_Join(PyObject *sep, PyObject *x)
1841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 assert(sep != NULL && PyBytes_Check(sep));
1843 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001844 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845}
1846
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001847static PyObject *
1848bytes_find(PyBytesObject *self, PyObject *args)
1849{
1850 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851}
1852
1853static PyObject *
1854bytes_index(PyBytesObject *self, PyObject *args)
1855{
1856 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857}
1858
1859
1860static PyObject *
1861bytes_rfind(PyBytesObject *self, PyObject *args)
1862{
1863 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1864}
1865
1866
1867static PyObject *
1868bytes_rindex(PyBytesObject *self, PyObject *args)
1869{
1870 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871}
1872
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
1874Py_LOCAL_INLINE(PyObject *)
1875do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001878 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 Py_ssize_t len = PyBytes_GET_SIZE(self);
1880 char *sep;
1881 Py_ssize_t seplen;
1882 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001884 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 return NULL;
1886 sep = vsep.buf;
1887 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 i = 0;
1890 if (striptype != RIGHTSTRIP) {
1891 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892 i++;
1893 }
1894 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 j = len;
1897 if (striptype != LEFTSTRIP) {
1898 do {
1899 j--;
1900 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901 j++;
1902 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1907 Py_INCREF(self);
1908 return (PyObject*)self;
1909 }
1910 else
1911 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001912}
1913
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
1915Py_LOCAL_INLINE(PyObject *)
1916do_strip(PyBytesObject *self, int striptype)
1917{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001918 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 i = 0;
1922 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001923 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 i++;
1925 }
1926 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 j = len;
1929 if (striptype != LEFTSTRIP) {
1930 do {
1931 j--;
David Malcolm96960882010-11-05 17:23:41 +00001932 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 j++;
1934 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1937 Py_INCREF(self);
1938 return (PyObject*)self;
1939 }
1940 else
1941 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942}
1943
1944
1945Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001946do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001948 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001949 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 }
1951 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952}
1953
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954/*[clinic input]
1955bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957 bytes: object = None
1958 /
1959
1960Strip leading and trailing bytes contained in the argument.
1961
1962If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1963[clinic start generated code]*/
1964
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001965static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001966bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001967/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001969 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001970}
1971
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001972/*[clinic input]
1973bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975 bytes: object = None
1976 /
1977
1978Strip leading bytes contained in the argument.
1979
1980If the argument is omitted or None, strip leading ASCII whitespace.
1981[clinic start generated code]*/
1982
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983static PyObject *
1984bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001985/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986{
1987 return do_argstrip(self, LEFTSTRIP, bytes);
1988}
1989
1990/*[clinic input]
1991bytes.rstrip
1992
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001993 bytes: object = None
1994 /
1995
1996Strip trailing bytes contained in the argument.
1997
1998If the argument is omitted or None, strip trailing ASCII whitespace.
1999[clinic start generated code]*/
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001static PyObject *
2002bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002003/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004{
2005 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002006}
Neal Norwitz6968b052007-02-27 19:02:19 +00002007
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002009static PyObject *
2010bytes_count(PyBytesObject *self, PyObject *args)
2011{
2012 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2013}
2014
2015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002016/*[clinic input]
2017bytes.translate
2018
Victor Stinner049e5092014-08-17 22:20:00 +02002019 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002020 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002022 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023
2024Return a copy with each character mapped by the given translation table.
2025
Martin Panter1b6c6da2016-08-27 08:35:02 +00002026All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027The remaining characters are mapped through the given translation table.
2028[clinic start generated code]*/
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002031bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002032 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002033/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002035 const char *input;
2036 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002037 Py_buffer table_view = {NULL, NULL};
2038 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002040 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 Py_ssize_t inlen, tablen, dellen = 0;
2044 PyObject *result;
2045 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002047 if (PyBytes_Check(table)) {
2048 table_chars = PyBytes_AS_STRING(table);
2049 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002051 else if (table == Py_None) {
2052 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 tablen = 256;
2054 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002055 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002056 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002057 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002058 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 tablen = table_view.len;
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (tablen != 256) {
2063 PyErr_SetString(PyExc_ValueError,
2064 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 return NULL;
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 if (deletechars != NULL) {
2070 if (PyBytes_Check(deletechars)) {
2071 del_table_chars = PyBytes_AS_STRING(deletechars);
2072 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002075 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002076 PyBuffer_Release(&table_view);
2077 return NULL;
2078 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002079 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002080 dellen = del_table_view.len;
2081 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 }
2083 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002084 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 dellen = 0;
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 inlen = PyBytes_GET_SIZE(input_obj);
2089 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002090 if (result == NULL) {
2091 PyBuffer_Release(&del_table_view);
2092 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002094 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002095 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 /* If no deletions are required, use faster code */
2100 for (i = inlen; --i >= 0; ) {
2101 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 changed = 1;
2104 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002105 if (!changed && PyBytes_CheckExact(input_obj)) {
2106 Py_INCREF(input_obj);
2107 Py_DECREF(result);
2108 result = input_obj;
2109 }
2110 PyBuffer_Release(&del_table_view);
2111 PyBuffer_Release(&table_view);
2112 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 for (i = 0; i < 256; i++)
2117 trans_table[i] = Py_CHARMASK(i);
2118 } else {
2119 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002120 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002125 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002126 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 for (i = inlen; --i >= 0; ) {
2129 c = Py_CHARMASK(*input++);
2130 if (trans_table[c] != -1)
2131 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2132 continue;
2133 changed = 1;
2134 }
2135 if (!changed && PyBytes_CheckExact(input_obj)) {
2136 Py_DECREF(result);
2137 Py_INCREF(input_obj);
2138 return input_obj;
2139 }
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02002140 /* Fix the size of the resulting byte string */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 if (inlen > 0)
2142 _PyBytes_Resize(&result, output - output_start);
2143 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144}
2145
2146
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147/*[clinic input]
2148
2149@staticmethod
2150bytes.maketrans
2151
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002152 frm: Py_buffer
2153 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154 /
2155
2156Return a translation table useable for the bytes or bytearray translate method.
2157
2158The returned table will be one where each byte in frm is mapped to the byte at
2159the same position in to.
2160
2161The bytes objects frm and to must be of the same length.
2162[clinic start generated code]*/
2163
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002165bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002166/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002167{
2168 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002169}
2170
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002171
2172/*[clinic input]
2173bytes.replace
2174
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002175 old: Py_buffer
2176 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177 count: Py_ssize_t = -1
2178 Maximum number of occurrences to replace.
2179 -1 (the default value) means replace all occurrences.
2180 /
2181
2182Return a copy with all occurrences of substring old replaced by new.
2183
2184If the optional argument count is given, only the first count occurrences are
2185replaced.
2186[clinic start generated code]*/
2187
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002188static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002189bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002190 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002191/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002192{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002193 return stringlib_replace((PyObject *)self,
2194 (const char *)old->buf, old->len,
2195 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196}
2197
2198/** End DALKE **/
2199
sweeneydea81849b2020-04-22 17:05:48 -04002200/*[clinic input]
2201bytes.removeprefix as bytes_removeprefix
2202
2203 prefix: Py_buffer
2204 /
2205
2206Return a bytes object with the given prefix string removed if present.
2207
2208If the bytes starts with the prefix string, return bytes[len(prefix):].
2209Otherwise, return a copy of the original bytes.
2210[clinic start generated code]*/
2211
2212static PyObject *
2213bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2214/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2215{
2216 const char *self_start = PyBytes_AS_STRING(self);
2217 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2218 const char *prefix_start = prefix->buf;
2219 Py_ssize_t prefix_len = prefix->len;
2220
2221 if (self_len >= prefix_len
2222 && prefix_len > 0
2223 && memcmp(self_start, prefix_start, prefix_len) == 0)
2224 {
2225 return PyBytes_FromStringAndSize(self_start + prefix_len,
2226 self_len - prefix_len);
2227 }
2228
2229 if (PyBytes_CheckExact(self)) {
2230 Py_INCREF(self);
2231 return (PyObject *)self;
2232 }
2233
2234 return PyBytes_FromStringAndSize(self_start, self_len);
2235}
2236
2237/*[clinic input]
2238bytes.removesuffix as bytes_removesuffix
2239
2240 suffix: Py_buffer
2241 /
2242
2243Return a bytes object with the given suffix string removed if present.
2244
2245If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)]. Otherwise, return a copy of the original
2247bytes.
2248[clinic start generated code]*/
2249
2250static PyObject *
2251bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2252/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2253{
2254 const char *self_start = PyBytes_AS_STRING(self);
2255 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2256 const char *suffix_start = suffix->buf;
2257 Py_ssize_t suffix_len = suffix->len;
2258
2259 if (self_len >= suffix_len
2260 && suffix_len > 0
2261 && memcmp(self_start + self_len - suffix_len,
2262 suffix_start, suffix_len) == 0)
2263 {
2264 return PyBytes_FromStringAndSize(self_start,
2265 self_len - suffix_len);
2266 }
2267
2268 if (PyBytes_CheckExact(self)) {
2269 Py_INCREF(self);
2270 return (PyObject *)self;
2271 }
2272
2273 return PyBytes_FromStringAndSize(self_start, self_len);
2274}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002276static PyObject *
2277bytes_startswith(PyBytesObject *self, PyObject *args)
2278{
2279 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2280}
2281
2282static PyObject *
2283bytes_endswith(PyBytesObject *self, PyObject *args)
2284{
2285 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2286}
2287
2288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002289/*[clinic input]
2290bytes.decode
2291
2292 encoding: str(c_default="NULL") = 'utf-8'
2293 The encoding with which to decode the bytes.
2294 errors: str(c_default="NULL") = 'strict'
2295 The error handling scheme to use for the handling of decoding errors.
2296 The default is 'strict' meaning that decoding errors raise a
2297 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2298 as well as any other name registered with codecs.register_error that
2299 can handle UnicodeDecodeErrors.
2300
2301Decode the bytes using the codec registered for encoding.
2302[clinic start generated code]*/
2303
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002305bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002306 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002307/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002308{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002309 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002310}
2311
Guido van Rossum20188312006-05-05 15:15:40 +00002312
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002313/*[clinic input]
2314bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002315
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002316 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317
2318Return a list of the lines in the bytes, breaking at line boundaries.
2319
2320Line breaks are not included in the resulting list unless keepends is given and
2321true.
2322[clinic start generated code]*/
2323
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002324static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002325bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002326/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002328 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002329 (PyObject*) self, PyBytes_AS_STRING(self),
2330 PyBytes_GET_SIZE(self), keepends
2331 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002332}
2333
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002334/*[clinic input]
2335@classmethod
2336bytes.fromhex
2337
2338 string: unicode
2339 /
2340
2341Create a bytes object from a string of hexadecimal numbers.
2342
2343Spaces between two numbers are accepted.
2344Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2345[clinic start generated code]*/
2346
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002347static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002348bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002349/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002350{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002351 PyObject *result = _PyBytes_FromHex(string, 0);
2352 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002353 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002354 }
2355 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002356}
2357
2358PyObject*
2359_PyBytes_FromHex(PyObject *string, int use_bytearray)
2360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002362 Py_ssize_t hexlen, invalid_char;
2363 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002364 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002365 _PyBytesWriter writer;
2366
2367 _PyBytesWriter_Init(&writer);
2368 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002369
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002370 assert(PyUnicode_Check(string));
2371 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002373 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002374
Victor Stinner2bf89932015-10-14 11:25:33 +02002375 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002376 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002377 unsigned int kind = PyUnicode_KIND(string);
2378 Py_ssize_t i;
2379
2380 /* search for the first non-ASCII character */
2381 for (i = 0; i < hexlen; i++) {
2382 if (PyUnicode_READ(kind, data, i) >= 128)
2383 break;
2384 }
2385 invalid_char = i;
2386 goto error;
2387 }
2388
2389 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2390 str = PyUnicode_1BYTE_DATA(string);
2391
2392 /* This overestimates if there are spaces */
2393 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2394 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002396
2397 end = str + hexlen;
2398 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002400 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002401 do {
2402 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002403 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002404 if (str >= end)
2405 break;
2406 }
2407
2408 top = _PyLong_DigitValue[*str];
2409 if (top >= 16) {
2410 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 goto error;
2412 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002413 str++;
2414
2415 bot = _PyLong_DigitValue[*str];
2416 if (bot >= 16) {
2417 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2418 goto error;
2419 }
2420 str++;
2421
2422 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002424
2425 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002426
2427 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002428 PyErr_Format(PyExc_ValueError,
2429 "non-hexadecimal number found in "
2430 "fromhex() arg at position %zd", invalid_char);
2431 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002433}
2434
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002435/*[clinic input]
2436bytes.hex
2437
2438 sep: object = NULL
2439 An optional single character or byte to separate hex bytes.
2440 bytes_per_sep: int = 1
2441 How many bytes between separators. Positive values count from the
2442 right, negative values count from the left.
2443
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02002444Create a string of hexadecimal numbers from a bytes object.
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002445
2446Example:
2447>>> value = b'\xb9\x01\xef'
2448>>> value.hex()
2449'b901ef'
2450>>> value.hex(':')
2451'b9:01:ef'
2452>>> value.hex(':', 2)
2453'b9:01ef'
2454>>> value.hex(':', -2)
2455'b901:ef'
2456[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002457
2458static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002459bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02002460/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002461{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002462 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002463 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002464 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002465}
2466
2467static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302468bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002471}
2472
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002473
2474static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002475bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302477 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002479 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002480 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002481 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002482 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002483 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002484 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002485 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002486 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002487 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002488 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002489 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002490 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302491 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302493 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302495 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002496 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302497 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302499 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302501 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302503 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302505 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002507 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002508 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302509 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002510 BYTES_LSTRIP_METHODDEF
2511 BYTES_MAKETRANS_METHODDEF
2512 BYTES_PARTITION_METHODDEF
2513 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002514 BYTES_REMOVEPREFIX_METHODDEF
2515 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002516 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2517 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002518 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002519 BYTES_RPARTITION_METHODDEF
2520 BYTES_RSPLIT_METHODDEF
2521 BYTES_RSTRIP_METHODDEF
2522 BYTES_SPLIT_METHODDEF
2523 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002524 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002525 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002526 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302527 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302529 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002530 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302531 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002532 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002534};
2535
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002537bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002538{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002539 if (!PyBytes_Check(self)) {
2540 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002541 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002542 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002543 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002544}
2545
2546static PyNumberMethods bytes_as_number = {
2547 0, /*nb_add*/
2548 0, /*nb_subtract*/
2549 0, /*nb_multiply*/
2550 bytes_mod, /*nb_remainder*/
2551};
2552
2553static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002554bytes_subtype_new(PyTypeObject *, PyObject *);
2555
2556/*[clinic input]
2557@classmethod
2558bytes.__new__ as bytes_new
2559
2560 source as x: object = NULL
2561 encoding: str = NULL
2562 errors: str = NULL
2563
2564[clinic start generated code]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002565
2566static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002567bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2568 const char *errors)
2569/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002570{
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002571 PyObject *bytes;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002572 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 Py_ssize_t size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 if (x == NULL) {
2576 if (encoding != NULL || errors != NULL) {
2577 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002578 encoding != NULL ?
2579 "encoding without a string argument" :
2580 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 return NULL;
2582 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002583 bytes = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002585 else if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002587 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002588 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002589 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002590 return NULL;
2591 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002592 bytes = PyUnicode_AsEncodedString(x, encoding, errors);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002593 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002594 else if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002595 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002596 PyUnicode_Check(x) ?
2597 "string argument without an encoding" :
2598 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002599 return NULL;
2600 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002601 /* We'd like to call PyObject_Bytes here, but we need to check for an
2602 integer argument before deferring to PyBytes_FromObject, something
2603 PyObject_Bytes doesn't do. */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002604 else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
2605 bytes = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002606 Py_DECREF(func);
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002607 if (bytes == NULL)
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002608 return NULL;
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002609 if (!PyBytes_Check(bytes)) {
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002610 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002611 "__bytes__ returned non-bytes (type %.200s)",
2612 Py_TYPE(bytes)->tp_name);
2613 Py_DECREF(bytes);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002614 return NULL;
2615 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002616 }
2617 else if (PyErr_Occurred())
2618 return NULL;
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002619 else if (PyUnicode_Check(x)) {
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002620 PyErr_SetString(PyExc_TypeError,
2621 "string argument without an encoding");
2622 return NULL;
2623 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 /* Is it an integer? */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002625 else if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002626 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2627 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002628 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002629 return NULL;
2630 PyErr_Clear(); /* fall through */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002631 bytes = PyBytes_FromObject(x);
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002632 }
INADA Naokia634e232017-01-06 17:32:01 +09002633 else {
2634 if (size < 0) {
2635 PyErr_SetString(PyExc_ValueError, "negative count");
2636 return NULL;
2637 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002638 bytes = _PyBytes_FromSize(size, 1);
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002639 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002641 else {
2642 bytes = PyBytes_FromObject(x);
2643 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002645 if (bytes != NULL && type != &PyBytes_Type) {
2646 Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2647 }
2648
2649 return bytes;
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002650}
2651
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002652static PyObject*
2653_PyBytes_FromBuffer(PyObject *x)
2654{
2655 PyObject *new;
2656 Py_buffer view;
2657
2658 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2659 return NULL;
2660
2661 new = PyBytes_FromStringAndSize(NULL, view.len);
2662 if (!new)
2663 goto fail;
2664 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2665 &view, view.len, 'C') < 0)
2666 goto fail;
2667 PyBuffer_Release(&view);
2668 return new;
2669
2670fail:
2671 Py_XDECREF(new);
2672 PyBuffer_Release(&view);
2673 return NULL;
2674}
2675
2676static PyObject*
2677_PyBytes_FromList(PyObject *x)
2678{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002679 Py_ssize_t i, size = PyList_GET_SIZE(x);
2680 Py_ssize_t value;
2681 char *str;
2682 PyObject *item;
2683 _PyBytesWriter writer;
2684
2685 _PyBytesWriter_Init(&writer);
2686 str = _PyBytesWriter_Alloc(&writer, size);
2687 if (str == NULL)
2688 return NULL;
2689 writer.overallocate = 1;
2690 size = writer.allocated;
2691
2692 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2693 item = PyList_GET_ITEM(x, i);
2694 Py_INCREF(item);
2695 value = PyNumber_AsSsize_t(item, NULL);
2696 Py_DECREF(item);
2697 if (value == -1 && PyErr_Occurred())
2698 goto error;
2699
2700 if (value < 0 || value >= 256) {
2701 PyErr_SetString(PyExc_ValueError,
2702 "bytes must be in range(0, 256)");
2703 goto error;
2704 }
2705
2706 if (i >= size) {
2707 str = _PyBytesWriter_Resize(&writer, str, size+1);
2708 if (str == NULL)
2709 return NULL;
2710 size = writer.allocated;
2711 }
2712 *str++ = (char) value;
2713 }
2714 return _PyBytesWriter_Finish(&writer, str);
2715
2716 error:
2717 _PyBytesWriter_Dealloc(&writer);
2718 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002719}
2720
2721static PyObject*
2722_PyBytes_FromTuple(PyObject *x)
2723{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002724 PyObject *bytes;
2725 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2726 Py_ssize_t value;
2727 char *str;
2728 PyObject *item;
2729
2730 bytes = PyBytes_FromStringAndSize(NULL, size);
2731 if (bytes == NULL)
2732 return NULL;
2733 str = ((PyBytesObject *)bytes)->ob_sval;
2734
2735 for (i = 0; i < size; i++) {
2736 item = PyTuple_GET_ITEM(x, i);
2737 value = PyNumber_AsSsize_t(item, NULL);
2738 if (value == -1 && PyErr_Occurred())
2739 goto error;
2740
2741 if (value < 0 || value >= 256) {
2742 PyErr_SetString(PyExc_ValueError,
2743 "bytes must be in range(0, 256)");
2744 goto error;
2745 }
2746 *str++ = (char) value;
2747 }
2748 return bytes;
2749
2750 error:
2751 Py_DECREF(bytes);
2752 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002753}
2754
2755static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002756_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002757{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002758 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002759 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002760 _PyBytesWriter writer;
2761
Serhiy Storchaka2ad93822020-12-03 12:46:16 +02002762 /* For iterator version, create a bytes object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002763 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002764 if (size == -1 && PyErr_Occurred())
2765 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002766
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002768 str = _PyBytesWriter_Alloc(&writer, size);
2769 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002771 writer.overallocate = 1;
2772 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 /* Run the iterator to exhaustion */
2775 for (i = 0; ; i++) {
2776 PyObject *item;
2777 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 /* Get the next item */
2780 item = PyIter_Next(it);
2781 if (item == NULL) {
2782 if (PyErr_Occurred())
2783 goto error;
2784 break;
2785 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002788 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 Py_DECREF(item);
2790 if (value == -1 && PyErr_Occurred())
2791 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 /* Range check */
2794 if (value < 0 || value >= 256) {
2795 PyErr_SetString(PyExc_ValueError,
2796 "bytes must be in range(0, 256)");
2797 goto error;
2798 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 /* Append the byte */
2801 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002802 str = _PyBytesWriter_Resize(&writer, str, size+1);
2803 if (str == NULL)
2804 return NULL;
2805 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002807 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002809
2810 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002811
2812 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002813 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815}
2816
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002817PyObject *
2818PyBytes_FromObject(PyObject *x)
2819{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002820 PyObject *it, *result;
2821
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002822 if (x == NULL) {
2823 PyErr_BadInternalCall();
2824 return NULL;
2825 }
2826
2827 if (PyBytes_CheckExact(x)) {
2828 Py_INCREF(x);
2829 return x;
2830 }
2831
2832 /* Use the modern buffer interface */
2833 if (PyObject_CheckBuffer(x))
2834 return _PyBytes_FromBuffer(x);
2835
2836 if (PyList_CheckExact(x))
2837 return _PyBytes_FromList(x);
2838
2839 if (PyTuple_CheckExact(x))
2840 return _PyBytes_FromTuple(x);
2841
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002842 if (!PyUnicode_Check(x)) {
2843 it = PyObject_GetIter(x);
2844 if (it != NULL) {
2845 result = _PyBytes_FromIterator(it, x);
2846 Py_DECREF(it);
2847 return result;
2848 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002849 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2850 return NULL;
2851 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002852 }
2853
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002854 PyErr_Format(PyExc_TypeError,
2855 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002856 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002857 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002858}
2859
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002861bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862{
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002863 PyObject *pnew;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 assert(PyType_IsSubtype(type, &PyBytes_Type));
Serhiy Storchaka15095802015-11-25 15:47:01 +02002867 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 n = PyBytes_GET_SIZE(tmp);
2869 pnew = type->tp_alloc(type, n);
2870 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002871 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 PyBytes_AS_STRING(tmp), n+1);
2873 ((PyBytesObject *)pnew)->ob_shash =
2874 ((PyBytesObject *)tmp)->ob_shash;
2875 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877}
2878
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002879PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002880"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002882bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002883bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2884bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002885\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002887 - an iterable yielding integers in range(256)\n\
2888 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002889 - any object implementing the buffer API.\n\
2890 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002891
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002892static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002893
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002895 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2896 "bytes",
2897 PyBytesObject_SIZE,
2898 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002899 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002900 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 0, /* tp_getattr */
2902 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002903 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002904 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002905 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002906 &bytes_as_sequence, /* tp_as_sequence */
2907 &bytes_as_mapping, /* tp_as_mapping */
2908 (hashfunc)bytes_hash, /* tp_hash */
2909 0, /* tp_call */
2910 bytes_str, /* tp_str */
2911 PyObject_GenericGetAttr, /* tp_getattro */
2912 0, /* tp_setattro */
2913 &bytes_as_buffer, /* tp_as_buffer */
2914 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2915 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2916 bytes_doc, /* tp_doc */
2917 0, /* tp_traverse */
2918 0, /* tp_clear */
2919 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2920 0, /* tp_weaklistoffset */
2921 bytes_iter, /* tp_iter */
2922 0, /* tp_iternext */
2923 bytes_methods, /* tp_methods */
2924 0, /* tp_members */
2925 0, /* tp_getset */
2926 &PyBaseObject_Type, /* tp_base */
2927 0, /* tp_dict */
2928 0, /* tp_descr_get */
2929 0, /* tp_descr_set */
2930 0, /* tp_dictoffset */
2931 0, /* tp_init */
2932 0, /* tp_alloc */
2933 bytes_new, /* tp_new */
2934 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002935};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002936
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002938PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002939{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 assert(pv != NULL);
2941 if (*pv == NULL)
2942 return;
2943 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002944 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 return;
2946 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002947
2948 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2949 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002950 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002951 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002952
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002953 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002954 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2955 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2956 Py_CLEAR(*pv);
2957 return;
2958 }
2959
2960 oldsize = PyBytes_GET_SIZE(*pv);
2961 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2962 PyErr_NoMemory();
2963 goto error;
2964 }
2965 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2966 goto error;
2967
2968 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2969 PyBuffer_Release(&wb);
2970 return;
2971
2972 error:
2973 PyBuffer_Release(&wb);
2974 Py_CLEAR(*pv);
2975 return;
2976 }
2977
2978 else {
2979 /* Multiple references, need to create new object */
2980 PyObject *v;
2981 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002982 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002983 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984}
2985
2986void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002987PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 PyBytes_Concat(pv, w);
2990 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991}
2992
2993
Ethan Furmanb95b5612015-01-23 20:05:18 -08002994/* The following function breaks the notion that bytes are immutable:
2995 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002997 as creating a new bytes object and destroying the old one, only
2998 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003000 Note that if there's not enough memory to resize the bytes object, the
3001 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003002 memory" exception is set, and -1 is returned. Else (on success) 0 is
3003 returned, and the value in *pv may or may not be the same as on input.
3004 As always, an extra byte is allocated for a trailing \0 byte (newsize
3005 does *not* include that), and a trailing \0 byte is stored.
3006*/
3007
3008int
3009_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3010{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003011 PyObject *v;
3012 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003014 if (!PyBytes_Check(v) || newsize < 0) {
3015 goto error;
3016 }
3017 if (Py_SIZE(v) == newsize) {
3018 /* return early if newsize equals to v->ob_size */
3019 return 0;
3020 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003021 if (Py_SIZE(v) == 0) {
3022 if (newsize == 0) {
3023 return 0;
3024 }
3025 *pv = _PyBytes_FromSize(newsize, 0);
3026 Py_DECREF(v);
3027 return (*pv == NULL) ? -1 : 0;
3028 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003029 if (Py_REFCNT(v) != 1) {
3030 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003032 if (newsize == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +02003033 *pv = bytes_new_empty();
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003034 Py_DECREF(v);
Victor Stinner91698d82020-06-25 14:07:40 +02003035 return 0;
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003036 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003038#ifdef Py_REF_DEBUG
3039 _Py_RefTotal--;
3040#endif
3041#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003043#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 *pv = (PyObject *)
Victor Stinner32bd68c2020-12-01 10:37:39 +01003045 PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003046 if (*pv == NULL) {
Victor Stinner32bd68c2020-12-01 10:37:39 +01003047 PyObject_Free(v);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 PyErr_NoMemory();
3049 return -1;
3050 }
3051 _Py_NewReference(*pv);
3052 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003053 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003054 sv->ob_sval[newsize] = '\0';
3055 sv->ob_shash = -1; /* invalidate cached hash value */
3056 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003057error:
3058 *pv = 0;
3059 Py_DECREF(v);
3060 PyErr_BadInternalCall();
3061 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062}
3063
Victor Stinner91698d82020-06-25 14:07:40 +02003064
3065PyStatus
3066_PyBytes_Init(PyThreadState *tstate)
3067{
3068 struct _Py_bytes_state *state = &tstate->interp->bytes;
3069 if (bytes_create_empty_string_singleton(state) < 0) {
3070 return _PyStatus_NO_MEMORY();
3071 }
3072 return _PyStatus_OK();
3073}
3074
3075
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003076void
Victor Stinnerc41eed12020-06-23 15:54:35 +02003077_PyBytes_Fini(PyThreadState *tstate)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078{
Victor Stinnerc41eed12020-06-23 15:54:35 +02003079 struct _Py_bytes_state* state = &tstate->interp->bytes;
3080 for (int i = 0; i < UCHAR_MAX + 1; i++) {
3081 Py_CLEAR(state->characters[i]);
3082 }
3083 Py_CLEAR(state->empty_string);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003084}
3085
Benjamin Peterson4116f362008-05-27 00:36:20 +00003086/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087
3088typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 PyObject_HEAD
3090 Py_ssize_t it_index;
3091 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003092} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003093
3094static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003095striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003096{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 _PyObject_GC_UNTRACK(it);
3098 Py_XDECREF(it->it_seq);
3099 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100}
3101
3102static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003103striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 Py_VISIT(it->it_seq);
3106 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003107}
3108
3109static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003110striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003111{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003112 PyBytesObject *seq;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 assert(it != NULL);
3115 seq = it->it_seq;
3116 if (seq == NULL)
3117 return NULL;
3118 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 if (it->it_index < PyBytes_GET_SIZE(seq)) {
Guido van Rossum488512b2020-08-03 09:04:13 -07003121 return PyLong_FromLong(
3122 (unsigned char)seq->ob_sval[it->it_index++]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003123 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003126 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128}
3129
3130static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303131striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 Py_ssize_t len = 0;
3134 if (it->it_seq)
3135 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3136 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003137}
3138
3139PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003140 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003141
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003142static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303143striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003144{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003145 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003146 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003147 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003148 it->it_seq, it->it_index);
3149 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003150 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003151 }
3152}
3153
3154PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3155
3156static PyObject *
3157striter_setstate(striterobject *it, PyObject *state)
3158{
3159 Py_ssize_t index = PyLong_AsSsize_t(state);
3160 if (index == -1 && PyErr_Occurred())
3161 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003162 if (it->it_seq != NULL) {
3163 if (index < 0)
3164 index = 0;
3165 else if (index > PyBytes_GET_SIZE(it->it_seq))
3166 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3167 it->it_index = index;
3168 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003169 Py_RETURN_NONE;
3170}
3171
3172PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3173
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003174static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003175 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3176 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003177 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3178 reduce_doc},
3179 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3180 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003181 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003182};
3183
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003184PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003185 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3186 "bytes_iterator", /* tp_name */
3187 sizeof(striterobject), /* tp_basicsize */
3188 0, /* tp_itemsize */
3189 /* methods */
3190 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003191 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 0, /* tp_getattr */
3193 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003194 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 0, /* tp_repr */
3196 0, /* tp_as_number */
3197 0, /* tp_as_sequence */
3198 0, /* tp_as_mapping */
3199 0, /* tp_hash */
3200 0, /* tp_call */
3201 0, /* tp_str */
3202 PyObject_GenericGetAttr, /* tp_getattro */
3203 0, /* tp_setattro */
3204 0, /* tp_as_buffer */
3205 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3206 0, /* tp_doc */
3207 (traverseproc)striter_traverse, /* tp_traverse */
3208 0, /* tp_clear */
3209 0, /* tp_richcompare */
3210 0, /* tp_weaklistoffset */
3211 PyObject_SelfIter, /* tp_iter */
3212 (iternextfunc)striter_next, /* tp_iternext */
3213 striter_methods, /* tp_methods */
3214 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003215};
3216
3217static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003218bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003222 if (!PyBytes_Check(seq)) {
3223 PyErr_BadInternalCall();
3224 return NULL;
3225 }
3226 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3227 if (it == NULL)
3228 return NULL;
3229 it->it_index = 0;
3230 Py_INCREF(seq);
3231 it->it_seq = (PyBytesObject *)seq;
3232 _PyObject_GC_TRACK(it);
3233 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003234}
Victor Stinner00165072015-10-09 01:53:21 +02003235
3236
/* _PyBytesWriter API */

/* Divisor used when over-allocating the writer buffer: a request for N
   bytes reserves N + N / OVERALLOCATE_FACTOR bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3246
3247void
3248_PyBytesWriter_Init(_PyBytesWriter *writer)
3249{
Victor Stinner661aacc2015-10-14 09:41:48 +02003250 /* Set all attributes before small_buffer to 0 */
3251 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003252#ifndef NDEBUG
3253 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3254 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003255#endif
3256}
3257
3258void
3259_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3260{
3261 Py_CLEAR(writer->buffer);
3262}
3263
3264Py_LOCAL_INLINE(char*)
3265_PyBytesWriter_AsString(_PyBytesWriter *writer)
3266{
Victor Stinner661aacc2015-10-14 09:41:48 +02003267 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003268 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003269 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003270 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003271 else if (writer->use_bytearray) {
3272 assert(writer->buffer != NULL);
3273 return PyByteArray_AS_STRING(writer->buffer);
3274 }
3275 else {
3276 assert(writer->buffer != NULL);
3277 return PyBytes_AS_STRING(writer->buffer);
3278 }
Victor Stinner00165072015-10-09 01:53:21 +02003279}
3280
3281Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003282_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003283{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003284 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003285 assert(str != NULL);
3286 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003287 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003288 return str - start;
3289}
3290
Victor Stinner68762572019-10-07 18:42:01 +02003291#ifndef NDEBUG
3292Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003293_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3294{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003295 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003296
Victor Stinner661aacc2015-10-14 09:41:48 +02003297 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003298 assert(writer->buffer == NULL);
3299 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003300 else {
3301 assert(writer->buffer != NULL);
3302 if (writer->use_bytearray)
3303 assert(PyByteArray_CheckExact(writer->buffer));
3304 else
3305 assert(PyBytes_CheckExact(writer->buffer));
3306 assert(Py_REFCNT(writer->buffer) == 1);
3307 }
Victor Stinner00165072015-10-09 01:53:21 +02003308
Victor Stinner661aacc2015-10-14 09:41:48 +02003309 if (writer->use_bytearray) {
3310 /* bytearray has its own overallocation algorithm,
3311 writer overallocation must be disabled */
3312 assert(!writer->overallocate);
3313 }
3314
3315 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003316 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003317 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003318 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003319 assert(start[writer->allocated] == 0);
3320
3321 end = start + writer->allocated;
3322 assert(str != NULL);
3323 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003324 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003325}
Victor Stinner68762572019-10-07 18:42:01 +02003326#endif
Victor Stinner00165072015-10-09 01:53:21 +02003327
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003328void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003329_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003330{
3331 Py_ssize_t allocated, pos;
3332
Victor Stinner68762572019-10-07 18:42:01 +02003333 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003334 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003335
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003336 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003337 if (writer->overallocate
3338 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3339 /* overallocate to limit the number of realloc() */
3340 allocated += allocated / OVERALLOCATE_FACTOR;
3341 }
3342
Victor Stinner2bf89932015-10-14 11:25:33 +02003343 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003344 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003345 if (writer->use_bytearray) {
3346 if (PyByteArray_Resize(writer->buffer, allocated))
3347 goto error;
3348 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3349 but we cannot use ob_alloc because bytes may need to be moved
3350 to use the whole buffer. bytearray uses an internal optimization
3351 to avoid moving or copying bytes when bytes are removed at the
3352 beginning (ex: del bytearray[:1]). */
3353 }
3354 else {
3355 if (_PyBytes_Resize(&writer->buffer, allocated))
3356 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003357 }
3358 }
3359 else {
3360 /* convert from stack buffer to bytes object buffer */
3361 assert(writer->buffer == NULL);
3362
Victor Stinner661aacc2015-10-14 09:41:48 +02003363 if (writer->use_bytearray)
3364 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3365 else
3366 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003367 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003368 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003369
3370 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003371 char *dest;
3372 if (writer->use_bytearray)
3373 dest = PyByteArray_AS_STRING(writer->buffer);
3374 else
3375 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003376 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003377 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003378 pos);
3379 }
3380
Victor Stinnerb3653a32015-10-09 03:38:24 +02003381 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003382#ifndef NDEBUG
3383 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3384 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003385#endif
Victor Stinner00165072015-10-09 01:53:21 +02003386 }
3387 writer->allocated = allocated;
3388
3389 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003390 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003391 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003392
3393error:
3394 _PyBytesWriter_Dealloc(writer);
3395 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003396}
3397
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003398void*
3399_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3400{
3401 Py_ssize_t new_min_size;
3402
Victor Stinner68762572019-10-07 18:42:01 +02003403 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003404 assert(size >= 0);
3405
3406 if (size == 0) {
3407 /* nothing to do */
3408 return str;
3409 }
3410
3411 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3412 PyErr_NoMemory();
3413 _PyBytesWriter_Dealloc(writer);
3414 return NULL;
3415 }
3416 new_min_size = writer->min_size + size;
3417
3418 if (new_min_size > writer->allocated)
3419 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3420
3421 writer->min_size = new_min_size;
3422 return str;
3423}
3424
Victor Stinner00165072015-10-09 01:53:21 +02003425/* Allocate the buffer to write size bytes.
3426 Return the pointer to the beginning of buffer data.
3427 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003428void*
Victor Stinner00165072015-10-09 01:53:21 +02003429_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3430{
3431 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003432 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003433 assert(size >= 0);
3434
Victor Stinnerb3653a32015-10-09 03:38:24 +02003435 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003436#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003437 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003438 /* In debug mode, don't use the full small buffer because it is less
3439 efficient than bytes and bytearray objects to detect buffer underflow
3440 and buffer overflow. Use 10 bytes of the small buffer to test also
3441 code using the smaller buffer in debug mode.
3442
3443 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3444 in debug mode to also be able to detect stack overflow when running
3445 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3446 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3447 stack overflow. */
3448 writer->allocated = Py_MIN(writer->allocated, 10);
3449 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3450 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003451 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003452#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003453 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003454#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003455 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003456}
3457
3458PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003459_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003460{
Victor Stinner2bf89932015-10-14 11:25:33 +02003461 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003462 PyObject *result;
3463
Victor Stinner68762572019-10-07 18:42:01 +02003464 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003465
Victor Stinner2bf89932015-10-14 11:25:33 +02003466 size = _PyBytesWriter_GetSize(writer, str);
3467 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003468 Py_CLEAR(writer->buffer);
3469 /* Get the empty byte string singleton */
3470 result = PyBytes_FromStringAndSize(NULL, 0);
3471 }
3472 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003473 if (writer->use_bytearray) {
3474 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3475 }
3476 else {
3477 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3478 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003479 }
3480 else {
3481 result = writer->buffer;
3482 writer->buffer = NULL;
3483
Victor Stinner2bf89932015-10-14 11:25:33 +02003484 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003485 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003486 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003487 Py_DECREF(result);
3488 return NULL;
3489 }
3490 }
3491 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003492 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003493 assert(result == NULL);
3494 return NULL;
3495 }
Victor Stinner00165072015-10-09 01:53:21 +02003496 }
3497 }
Victor Stinner00165072015-10-09 01:53:21 +02003498 }
Victor Stinner00165072015-10-09 01:53:21 +02003499 return result;
3500}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003501
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003502void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003503_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003504 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003505{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003506 char *str = (char *)ptr;
3507
Victor Stinnerce179bf2015-10-09 12:57:22 +02003508 str = _PyBytesWriter_Prepare(writer, str, size);
3509 if (str == NULL)
3510 return NULL;
3511
Christian Heimesf051e432016-09-13 20:22:02 +02003512 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003513 str += size;
3514
3515 return str;
3516}