blob: 782bc8e1fa0b7d3ca56a7ac84e54d0adc65181b3 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner91698d82020-06-25 14:07:40 +02007#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8#include "pycore_initconfig.h" // _PyStatus_OK()
9#include "pycore_object.h" // _PyObject_GC_TRACK
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +020010#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000011
Gregory P. Smith8cb65692015-04-25 23:22:26 +000012#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000013#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030018/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020019
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030020#include "clinic/bytesobject.c.h"
21
Hai Shi46874c22020-01-30 17:20:25 -060022_Py_IDENTIFIER(__bytes__);
23
Mark Dickinsonfd24b322008-12-06 15:33:31 +000024/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
25 for a string of length n should request PyBytesObject_SIZE + n bytes.
26
27 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
28 3 bytes per string allocation on a typical system.
29*/
30#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
31
Victor Stinner2bf89932015-10-14 11:25:33 +020032/* Forward declaration */
33Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
34 char *str);
35
Victor Stinnerc41eed12020-06-23 15:54:35 +020036
37static struct _Py_bytes_state*
38get_bytes_state(void)
39{
40 PyInterpreterState *interp = _PyInterpreterState_GET();
41 return &interp->bytes;
42}
43
44
Victor Stinner91698d82020-06-25 14:07:40 +020045// Return a borrowed reference to the empty bytes string singleton.
46static inline PyObject* bytes_get_empty(void)
47{
48 struct _Py_bytes_state *state = get_bytes_state();
49 // bytes_get_empty() must not be called before _PyBytes_Init()
50 // or after _PyBytes_Fini()
51 assert(state->empty_string != NULL);
52 return state->empty_string;
53}
54
55
56// Return a strong reference to the empty bytes string singleton.
57static inline PyObject* bytes_new_empty(void)
58{
59 PyObject *empty = bytes_get_empty();
60 Py_INCREF(empty);
61 return (PyObject *)empty;
62}
63
64
65static int
66bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
67{
68 // Create the empty bytes string singleton
69 PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
70 if (op == NULL) {
71 return -1;
72 }
73 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
74 op->ob_shash = -1;
75 op->ob_sval[0] = '\0';
76
77 assert(state->empty_string == NULL);
78 state->empty_string = (PyObject *)op;
79 return 0;
80}
81
82
Christian Heimes2c9c7a52008-05-26 13:42:13 +000083/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084 For PyBytes_FromString(), the parameter `str' points to a null-terminated
85 string containing exactly `size' bytes.
86
Martin Pantera90a4a92016-05-30 04:04:50 +000087 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088 either NULL or else points to a string containing at least `size' bytes.
89 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
90 not have to be null-terminated. (Therefore it is safe to construct a
91 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
92 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
93 bytes (setting the last byte to the null terminating character) and you can
94 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000095 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000096 alter the data yourself, since the strings may be shared.
97
98 The PyObject member `op->ob_size', which denotes the number of "extra
99 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +0200100 allocated for string data, not counting the null terminating character.
101 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000102 PyBytes_FromStringAndSize()) or the length of the string in the `str'
103 parameter (for PyBytes_FromString()).
104*/
Victor Stinnerdb067af2014-05-02 22:31:14 +0200105static PyObject *
106_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +0000107{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200108 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +0200110
Victor Stinnerc41eed12020-06-23 15:54:35 +0200111 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200112 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000114
Victor Stinner049e5092014-08-17 22:20:00 +0200115 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 PyErr_SetString(PyExc_OverflowError,
117 "byte string is too large");
118 return NULL;
119 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200122 if (use_calloc)
123 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
124 else
125 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200126 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200128 }
129 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000130 op->ob_shash = -1;
Victor Stinner91698d82020-06-25 14:07:40 +0200131 if (!use_calloc) {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200132 op->ob_sval[size] = '\0';
Victor Stinnerdb067af2014-05-02 22:31:14 +0200133 }
134 return (PyObject *) op;
135}
136
137PyObject *
138PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
139{
140 PyBytesObject *op;
141 if (size < 0) {
142 PyErr_SetString(PyExc_SystemError,
143 "Negative size passed to PyBytes_FromStringAndSize");
144 return NULL;
145 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200146 if (size == 1 && str != NULL) {
147 struct _Py_bytes_state *state = get_bytes_state();
148 op = state->characters[*str & UCHAR_MAX];
149 if (op != NULL) {
150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200153 }
Victor Stinner91698d82020-06-25 14:07:40 +0200154 if (size == 0) {
155 return bytes_new_empty();
156 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200157
158 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
159 if (op == NULL)
160 return NULL;
161 if (str == NULL)
162 return (PyObject *) op;
163
Christian Heimesf051e432016-09-13 20:22:02 +0200164 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200165 /* share short strings */
166 if (size == 1) {
Victor Stinnerc41eed12020-06-23 15:54:35 +0200167 struct _Py_bytes_state *state = get_bytes_state();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200169 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 }
171 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000172}
173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000176{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200177 size_t size;
178 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 assert(str != NULL);
181 size = strlen(str);
182 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
183 PyErr_SetString(PyExc_OverflowError,
184 "byte string is too long");
185 return NULL;
186 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200187
188 struct _Py_bytes_state *state = get_bytes_state();
189 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200190 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200192 else if (size == 1) {
193 op = state->characters[*str & UCHAR_MAX];
194 if (op != NULL) {
195 Py_INCREF(op);
196 return (PyObject *)op;
197 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 /* Inline PyObject_NewVar */
201 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200202 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200204 }
205 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200207 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 /* share short strings */
Victor Stinner91698d82020-06-25 14:07:40 +0200209 if (size == 1) {
210 assert(state->characters[*str & UCHAR_MAX] == NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200212 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 }
214 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000215}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000216
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000217PyObject *
218PyBytes_FromFormatV(const char *format, va_list vargs)
219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200221 const char *f;
222 const char *p;
223 Py_ssize_t prec;
224 int longflag;
225 int size_tflag;
226 /* Longest 64-bit formatted numbers:
227 - "18446744073709551615\0" (21 bytes)
228 - "-9223372036854775808\0" (21 bytes)
229 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000230
Victor Stinner03dab782015-10-14 00:21:35 +0200231 Longest 64-bit pointer representation:
232 "0xffffffffffffffff\0" (19 bytes). */
233 char buffer[21];
234 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000235
Victor Stinner03dab782015-10-14 00:21:35 +0200236 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000237
Victor Stinner03dab782015-10-14 00:21:35 +0200238 s = _PyBytesWriter_Alloc(&writer, strlen(format));
239 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200241 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000242
Victor Stinner03dab782015-10-14 00:21:35 +0200243#define WRITE_BYTES(str) \
244 do { \
245 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
246 if (s == NULL) \
247 goto error; \
248 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200251 if (*f != '%') {
252 *s++ = *f;
253 continue;
254 }
255
256 p = f++;
257
258 /* ignore the width (ex: 10 in "%10s") */
259 while (Py_ISDIGIT(*f))
260 f++;
261
262 /* parse the precision (ex: 10 in "%.10s") */
263 prec = 0;
264 if (*f == '.') {
265 f++;
266 for (; Py_ISDIGIT(*f); f++) {
267 prec = (prec * 10) + (*f - '0');
268 }
269 }
270
271 while (*f && *f != '%' && !Py_ISALPHA(*f))
272 f++;
273
274 /* handle the long flag ('l'), but only for %ld and %lu.
275 others can be added when necessary. */
276 longflag = 0;
277 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
278 longflag = 1;
279 ++f;
280 }
281
282 /* handle the size_t flag ('z'). */
283 size_tflag = 0;
284 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
285 size_tflag = 1;
286 ++f;
287 }
288
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700289 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200290 (ex: 2 for "%s") */
291 writer.min_size -= (f - p + 1);
292
293 switch (*f) {
294 case 'c':
295 {
296 int c = va_arg(vargs, int);
297 if (c < 0 || c > 255) {
298 PyErr_SetString(PyExc_OverflowError,
299 "PyBytes_FromFormatV(): %c format "
300 "expects an integer in range [0; 255]");
301 goto error;
302 }
303 writer.min_size++;
304 *s++ = (unsigned char)c;
305 break;
306 }
307
308 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200309 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200310 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200311 }
312 else if (size_tflag) {
313 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
314 }
315 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200316 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200317 }
Victor Stinner03dab782015-10-14 00:21:35 +0200318 assert(strlen(buffer) < sizeof(buffer));
319 WRITE_BYTES(buffer);
320 break;
321
322 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200323 if (longflag) {
324 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
325 }
326 else if (size_tflag) {
327 sprintf(buffer, "%zu", va_arg(vargs, size_t));
328 }
329 else {
330 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 assert(strlen(buffer) < sizeof(buffer));
333 WRITE_BYTES(buffer);
334 break;
335
336 case 'i':
337 sprintf(buffer, "%i", va_arg(vargs, int));
338 assert(strlen(buffer) < sizeof(buffer));
339 WRITE_BYTES(buffer);
340 break;
341
342 case 'x':
343 sprintf(buffer, "%x", va_arg(vargs, int));
344 assert(strlen(buffer) < sizeof(buffer));
345 WRITE_BYTES(buffer);
346 break;
347
348 case 's':
349 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200351
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200352 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200353 if (prec <= 0) {
354 i = strlen(p);
355 }
356 else {
357 i = 0;
358 while (i < prec && p[i]) {
359 i++;
360 }
361 }
Victor Stinner03dab782015-10-14 00:21:35 +0200362 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
363 if (s == NULL)
364 goto error;
365 break;
366 }
367
368 case 'p':
369 sprintf(buffer, "%p", va_arg(vargs, void*));
370 assert(strlen(buffer) < sizeof(buffer));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (buffer[1] == 'X')
373 buffer[1] = 'x';
374 else if (buffer[1] != 'x') {
375 memmove(buffer+2, buffer, strlen(buffer)+1);
376 buffer[0] = '0';
377 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 }
Victor Stinner03dab782015-10-14 00:21:35 +0200379 WRITE_BYTES(buffer);
380 break;
381
382 case '%':
383 writer.min_size++;
384 *s++ = '%';
385 break;
386
387 default:
388 if (*f == 0) {
389 /* fix min_size if we reached the end of the format string */
390 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000392
Victor Stinner03dab782015-10-14 00:21:35 +0200393 /* invalid format string: copy unformatted string and exit */
394 WRITE_BYTES(p);
395 return _PyBytesWriter_Finish(&writer, s);
396 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000398
Victor Stinner03dab782015-10-14 00:21:35 +0200399#undef WRITE_BYTES
400
401 return _PyBytesWriter_Finish(&writer, s);
402
403 error:
404 _PyBytesWriter_Dealloc(&writer);
405 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000406}
407
408PyObject *
409PyBytes_FromFormat(const char *format, ...)
410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 PyObject* ret;
412 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
414#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000416#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000418#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 ret = PyBytes_FromFormatV(format, vargs);
420 va_end(vargs);
421 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000422}
423
Ethan Furmanb95b5612015-01-23 20:05:18 -0800424/* Helpers for formatstring */
425
426Py_LOCAL_INLINE(PyObject *)
427getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
428{
429 Py_ssize_t argidx = *p_argidx;
430 if (argidx < arglen) {
431 (*p_argidx)++;
432 if (arglen < 0)
433 return args;
434 else
435 return PyTuple_GetItem(args, argidx);
436 }
437 PyErr_SetString(PyExc_TypeError,
438 "not enough arguments for format string");
439 return NULL;
440}
441
442/* Format codes
443 * F_LJUST '-'
444 * F_SIGN '+'
445 * F_BLANK ' '
446 * F_ALT '#'
447 * F_ZERO '0'
448 */
449#define F_LJUST (1<<0)
450#define F_SIGN (1<<1)
451#define F_BLANK (1<<2)
452#define F_ALT (1<<3)
453#define F_ZERO (1<<4)
454
455/* Returns a new reference to a PyBytes object, or NULL on failure. */
456
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457static char*
458formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200459 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800460{
461 char *p;
462 PyObject *result;
463 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200464 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800465
466 x = PyFloat_AsDouble(v);
467 if (x == -1.0 && PyErr_Occurred()) {
468 PyErr_Format(PyExc_TypeError, "float argument required, "
469 "not %.200s", Py_TYPE(v)->tp_name);
470 return NULL;
471 }
472
473 if (prec < 0)
474 prec = 6;
475
476 p = PyOS_double_to_string(x, type, prec,
477 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
478
479 if (p == NULL)
480 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200481
482 len = strlen(p);
483 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200484 str = _PyBytesWriter_Prepare(writer, str, len);
485 if (str == NULL)
486 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200487 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200488 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200489 str += len;
490 return str;
491 }
492
493 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800494 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200495 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600496 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497}
498
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499static PyObject *
500formatlong(PyObject *v, int flags, int prec, int type)
501{
502 PyObject *result, *iobj;
503 if (type == 'i')
504 type = 'd';
505 if (PyLong_Check(v))
506 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
507 if (PyNumber_Check(v)) {
508 /* make sure number is a type of integer for o, x, and X */
509 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300510 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300511 else
512 iobj = PyNumber_Long(v);
513 if (iobj == NULL) {
514 if (!PyErr_ExceptionMatches(PyExc_TypeError))
515 return NULL;
516 }
517 else if (!PyLong_Check(iobj))
518 Py_CLEAR(iobj);
519 if (iobj != NULL) {
520 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
521 Py_DECREF(iobj);
522 return result;
523 }
524 }
525 PyErr_Format(PyExc_TypeError,
526 "%%%c format: %s is required, not %.200s", type,
527 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
528 : "a number",
529 Py_TYPE(v)->tp_name);
530 return NULL;
531}
532
533static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300536 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 *p = PyBytes_AS_STRING(arg)[0];
538 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300540 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 *p = PyByteArray_AS_STRING(arg)[0];
542 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800543 }
544 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300545 PyObject *iobj;
546 long ival;
547 int overflow;
548 /* make sure number is a type of integer */
549 if (PyLong_Check(arg)) {
550 ival = PyLong_AsLongAndOverflow(arg, &overflow);
551 }
552 else {
553 iobj = PyNumber_Index(arg);
554 if (iobj == NULL) {
555 if (!PyErr_ExceptionMatches(PyExc_TypeError))
556 return 0;
557 goto onError;
558 }
559 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
560 Py_DECREF(iobj);
561 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300562 if (!overflow && ival == -1 && PyErr_Occurred())
563 goto onError;
564 if (overflow || !(0 <= ival && ival <= 255)) {
565 PyErr_SetString(PyExc_OverflowError,
566 "%c arg not in range(256)");
567 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800568 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300569 *p = (char)ival;
570 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300572 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200573 PyErr_SetString(PyExc_TypeError,
574 "%c requires an integer in range(256) or a single byte");
575 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576}
577
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578static PyObject *_PyBytes_FromBuffer(PyObject *x);
579
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200581format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200583 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 /* is it a bytes object? */
585 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200586 *pbuf = PyBytes_AS_STRING(v);
587 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200589 return v;
590 }
591 if (PyByteArray_Check(v)) {
592 *pbuf = PyByteArray_AS_STRING(v);
593 *plen = PyByteArray_GET_SIZE(v);
594 Py_INCREF(v);
595 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 }
597 /* does it support __bytes__? */
598 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
599 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100600 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800601 Py_DECREF(func);
602 if (result == NULL)
603 return NULL;
604 if (!PyBytes_Check(result)) {
605 PyErr_Format(PyExc_TypeError,
606 "__bytes__ returned non-bytes (type %.200s)",
607 Py_TYPE(result)->tp_name);
608 Py_DECREF(result);
609 return NULL;
610 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200611 *pbuf = PyBytes_AS_STRING(result);
612 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800613 return result;
614 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800615 /* does it support buffer protocol? */
616 if (PyObject_CheckBuffer(v)) {
617 /* maybe we can avoid making a copy of the buffer object here? */
618 result = _PyBytes_FromBuffer(v);
619 if (result == NULL)
620 return NULL;
621 *pbuf = PyBytes_AS_STRING(result);
622 *plen = PyBytes_GET_SIZE(result);
623 return result;
624 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800626 "%%b requires a bytes-like object, "
627 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628 Py_TYPE(v)->tp_name);
629 return NULL;
630}
631
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200632/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633
634PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200635_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
636 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800637{
Victor Stinner772b2b02015-10-14 09:56:53 +0200638 const char *fmt;
639 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800642 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200644 _PyBytesWriter writer;
645
Victor Stinner772b2b02015-10-14 09:56:53 +0200646 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800647 PyErr_BadInternalCall();
648 return NULL;
649 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200650 fmt = format;
651 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652
653 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200654 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200655
656 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
657 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800658 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200659 if (!use_bytearray)
660 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200661
Ethan Furmanb95b5612015-01-23 20:05:18 -0800662 if (PyTuple_Check(args)) {
663 arglen = PyTuple_GET_SIZE(args);
664 argidx = 0;
665 }
666 else {
667 arglen = -1;
668 argidx = -2;
669 }
670 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
671 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
672 !PyByteArray_Check(args)) {
673 dict = args;
674 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200675
Ethan Furmanb95b5612015-01-23 20:05:18 -0800676 while (--fmtcnt >= 0) {
677 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200678 Py_ssize_t len;
679 char *pos;
680
Xiang Zhangb76ad512017-03-06 17:17:05 +0800681 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200682 if (pos != NULL)
683 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200684 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800685 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200686 assert(len != 0);
687
Christian Heimesf051e432016-09-13 20:22:02 +0200688 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200689 res += len;
690 fmt += len;
691 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800692 }
693 else {
694 /* Got a format specifier */
695 int flags = 0;
696 Py_ssize_t width = -1;
697 int prec = -1;
698 int c = '\0';
699 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800700 PyObject *v = NULL;
701 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200702 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800703 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200704 Py_ssize_t len = 0;
705 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200706 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800707
Ethan Furmanb95b5612015-01-23 20:05:18 -0800708 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200709 if (*fmt == '%') {
710 *res++ = '%';
711 fmt++;
712 fmtcnt--;
713 continue;
714 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800715 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200716 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800717 Py_ssize_t keylen;
718 PyObject *key;
719 int pcount = 1;
720
721 if (dict == NULL) {
722 PyErr_SetString(PyExc_TypeError,
723 "format requires a mapping");
724 goto error;
725 }
726 ++fmt;
727 --fmtcnt;
728 keystart = fmt;
729 /* Skip over balanced parentheses */
730 while (pcount > 0 && --fmtcnt >= 0) {
731 if (*fmt == ')')
732 --pcount;
733 else if (*fmt == '(')
734 ++pcount;
735 fmt++;
736 }
737 keylen = fmt - keystart - 1;
738 if (fmtcnt < 0 || pcount > 0) {
739 PyErr_SetString(PyExc_ValueError,
740 "incomplete format key");
741 goto error;
742 }
743 key = PyBytes_FromStringAndSize(keystart,
744 keylen);
745 if (key == NULL)
746 goto error;
747 if (args_owned) {
748 Py_DECREF(args);
749 args_owned = 0;
750 }
751 args = PyObject_GetItem(dict, key);
752 Py_DECREF(key);
753 if (args == NULL) {
754 goto error;
755 }
756 args_owned = 1;
757 arglen = -1;
758 argidx = -2;
759 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200760
761 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 while (--fmtcnt >= 0) {
763 switch (c = *fmt++) {
764 case '-': flags |= F_LJUST; continue;
765 case '+': flags |= F_SIGN; continue;
766 case ' ': flags |= F_BLANK; continue;
767 case '#': flags |= F_ALT; continue;
768 case '0': flags |= F_ZERO; continue;
769 }
770 break;
771 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200772
773 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800774 if (c == '*') {
775 v = getnextarg(args, arglen, &argidx);
776 if (v == NULL)
777 goto error;
778 if (!PyLong_Check(v)) {
779 PyErr_SetString(PyExc_TypeError,
780 "* wants int");
781 goto error;
782 }
783 width = PyLong_AsSsize_t(v);
784 if (width == -1 && PyErr_Occurred())
785 goto error;
786 if (width < 0) {
787 flags |= F_LJUST;
788 width = -width;
789 }
790 if (--fmtcnt >= 0)
791 c = *fmt++;
792 }
793 else if (c >= 0 && isdigit(c)) {
794 width = c - '0';
795 while (--fmtcnt >= 0) {
796 c = Py_CHARMASK(*fmt++);
797 if (!isdigit(c))
798 break;
799 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
800 PyErr_SetString(
801 PyExc_ValueError,
802 "width too big");
803 goto error;
804 }
805 width = width*10 + (c - '0');
806 }
807 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200808
809 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800810 if (c == '.') {
811 prec = 0;
812 if (--fmtcnt >= 0)
813 c = *fmt++;
814 if (c == '*') {
815 v = getnextarg(args, arglen, &argidx);
816 if (v == NULL)
817 goto error;
818 if (!PyLong_Check(v)) {
819 PyErr_SetString(
820 PyExc_TypeError,
821 "* wants int");
822 goto error;
823 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200824 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800825 if (prec == -1 && PyErr_Occurred())
826 goto error;
827 if (prec < 0)
828 prec = 0;
829 if (--fmtcnt >= 0)
830 c = *fmt++;
831 }
832 else if (c >= 0 && isdigit(c)) {
833 prec = c - '0';
834 while (--fmtcnt >= 0) {
835 c = Py_CHARMASK(*fmt++);
836 if (!isdigit(c))
837 break;
838 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
839 PyErr_SetString(
840 PyExc_ValueError,
841 "prec too big");
842 goto error;
843 }
844 prec = prec*10 + (c - '0');
845 }
846 }
847 } /* prec */
848 if (fmtcnt >= 0) {
849 if (c == 'h' || c == 'l' || c == 'L') {
850 if (--fmtcnt >= 0)
851 c = *fmt++;
852 }
853 }
854 if (fmtcnt < 0) {
855 PyErr_SetString(PyExc_ValueError,
856 "incomplete format");
857 goto error;
858 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200859 v = getnextarg(args, arglen, &argidx);
860 if (v == NULL)
861 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200862
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300863 if (fmtcnt == 0) {
864 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200865 writer.overallocate = 0;
866 }
867
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 sign = 0;
869 fill = ' ';
870 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700871 case 'r':
872 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800873 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200874 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800875 if (temp == NULL)
876 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200877 assert(PyUnicode_IS_ASCII(temp));
878 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
879 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800880 if (prec >= 0 && len > prec)
881 len = prec;
882 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200883
Ethan Furmanb95b5612015-01-23 20:05:18 -0800884 case 's':
885 // %s is only for 2/3 code; 3 only code should use %b
886 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200887 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800888 if (temp == NULL)
889 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 if (prec >= 0 && len > prec)
891 len = prec;
892 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200893
Ethan Furmanb95b5612015-01-23 20:05:18 -0800894 case 'i':
895 case 'd':
896 case 'u':
897 case 'o':
898 case 'x':
899 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200900 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200901 && width == -1 && prec == -1
902 && !(flags & (F_SIGN | F_BLANK))
903 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200904 {
905 /* Fast path */
906 int alternate = flags & F_ALT;
907 int base;
908
909 switch(c)
910 {
911 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700912 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200913 case 'd':
914 case 'i':
915 case 'u':
916 base = 10;
917 break;
918 case 'o':
919 base = 8;
920 break;
921 case 'x':
922 case 'X':
923 base = 16;
924 break;
925 }
926
927 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200928 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200929 res = _PyLong_FormatBytesWriter(&writer, res,
930 v, base, alternate);
931 if (res == NULL)
932 goto error;
933 continue;
934 }
935
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300936 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200937 if (!temp)
938 goto error;
939 assert(PyUnicode_IS_ASCII(temp));
940 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
941 len = PyUnicode_GET_LENGTH(temp);
942 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 if (flags & F_ZERO)
944 fill = '0';
945 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200946
Ethan Furmanb95b5612015-01-23 20:05:18 -0800947 case 'e':
948 case 'E':
949 case 'f':
950 case 'F':
951 case 'g':
952 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200953 if (width == -1 && prec == -1
954 && !(flags & (F_SIGN | F_BLANK)))
955 {
956 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200957 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200958 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
961 continue;
962 }
963
Victor Stinnerad771582015-10-09 12:38:53 +0200964 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 goto error;
966 pbuf = PyBytes_AS_STRING(temp);
967 len = PyBytes_GET_SIZE(temp);
968 sign = 1;
969 if (flags & F_ZERO)
970 fill = '0';
971 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
Ethan Furmanb95b5612015-01-23 20:05:18 -0800973 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200974 pbuf = &onechar;
975 len = byte_converter(v, &onechar);
976 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200978 if (width == -1) {
979 /* Fast path */
980 *res++ = onechar;
981 continue;
982 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984
Ethan Furmanb95b5612015-01-23 20:05:18 -0800985 default:
986 PyErr_Format(PyExc_ValueError,
987 "unsupported format character '%c' (0x%x) "
988 "at index %zd",
989 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200990 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 goto error;
992 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 if (sign) {
995 if (*pbuf == '-' || *pbuf == '+') {
996 sign = *pbuf++;
997 len--;
998 }
999 else if (flags & F_SIGN)
1000 sign = '+';
1001 else if (flags & F_BLANK)
1002 sign = ' ';
1003 else
1004 sign = 0;
1005 }
1006 if (width < len)
1007 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001008
1009 alloc = width;
1010 if (sign != 0 && len == width)
1011 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +02001012 /* 2: size preallocated for %s */
1013 if (alloc > 2) {
1014 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015 if (res == NULL)
1016 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001018#ifndef NDEBUG
1019 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020#endif
1021
1022 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 if (sign) {
1024 if (fill != ' ')
1025 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 if (width > len)
1027 width--;
1028 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001029
1030 /* Write the numeric prefix for "x", "X" and "o" formats
1031 if the alternate form is used.
1032 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001033 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 assert(pbuf[0] == '0');
1035 assert(pbuf[1] == c);
1036 if (fill != ' ') {
1037 *res++ = *pbuf++;
1038 *res++ = *pbuf++;
1039 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 width -= 2;
1041 if (width < 0)
1042 width = 0;
1043 len -= 2;
1044 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001045
1046 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001047 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 memset(res, fill, width - len);
1049 res += (width - len);
1050 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052
1053 /* If padding with spaces: write sign if needed and/or numeric
1054 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001055 if (fill == ' ') {
1056 if (sign)
1057 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001058 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 assert(pbuf[0] == '0');
1060 assert(pbuf[1] == c);
1061 *res++ = *pbuf++;
1062 *res++ = *pbuf++;
1063 }
1064 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001065
1066 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001067 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001069
1070 /* Pad right with the fill character if needed */
1071 if (width > len) {
1072 memset(res, ' ', width - len);
1073 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001074 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001075
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001076 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001077 PyErr_SetString(PyExc_TypeError,
1078 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001079 Py_XDECREF(temp);
1080 goto error;
1081 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001082 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001083
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001084#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001085 /* check that we computed the exact size for this write */
1086 assert((res - before) == alloc);
1087#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001088 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001089
1090 /* If overallocation was disabled, ensure that it was the last
1091 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001092 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001093 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001094
Ethan Furmanb95b5612015-01-23 20:05:18 -08001095 if (argidx < arglen && !dict) {
1096 PyErr_SetString(PyExc_TypeError,
1097 "not all arguments converted during bytes formatting");
1098 goto error;
1099 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001100
Ethan Furmanb95b5612015-01-23 20:05:18 -08001101 if (args_owned) {
1102 Py_DECREF(args);
1103 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001104 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001105
1106 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001107 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001108 if (args_owned) {
1109 Py_DECREF(args);
1110 }
1111 return NULL;
1112}
1113
Greg Price3a4f6672019-09-12 11:12:22 -07001114/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001115PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 Py_ssize_t len,
1117 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001118 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001121 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001123 _PyBytesWriter writer;
1124
1125 _PyBytesWriter_Init(&writer);
1126
1127 p = _PyBytesWriter_Alloc(&writer, len);
1128 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 writer.overallocate = 1;
1131
Eric V. Smith42454af2016-10-31 09:22:08 -04001132 *first_invalid_escape = NULL;
1133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 end = s + len;
1135 while (s < end) {
1136 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001137 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 continue;
1139 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001142 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 PyErr_SetString(PyExc_ValueError,
1144 "Trailing \\ in string");
1145 goto failed;
1146 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 switch (*s++) {
1149 /* XXX This assumes ASCII! */
1150 case '\n': break;
1151 case '\\': *p++ = '\\'; break;
1152 case '\'': *p++ = '\''; break;
1153 case '\"': *p++ = '\"'; break;
1154 case 'b': *p++ = '\b'; break;
1155 case 'f': *p++ = '\014'; break; /* FF */
1156 case 't': *p++ = '\t'; break;
1157 case 'n': *p++ = '\n'; break;
1158 case 'r': *p++ = '\r'; break;
1159 case 'v': *p++ = '\013'; break; /* VT */
1160 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1161 case '0': case '1': case '2': case '3':
1162 case '4': case '5': case '6': case '7':
1163 c = s[-1] - '0';
1164 if (s < end && '0' <= *s && *s <= '7') {
1165 c = (c<<3) + *s++ - '0';
1166 if (s < end && '0' <= *s && *s <= '7')
1167 c = (c<<3) + *s++ - '0';
1168 }
1169 *p++ = c;
1170 break;
1171 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001172 if (s+1 < end) {
1173 int digit1, digit2;
1174 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1175 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1176 if (digit1 < 16 && digit2 < 16) {
1177 *p++ = (unsigned char)((digit1 << 4) + digit2);
1178 s += 2;
1179 break;
1180 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001182 /* invalid hexadecimal digits */
1183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001185 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001186 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001187 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 goto failed;
1189 }
1190 if (strcmp(errors, "replace") == 0) {
1191 *p++ = '?';
1192 } else if (strcmp(errors, "ignore") == 0)
1193 /* do nothing */;
1194 else {
1195 PyErr_Format(PyExc_ValueError,
1196 "decoding error; unknown "
1197 "error handling code: %.400s",
1198 errors);
1199 goto failed;
1200 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001201 /* skip \x */
1202 if (s < end && Py_ISXDIGIT(s[0]))
1203 s++; /* and a hexdigit */
1204 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001207 if (*first_invalid_escape == NULL) {
1208 *first_invalid_escape = s-1; /* Back up one char, since we've
1209 already incremented s. */
1210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001212 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 }
1214 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001215
1216 return _PyBytesWriter_Finish(&writer, p);
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001219 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
Eric V. Smith42454af2016-10-31 09:22:08 -04001223PyObject *PyBytes_DecodeEscape(const char *s,
1224 Py_ssize_t len,
1225 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001226 Py_ssize_t Py_UNUSED(unicode),
1227 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001228{
1229 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001230 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001231 &first_invalid_escape);
1232 if (result == NULL)
1233 return NULL;
1234 if (first_invalid_escape != NULL) {
1235 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1236 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001237 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001238 Py_DECREF(result);
1239 return NULL;
1240 }
1241 }
1242 return result;
1243
1244}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245/* -------------------------------------------------------------------- */
1246/* object api */
1247
1248Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 if (!PyBytes_Check(op)) {
1252 PyErr_Format(PyExc_TypeError,
1253 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1254 return -1;
1255 }
1256 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257}
1258
1259char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001260PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 if (!PyBytes_Check(op)) {
1263 PyErr_Format(PyExc_TypeError,
1264 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1265 return NULL;
1266 }
1267 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268}
1269
1270int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001271PyBytes_AsStringAndSize(PyObject *obj,
1272 char **s,
1273 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 if (s == NULL) {
1276 PyErr_BadInternalCall();
1277 return -1;
1278 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 if (!PyBytes_Check(obj)) {
1281 PyErr_Format(PyExc_TypeError,
1282 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1283 return -1;
1284 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 *s = PyBytes_AS_STRING(obj);
1287 if (len != NULL)
1288 *len = PyBytes_GET_SIZE(obj);
1289 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001290 PyErr_SetString(PyExc_ValueError,
1291 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 return -1;
1293 }
1294 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295}
Neal Norwitz6968b052007-02-27 19:02:19 +00001296
1297/* -------------------------------------------------------------------- */
1298/* Methods */
1299
Victor Stinner91698d82020-06-25 14:07:40 +02001300#define STRINGLIB_GET_EMPTY() bytes_get_empty()
Victor Stinnerc41eed12020-06-23 15:54:35 +02001301
Eric Smith0923d1d2009-04-16 20:16:10 +00001302#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001303
1304#include "stringlib/fastsearch.h"
1305#include "stringlib/count.h"
1306#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001307#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001308#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001309#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001310#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001311
Eric Smith0f78bff2009-11-30 01:01:42 +00001312#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001313
Victor Stinnerc41eed12020-06-23 15:54:35 +02001314#undef STRINGLIB_GET_EMPTY
1315
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316PyObject *
1317PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001318{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001319 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001321 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001323 unsigned char quote;
1324 const unsigned char *s;
1325 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 /* Compute size of output string */
1328 squotes = dquotes = 0;
1329 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001330 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001332 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001334 case '\'': squotes++; break;
1335 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001337 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 default:
1339 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001340 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001342 if (newsize > PY_SSIZE_T_MAX - incr)
1343 goto overflow;
1344 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001345 }
1346 quote = '\'';
1347 if (smartquotes && squotes && !dquotes)
1348 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001349 if (squotes && quote == '\'') {
1350 if (newsize > PY_SSIZE_T_MAX - squotes)
1351 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354
1355 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 if (v == NULL) {
1357 return NULL;
1358 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001359 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001361 *p++ = 'b', *p++ = quote;
1362 for (i = 0; i < length; i++) {
1363 unsigned char c = op->ob_sval[i];
1364 if (c == quote || c == '\\')
1365 *p++ = '\\', *p++ = c;
1366 else if (c == '\t')
1367 *p++ = '\\', *p++ = 't';
1368 else if (c == '\n')
1369 *p++ = '\\', *p++ = 'n';
1370 else if (c == '\r')
1371 *p++ = '\\', *p++ = 'r';
1372 else if (c < ' ' || c >= 0x7f) {
1373 *p++ = '\\';
1374 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001375 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1376 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 else
1379 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001381 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001382 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001384
1385 overflow:
1386 PyErr_SetString(PyExc_OverflowError,
1387 "bytes object is too large to make repr");
1388 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001389}
1390
Neal Norwitz6968b052007-02-27 19:02:19 +00001391static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001392bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001395}
1396
Neal Norwitz6968b052007-02-27 19:02:19 +00001397static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001398bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001399{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001400 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001402 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001404 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 }
1406 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001407}
1408
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001410bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413}
Neal Norwitz6968b052007-02-27 19:02:19 +00001414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415/* This is also used by PyBytes_Concat() */
1416static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001417bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 Py_buffer va, vb;
1420 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 va.len = -1;
1423 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001424 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1425 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001427 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 goto done;
1429 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 /* Optimize end cases */
1432 if (va.len == 0 && PyBytes_CheckExact(b)) {
1433 result = b;
1434 Py_INCREF(result);
1435 goto done;
1436 }
1437 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1438 result = a;
1439 Py_INCREF(result);
1440 goto done;
1441 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001443 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 PyErr_NoMemory();
1445 goto done;
1446 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001448 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 if (result != NULL) {
1450 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1451 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1452 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001453
1454 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (va.len != -1)
1456 PyBuffer_Release(&va);
1457 if (vb.len != -1)
1458 PyBuffer_Release(&vb);
1459 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460}
Neal Norwitz6968b052007-02-27 19:02:19 +00001461
1462static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001463bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001464{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001465 Py_ssize_t i;
1466 Py_ssize_t j;
1467 Py_ssize_t size;
1468 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 size_t nbytes;
1470 if (n < 0)
1471 n = 0;
1472 /* watch out for overflows: the size can overflow int,
1473 * and the # of bytes needed can overflow size_t
1474 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001475 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 PyErr_SetString(PyExc_OverflowError,
1477 "repeated bytes are too long");
1478 return NULL;
1479 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001480 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1482 Py_INCREF(a);
1483 return (PyObject *)a;
1484 }
1485 nbytes = (size_t)size;
1486 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1487 PyErr_SetString(PyExc_OverflowError,
1488 "repeated bytes are too long");
1489 return NULL;
1490 }
1491 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Victor Stinner04fc4f22020-06-16 01:28:07 +02001492 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +02001494 }
1495 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 op->ob_shash = -1;
1497 op->ob_sval[size] = '\0';
1498 if (Py_SIZE(a) == 1 && n > 0) {
1499 memset(op->ob_sval, a->ob_sval[0] , n);
1500 return (PyObject *) op;
1501 }
1502 i = 0;
1503 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001504 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 i = Py_SIZE(a);
1506 }
1507 while (i < size) {
1508 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001509 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 i += j;
1511 }
1512 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001513}
1514
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001515static int
1516bytes_contains(PyObject *self, PyObject *arg)
1517{
1518 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1519}
1520
Neal Norwitz6968b052007-02-27 19:02:19 +00001521static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001522bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 if (i < 0 || i >= Py_SIZE(a)) {
1525 PyErr_SetString(PyExc_IndexError, "index out of range");
1526 return NULL;
1527 }
1528 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001529}
1530
Benjamin Peterson621b4302016-09-09 13:54:34 -07001531static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001532bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1533{
1534 int cmp;
1535 Py_ssize_t len;
1536
1537 len = Py_SIZE(a);
1538 if (Py_SIZE(b) != len)
1539 return 0;
1540
1541 if (a->ob_sval[0] != b->ob_sval[0])
1542 return 0;
1543
1544 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1545 return (cmp == 0);
1546}
1547
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001548static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001549bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001550{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 int c;
1552 Py_ssize_t len_a, len_b;
1553 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001554 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 /* Make sure both arguments are strings. */
1557 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001558 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001559 rc = PyObject_IsInstance((PyObject*)a,
1560 (PyObject*)&PyUnicode_Type);
1561 if (!rc)
1562 rc = PyObject_IsInstance((PyObject*)b,
1563 (PyObject*)&PyUnicode_Type);
1564 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001566 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001567 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001568 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001569 return NULL;
1570 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001571 else {
1572 rc = PyObject_IsInstance((PyObject*)a,
1573 (PyObject*)&PyLong_Type);
1574 if (!rc)
1575 rc = PyObject_IsInstance((PyObject*)b,
1576 (PyObject*)&PyLong_Type);
1577 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001578 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001579 if (rc) {
1580 if (PyErr_WarnEx(PyExc_BytesWarning,
1581 "Comparison between bytes and int", 1))
1582 return NULL;
1583 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001584 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 }
stratakise8b19652017-11-02 11:32:54 +01001586 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001588 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001590 case Py_EQ:
1591 case Py_LE:
1592 case Py_GE:
1593 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001594 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001595 case Py_NE:
1596 case Py_LT:
1597 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001598 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001599 default:
1600 PyErr_BadArgument();
1601 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 }
1603 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001604 else if (op == Py_EQ || op == Py_NE) {
1605 int eq = bytes_compare_eq(a, b);
1606 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001607 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 }
1609 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001610 len_a = Py_SIZE(a);
1611 len_b = Py_SIZE(b);
1612 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001613 if (min_len > 0) {
1614 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001615 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001616 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001618 else
1619 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001620 if (c != 0)
1621 Py_RETURN_RICHCOMPARE(c, 0, op);
1622 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001624}
1625
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001626static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001627bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001628{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001629 if (a->ob_shash == -1) {
1630 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001631 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001632 }
1633 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001634}
1635
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001637bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001638{
Victor Stinnera15e2602020-04-08 02:01:56 +02001639 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1641 if (i == -1 && PyErr_Occurred())
1642 return NULL;
1643 if (i < 0)
1644 i += PyBytes_GET_SIZE(self);
1645 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1646 PyErr_SetString(PyExc_IndexError,
1647 "index out of range");
1648 return NULL;
1649 }
1650 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1651 }
1652 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001653 Py_ssize_t start, stop, step, slicelength, i;
1654 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001655 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 char* result_buf;
1657 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001658
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001659 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return NULL;
1661 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001662 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1663 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 if (slicelength <= 0) {
1666 return PyBytes_FromStringAndSize("", 0);
1667 }
1668 else if (start == 0 && step == 1 &&
1669 slicelength == PyBytes_GET_SIZE(self) &&
1670 PyBytes_CheckExact(self)) {
1671 Py_INCREF(self);
1672 return (PyObject *)self;
1673 }
1674 else if (step == 1) {
1675 return PyBytes_FromStringAndSize(
1676 PyBytes_AS_STRING(self) + start,
1677 slicelength);
1678 }
1679 else {
1680 source_buf = PyBytes_AS_STRING(self);
1681 result = PyBytes_FromStringAndSize(NULL, slicelength);
1682 if (result == NULL)
1683 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 result_buf = PyBytes_AS_STRING(result);
1686 for (cur = start, i = 0; i < slicelength;
1687 cur += step, i++) {
1688 result_buf[i] = source_buf[cur];
1689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return result;
1692 }
1693 }
1694 else {
1695 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001696 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 Py_TYPE(item)->tp_name);
1698 return NULL;
1699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700}
1701
1702static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001703bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1706 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707}
1708
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001709static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 (lenfunc)bytes_length, /*sq_length*/
1711 (binaryfunc)bytes_concat, /*sq_concat*/
1712 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1713 (ssizeargfunc)bytes_item, /*sq_item*/
1714 0, /*sq_slice*/
1715 0, /*sq_ass_item*/
1716 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001717 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718};
1719
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001720static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 (lenfunc)bytes_length,
1722 (binaryfunc)bytes_subscript,
1723 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724};
1725
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001726static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 (getbufferproc)bytes_buffer_getbuffer,
1728 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729};
1730
1731
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(). */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1735
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001736/*[clinic input]
1737bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739 sep: object = None
        The delimiter according to which to split the bytes.
1741 None (the default value) means split on ASCII whitespace characters
1742 (space, tab, return, newline, formfeed, vertical tab).
1743 maxsplit: Py_ssize_t = -1
1744 Maximum number of splits to do.
1745 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001747Return a list of the sections in the bytes, using sep as the delimiter.
1748[clinic start generated code]*/
1749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001751bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1752/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753{
1754 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 const char *s = PyBytes_AS_STRING(self), *sub;
1756 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001757 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 if (maxsplit < 0)
1760 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001763 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 return NULL;
1765 sub = vsub.buf;
1766 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1769 PyBuffer_Release(&vsub);
1770 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001771}
1772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773/*[clinic input]
1774bytes.partition
1775
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001776 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777 /
1778
1779Partition the bytes into three parts using the given separator.
1780
1781This will search for the separator sep in the bytes. If the separator is found,
1782returns a 3-tuple containing the part before the separator, the separator
1783itself, and the part after it.
1784
1785If the separator is not found, returns a 3-tuple containing the original bytes
1786object and two empty bytes objects.
1787[clinic start generated code]*/
1788
Neal Norwitz6968b052007-02-27 19:02:19 +00001789static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001791/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001792{
Neal Norwitz6968b052007-02-27 19:02:19 +00001793 return stringlib_partition(
1794 (PyObject*) self,
1795 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001796 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001797 );
1798}
1799
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800/*[clinic input]
1801bytes.rpartition
1802
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 /
1805
1806Partition the bytes into three parts using the given separator.
1807
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001808This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809the separator is found, returns a 3-tuple containing the part before the
1810separator, the separator itself, and the part after it.
1811
1812If the separator is not found, returns a 3-tuple containing two empty bytes
1813objects and the original bytes object.
1814[clinic start generated code]*/
1815
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001816static PyObject *
1817bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001818/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001819{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 return stringlib_rpartition(
1821 (PyObject*) self,
1822 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001823 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001825}
1826
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001827/*[clinic input]
1828bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001829
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001830Return a list of the sections in the bytes, using sep as the delimiter.
1831
1832Splitting is done starting at the end of the bytes and working to the front.
1833[clinic start generated code]*/
1834
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001835static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001836bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1837/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001838{
1839 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 const char *s = PyBytes_AS_STRING(self), *sub;
1841 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001842 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 if (maxsplit < 0)
1845 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001848 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 return NULL;
1850 sub = vsub.buf;
1851 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1854 PyBuffer_Release(&vsub);
1855 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001856}
1857
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001859/*[clinic input]
1860bytes.join
1861
1862 iterable_of_bytes: object
1863 /
1864
1865Concatenate any number of bytes objects.
1866
1867The bytes whose method is called is inserted in between each pair.
1868
1869The result is returned as a new bytes object.
1870
1871Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1872[clinic start generated code]*/
1873
Neal Norwitz6968b052007-02-27 19:02:19 +00001874static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001875bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1876/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001877{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001878 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001879}
1880
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881PyObject *
1882_PyBytes_Join(PyObject *sep, PyObject *x)
1883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 assert(sep != NULL && PyBytes_Check(sep));
1885 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001886 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887}
1888
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001889static PyObject *
1890bytes_find(PyBytesObject *self, PyObject *args)
1891{
1892 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1893}
1894
1895static PyObject *
1896bytes_index(PyBytesObject *self, PyObject *args)
1897{
1898 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1899}
1900
1901
1902static PyObject *
1903bytes_rfind(PyBytesObject *self, PyObject *args)
1904{
1905 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1906}
1907
1908
1909static PyObject *
1910bytes_rindex(PyBytesObject *self, PyObject *args)
1911{
1912 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913}
1914
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
1916Py_LOCAL_INLINE(PyObject *)
1917do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001918{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001920 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 Py_ssize_t len = PyBytes_GET_SIZE(self);
1922 char *sep;
1923 Py_ssize_t seplen;
1924 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001926 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 return NULL;
1928 sep = vsep.buf;
1929 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 i = 0;
1932 if (striptype != RIGHTSTRIP) {
1933 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1934 i++;
1935 }
1936 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 j = len;
1939 if (striptype != LEFTSTRIP) {
1940 do {
1941 j--;
1942 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1943 j++;
1944 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1949 Py_INCREF(self);
1950 return (PyObject*)self;
1951 }
1952 else
1953 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001954}
1955
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
1957Py_LOCAL_INLINE(PyObject *)
1958do_strip(PyBytesObject *self, int striptype)
1959{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001960 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 i = 0;
1964 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001965 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 i++;
1967 }
1968 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 j = len;
1971 if (striptype != LEFTSTRIP) {
1972 do {
1973 j--;
David Malcolm96960882010-11-05 17:23:41 +00001974 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 j++;
1976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1979 Py_INCREF(self);
1980 return (PyObject*)self;
1981 }
1982 else
1983 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984}
1985
1986
1987Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001988do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001990 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001991 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 }
1993 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994}
1995
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001996/*[clinic input]
1997bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001999 bytes: object = None
2000 /
2001
2002Strip leading and trailing bytes contained in the argument.
2003
2004If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2005[clinic start generated code]*/
2006
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002009/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002010{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002012}
2013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014/*[clinic input]
2015bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017 bytes: object = None
2018 /
2019
2020Strip leading bytes contained in the argument.
2021
2022If the argument is omitted or None, strip leading ASCII whitespace.
2023[clinic start generated code]*/
2024
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025static PyObject *
2026bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002027/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028{
2029 return do_argstrip(self, LEFTSTRIP, bytes);
2030}
2031
2032/*[clinic input]
2033bytes.rstrip
2034
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035 bytes: object = None
2036 /
2037
2038Strip trailing bytes contained in the argument.
2039
2040If the argument is omitted or None, strip trailing ASCII whitespace.
2041[clinic start generated code]*/
2042
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002043static PyObject *
2044bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002045/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002046{
2047 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002048}
Neal Norwitz6968b052007-02-27 19:02:19 +00002049
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002051static PyObject *
2052bytes_count(PyBytesObject *self, PyObject *args)
2053{
2054 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2055}
2056
2057
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002058/*[clinic input]
2059bytes.translate
2060
Victor Stinner049e5092014-08-17 22:20:00 +02002061 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002062 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002063 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002064 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002065
2066Return a copy with each character mapped by the given translation table.
2067
Martin Panter1b6c6da2016-08-27 08:35:02 +00002068All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069The remaining characters are mapped through the given translation table.
2070[clinic start generated code]*/
2071
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002073bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002074 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002075/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002077 const char *input;
2078 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002079 Py_buffer table_view = {NULL, NULL};
2080 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002081 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002082 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002084 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 Py_ssize_t inlen, tablen, dellen = 0;
2086 PyObject *result;
2087 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089 if (PyBytes_Check(table)) {
2090 table_chars = PyBytes_AS_STRING(table);
2091 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002093 else if (table == Py_None) {
2094 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 tablen = 256;
2096 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002097 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002098 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002099 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002100 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002101 tablen = table_view.len;
2102 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 if (tablen != 256) {
2105 PyErr_SetString(PyExc_ValueError,
2106 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 return NULL;
2109 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002111 if (deletechars != NULL) {
2112 if (PyBytes_Check(deletechars)) {
2113 del_table_chars = PyBytes_AS_STRING(deletechars);
2114 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002116 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002117 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002118 PyBuffer_Release(&table_view);
2119 return NULL;
2120 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002121 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 dellen = del_table_view.len;
2123 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 }
2125 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 dellen = 0;
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 inlen = PyBytes_GET_SIZE(input_obj);
2131 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002132 if (result == NULL) {
2133 PyBuffer_Release(&del_table_view);
2134 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002135 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002136 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002137 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002140 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 /* If no deletions are required, use faster code */
2142 for (i = inlen; --i >= 0; ) {
2143 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 changed = 1;
2146 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002147 if (!changed && PyBytes_CheckExact(input_obj)) {
2148 Py_INCREF(input_obj);
2149 Py_DECREF(result);
2150 result = input_obj;
2151 }
2152 PyBuffer_Release(&del_table_view);
2153 PyBuffer_Release(&table_view);
2154 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 for (i = 0; i < 256; i++)
2159 trans_table[i] = Py_CHARMASK(i);
2160 } else {
2161 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002167 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002168 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 for (i = inlen; --i >= 0; ) {
2171 c = Py_CHARMASK(*input++);
2172 if (trans_table[c] != -1)
2173 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2174 continue;
2175 changed = 1;
2176 }
2177 if (!changed && PyBytes_CheckExact(input_obj)) {
2178 Py_DECREF(result);
2179 Py_INCREF(input_obj);
2180 return input_obj;
2181 }
2182 /* Fix the size of the resulting string */
2183 if (inlen > 0)
2184 _PyBytes_Resize(&result, output - output_start);
2185 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186}
2187
2188
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002189/*[clinic input]
2190
2191@staticmethod
2192bytes.maketrans
2193
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002194 frm: Py_buffer
2195 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002196 /
2197
2198Return a translation table useable for the bytes or bytearray translate method.
2199
2200The returned table will be one where each byte in frm is mapped to the byte at
2201the same position in to.
2202
2203The bytes objects frm and to must be of the same length.
2204[clinic start generated code]*/
2205
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002206static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002207bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002208/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209{
2210 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002211}
2212
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213
2214/*[clinic input]
2215bytes.replace
2216
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002217 old: Py_buffer
2218 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219 count: Py_ssize_t = -1
2220 Maximum number of occurrences to replace.
2221 -1 (the default value) means replace all occurrences.
2222 /
2223
2224Return a copy with all occurrences of substring old replaced by new.
2225
2226If the optional argument count is given, only the first count occurrences are
2227replaced.
2228[clinic start generated code]*/
2229
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002230static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002231bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002232 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002233/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002235 return stringlib_replace((PyObject *)self,
2236 (const char *)old->buf, old->len,
2237 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002238}
2239
2240/** End DALKE **/
2241
sweeneydea81849b2020-04-22 17:05:48 -04002242/*[clinic input]
2243bytes.removeprefix as bytes_removeprefix
2244
2245 prefix: Py_buffer
2246 /
2247
2248Return a bytes object with the given prefix string removed if present.
2249
2250If the bytes starts with the prefix string, return bytes[len(prefix):].
2251Otherwise, return a copy of the original bytes.
2252[clinic start generated code]*/
2253
2254static PyObject *
2255bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2256/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2257{
2258 const char *self_start = PyBytes_AS_STRING(self);
2259 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2260 const char *prefix_start = prefix->buf;
2261 Py_ssize_t prefix_len = prefix->len;
2262
2263 if (self_len >= prefix_len
2264 && prefix_len > 0
2265 && memcmp(self_start, prefix_start, prefix_len) == 0)
2266 {
2267 return PyBytes_FromStringAndSize(self_start + prefix_len,
2268 self_len - prefix_len);
2269 }
2270
2271 if (PyBytes_CheckExact(self)) {
2272 Py_INCREF(self);
2273 return (PyObject *)self;
2274 }
2275
2276 return PyBytes_FromStringAndSize(self_start, self_len);
2277}
2278
2279/*[clinic input]
2280bytes.removesuffix as bytes_removesuffix
2281
2282 suffix: Py_buffer
2283 /
2284
2285Return a bytes object with the given suffix string removed if present.
2286
2287If the bytes ends with the suffix string and that suffix is not empty,
2288return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2289bytes.
2290[clinic start generated code]*/
2291
2292static PyObject *
2293bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2294/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2295{
2296 const char *self_start = PyBytes_AS_STRING(self);
2297 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2298 const char *suffix_start = suffix->buf;
2299 Py_ssize_t suffix_len = suffix->len;
2300
2301 if (self_len >= suffix_len
2302 && suffix_len > 0
2303 && memcmp(self_start + self_len - suffix_len,
2304 suffix_start, suffix_len) == 0)
2305 {
2306 return PyBytes_FromStringAndSize(self_start,
2307 self_len - suffix_len);
2308 }
2309
2310 if (PyBytes_CheckExact(self)) {
2311 Py_INCREF(self);
2312 return (PyObject *)self;
2313 }
2314
2315 return PyBytes_FromStringAndSize(self_start, self_len);
2316}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002318static PyObject *
2319bytes_startswith(PyBytesObject *self, PyObject *args)
2320{
2321 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2322}
2323
2324static PyObject *
2325bytes_endswith(PyBytesObject *self, PyObject *args)
2326{
2327 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2328}
2329
2330
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002331/*[clinic input]
2332bytes.decode
2333
2334 encoding: str(c_default="NULL") = 'utf-8'
2335 The encoding with which to decode the bytes.
2336 errors: str(c_default="NULL") = 'strict'
2337 The error handling scheme to use for the handling of decoding errors.
2338 The default is 'strict' meaning that decoding errors raise a
2339 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2340 as well as any other name registered with codecs.register_error that
2341 can handle UnicodeDecodeErrors.
2342
2343Decode the bytes using the codec registered for encoding.
2344[clinic start generated code]*/
2345
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002346static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002347bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002348 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002349/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002350{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002351 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002352}
2353
Guido van Rossum20188312006-05-05 15:15:40 +00002354
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002355/*[clinic input]
2356bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002357
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002358 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002359
2360Return a list of the lines in the bytes, breaking at line boundaries.
2361
2362Line breaks are not included in the resulting list unless keepends is given and
2363true.
2364[clinic start generated code]*/
2365
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002366static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002367bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002368/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002369{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002370 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002371 (PyObject*) self, PyBytes_AS_STRING(self),
2372 PyBytes_GET_SIZE(self), keepends
2373 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002374}
2375
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002376/*[clinic input]
2377@classmethod
2378bytes.fromhex
2379
2380 string: unicode
2381 /
2382
2383Create a bytes object from a string of hexadecimal numbers.
2384
2385Spaces between two numbers are accepted.
2386Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2387[clinic start generated code]*/
2388
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002389static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002390bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002391/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002392{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002393 PyObject *result = _PyBytes_FromHex(string, 0);
2394 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002395 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002396 }
2397 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002398}
2399
2400PyObject*
2401_PyBytes_FromHex(PyObject *string, int use_bytearray)
2402{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002404 Py_ssize_t hexlen, invalid_char;
2405 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002406 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002407 _PyBytesWriter writer;
2408
2409 _PyBytesWriter_Init(&writer);
2410 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002411
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002412 assert(PyUnicode_Check(string));
2413 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002415 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002416
Victor Stinner2bf89932015-10-14 11:25:33 +02002417 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002418 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002419 unsigned int kind = PyUnicode_KIND(string);
2420 Py_ssize_t i;
2421
2422 /* search for the first non-ASCII character */
2423 for (i = 0; i < hexlen; i++) {
2424 if (PyUnicode_READ(kind, data, i) >= 128)
2425 break;
2426 }
2427 invalid_char = i;
2428 goto error;
2429 }
2430
2431 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2432 str = PyUnicode_1BYTE_DATA(string);
2433
2434 /* This overestimates if there are spaces */
2435 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2436 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002438
2439 end = str + hexlen;
2440 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002442 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002443 do {
2444 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002445 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002446 if (str >= end)
2447 break;
2448 }
2449
2450 top = _PyLong_DigitValue[*str];
2451 if (top >= 16) {
2452 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 goto error;
2454 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002455 str++;
2456
2457 bot = _PyLong_DigitValue[*str];
2458 if (bot >= 16) {
2459 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2460 goto error;
2461 }
2462 str++;
2463
2464 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002466
2467 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002468
2469 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002470 PyErr_Format(PyExc_ValueError,
2471 "non-hexadecimal number found in "
2472 "fromhex() arg at position %zd", invalid_char);
2473 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002475}
2476
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002477/*[clinic input]
2478bytes.hex
2479
2480 sep: object = NULL
2481 An optional single character or byte to separate hex bytes.
2482 bytes_per_sep: int = 1
2483 How many bytes between separators. Positive values count from the
2484 right, negative values count from the left.
2485
2486Create a str of hexadecimal numbers from a bytes object.
2487
2488Example:
2489>>> value = b'\xb9\x01\xef'
2490>>> value.hex()
2491'b901ef'
2492>>> value.hex(':')
2493'b9:01:ef'
2494>>> value.hex(':', 2)
2495'b9:01ef'
2496>>> value.hex(':', -2)
2497'b901:ef'
2498[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002499
2500static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002501bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2502/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002503{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002504 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002505 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002506 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002507}
2508
2509static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302510bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002511{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002512 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002513}
2514
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002515
2516static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002517bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302519 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002521 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002522 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002523 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002524 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002525 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002526 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002527 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002528 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002529 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002530 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002531 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002532 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302533 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302535 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302537 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002538 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302539 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302541 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302543 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302545 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302547 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002549 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002550 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302551 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002552 BYTES_LSTRIP_METHODDEF
2553 BYTES_MAKETRANS_METHODDEF
2554 BYTES_PARTITION_METHODDEF
2555 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002556 BYTES_REMOVEPREFIX_METHODDEF
2557 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002558 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2559 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002560 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002561 BYTES_RPARTITION_METHODDEF
2562 BYTES_RSPLIT_METHODDEF
2563 BYTES_RSTRIP_METHODDEF
2564 BYTES_SPLIT_METHODDEF
2565 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002566 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002567 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002568 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302569 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302571 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002572 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302573 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002574 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002576};
2577
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002579bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002580{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002581 if (!PyBytes_Check(self)) {
2582 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002583 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002584 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002585 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002586}
2587
2588static PyNumberMethods bytes_as_number = {
2589 0, /*nb_add*/
2590 0, /*nb_subtract*/
2591 0, /*nb_multiply*/
2592 bytes_mod, /*nb_remainder*/
2593};
2594
2595static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002596bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597
2598static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002599bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 PyObject *x = NULL;
2602 const char *encoding = NULL;
2603 const char *errors = NULL;
2604 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002605 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 Py_ssize_t size;
2607 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002609 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002610 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2612 &encoding, &errors))
2613 return NULL;
2614 if (x == NULL) {
2615 if (encoding != NULL || errors != NULL) {
2616 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002617 encoding != NULL ?
2618 "encoding without a string argument" :
2619 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 return NULL;
2621 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002622 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002625 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002627 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002629 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 return NULL;
2631 }
2632 new = PyUnicode_AsEncodedString(x, encoding, errors);
2633 if (new == NULL)
2634 return NULL;
2635 assert(PyBytes_Check(new));
2636 return new;
2637 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002638
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002639 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002640 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002641 PyUnicode_Check(x) ?
2642 "string argument without an encoding" :
2643 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002644 return NULL;
2645 }
2646
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002647 /* We'd like to call PyObject_Bytes here, but we need to check for an
2648 integer argument before deferring to PyBytes_FromObject, something
2649 PyObject_Bytes doesn't do. */
2650 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2651 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002652 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002653 Py_DECREF(func);
2654 if (new == NULL)
2655 return NULL;
2656 if (!PyBytes_Check(new)) {
2657 PyErr_Format(PyExc_TypeError,
2658 "__bytes__ returned non-bytes (type %.200s)",
2659 Py_TYPE(new)->tp_name);
2660 Py_DECREF(new);
2661 return NULL;
2662 }
2663 return new;
2664 }
2665 else if (PyErr_Occurred())
2666 return NULL;
2667
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002668 if (PyUnicode_Check(x)) {
2669 PyErr_SetString(PyExc_TypeError,
2670 "string argument without an encoding");
2671 return NULL;
2672 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002674 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002675 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2676 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002677 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002678 return NULL;
2679 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002680 }
INADA Naokia634e232017-01-06 17:32:01 +09002681 else {
2682 if (size < 0) {
2683 PyErr_SetString(PyExc_ValueError, "negative count");
2684 return NULL;
2685 }
2686 new = _PyBytes_FromSize(size, 1);
2687 if (new == NULL)
2688 return NULL;
2689 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002690 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002693 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002694}
2695
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002696static PyObject*
2697_PyBytes_FromBuffer(PyObject *x)
2698{
2699 PyObject *new;
2700 Py_buffer view;
2701
2702 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2703 return NULL;
2704
2705 new = PyBytes_FromStringAndSize(NULL, view.len);
2706 if (!new)
2707 goto fail;
2708 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2709 &view, view.len, 'C') < 0)
2710 goto fail;
2711 PyBuffer_Release(&view);
2712 return new;
2713
2714fail:
2715 Py_XDECREF(new);
2716 PyBuffer_Release(&view);
2717 return NULL;
2718}
2719
2720static PyObject*
2721_PyBytes_FromList(PyObject *x)
2722{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002723 Py_ssize_t i, size = PyList_GET_SIZE(x);
2724 Py_ssize_t value;
2725 char *str;
2726 PyObject *item;
2727 _PyBytesWriter writer;
2728
2729 _PyBytesWriter_Init(&writer);
2730 str = _PyBytesWriter_Alloc(&writer, size);
2731 if (str == NULL)
2732 return NULL;
2733 writer.overallocate = 1;
2734 size = writer.allocated;
2735
2736 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2737 item = PyList_GET_ITEM(x, i);
2738 Py_INCREF(item);
2739 value = PyNumber_AsSsize_t(item, NULL);
2740 Py_DECREF(item);
2741 if (value == -1 && PyErr_Occurred())
2742 goto error;
2743
2744 if (value < 0 || value >= 256) {
2745 PyErr_SetString(PyExc_ValueError,
2746 "bytes must be in range(0, 256)");
2747 goto error;
2748 }
2749
2750 if (i >= size) {
2751 str = _PyBytesWriter_Resize(&writer, str, size+1);
2752 if (str == NULL)
2753 return NULL;
2754 size = writer.allocated;
2755 }
2756 *str++ = (char) value;
2757 }
2758 return _PyBytesWriter_Finish(&writer, str);
2759
2760 error:
2761 _PyBytesWriter_Dealloc(&writer);
2762 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002763}
2764
2765static PyObject*
2766_PyBytes_FromTuple(PyObject *x)
2767{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002768 PyObject *bytes;
2769 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2770 Py_ssize_t value;
2771 char *str;
2772 PyObject *item;
2773
2774 bytes = PyBytes_FromStringAndSize(NULL, size);
2775 if (bytes == NULL)
2776 return NULL;
2777 str = ((PyBytesObject *)bytes)->ob_sval;
2778
2779 for (i = 0; i < size; i++) {
2780 item = PyTuple_GET_ITEM(x, i);
2781 value = PyNumber_AsSsize_t(item, NULL);
2782 if (value == -1 && PyErr_Occurred())
2783 goto error;
2784
2785 if (value < 0 || value >= 256) {
2786 PyErr_SetString(PyExc_ValueError,
2787 "bytes must be in range(0, 256)");
2788 goto error;
2789 }
2790 *str++ = (char) value;
2791 }
2792 return bytes;
2793
2794 error:
2795 Py_DECREF(bytes);
2796 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002797}
2798
2799static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002800_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002801{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002802 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002804 _PyBytesWriter writer;
2805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002807 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 if (size == -1 && PyErr_Occurred())
2809 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002810
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002811 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002812 str = _PyBytesWriter_Alloc(&writer, size);
2813 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002815 writer.overallocate = 1;
2816 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 /* Run the iterator to exhaustion */
2819 for (i = 0; ; i++) {
2820 PyObject *item;
2821 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 /* Get the next item */
2824 item = PyIter_Next(it);
2825 if (item == NULL) {
2826 if (PyErr_Occurred())
2827 goto error;
2828 break;
2829 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002832 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 Py_DECREF(item);
2834 if (value == -1 && PyErr_Occurred())
2835 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 /* Range check */
2838 if (value < 0 || value >= 256) {
2839 PyErr_SetString(PyExc_ValueError,
2840 "bytes must be in range(0, 256)");
2841 goto error;
2842 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002844 /* Append the byte */
2845 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002846 str = _PyBytesWriter_Resize(&writer, str, size+1);
2847 if (str == NULL)
2848 return NULL;
2849 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002850 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002851 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002853
2854 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855
2856 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002857 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859}
2860
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002861PyObject *
2862PyBytes_FromObject(PyObject *x)
2863{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002864 PyObject *it, *result;
2865
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002866 if (x == NULL) {
2867 PyErr_BadInternalCall();
2868 return NULL;
2869 }
2870
2871 if (PyBytes_CheckExact(x)) {
2872 Py_INCREF(x);
2873 return x;
2874 }
2875
2876 /* Use the modern buffer interface */
2877 if (PyObject_CheckBuffer(x))
2878 return _PyBytes_FromBuffer(x);
2879
2880 if (PyList_CheckExact(x))
2881 return _PyBytes_FromList(x);
2882
2883 if (PyTuple_CheckExact(x))
2884 return _PyBytes_FromTuple(x);
2885
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002886 if (!PyUnicode_Check(x)) {
2887 it = PyObject_GetIter(x);
2888 if (it != NULL) {
2889 result = _PyBytes_FromIterator(it, x);
2890 Py_DECREF(it);
2891 return result;
2892 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002893 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2894 return NULL;
2895 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002896 }
2897
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002898 PyErr_Format(PyExc_TypeError,
2899 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002900 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002901 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002902}
2903
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002905bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 PyObject *tmp, *pnew;
2908 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002910 assert(PyType_IsSubtype(type, &PyBytes_Type));
2911 tmp = bytes_new(&PyBytes_Type, args, kwds);
2912 if (tmp == NULL)
2913 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002914 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 n = PyBytes_GET_SIZE(tmp);
2916 pnew = type->tp_alloc(type, n);
2917 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002918 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002919 PyBytes_AS_STRING(tmp), n+1);
2920 ((PyBytesObject *)pnew)->ob_shash =
2921 ((PyBytesObject *)tmp)->ob_shash;
2922 }
2923 Py_DECREF(tmp);
2924 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925}
2926
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002927PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002928"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002930bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002931bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2932bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002933\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002935 - an iterable yielding integers in range(256)\n\
2936 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002937 - any object implementing the buffer API.\n\
2938 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002939
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002940static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002941
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002942PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002943 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2944 "bytes",
2945 PyBytesObject_SIZE,
2946 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002947 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002948 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 0, /* tp_getattr */
2950 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002951 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002953 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 &bytes_as_sequence, /* tp_as_sequence */
2955 &bytes_as_mapping, /* tp_as_mapping */
2956 (hashfunc)bytes_hash, /* tp_hash */
2957 0, /* tp_call */
2958 bytes_str, /* tp_str */
2959 PyObject_GenericGetAttr, /* tp_getattro */
2960 0, /* tp_setattro */
2961 &bytes_as_buffer, /* tp_as_buffer */
2962 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2963 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2964 bytes_doc, /* tp_doc */
2965 0, /* tp_traverse */
2966 0, /* tp_clear */
2967 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2968 0, /* tp_weaklistoffset */
2969 bytes_iter, /* tp_iter */
2970 0, /* tp_iternext */
2971 bytes_methods, /* tp_methods */
2972 0, /* tp_members */
2973 0, /* tp_getset */
2974 &PyBaseObject_Type, /* tp_base */
2975 0, /* tp_dict */
2976 0, /* tp_descr_get */
2977 0, /* tp_descr_set */
2978 0, /* tp_dictoffset */
2979 0, /* tp_init */
2980 0, /* tp_alloc */
2981 bytes_new, /* tp_new */
2982 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002983};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002984
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002986PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 assert(pv != NULL);
2989 if (*pv == NULL)
2990 return;
2991 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002992 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 return;
2994 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002995
2996 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2997 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002998 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002999 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003000
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003001 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003002 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3003 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3004 Py_CLEAR(*pv);
3005 return;
3006 }
3007
3008 oldsize = PyBytes_GET_SIZE(*pv);
3009 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3010 PyErr_NoMemory();
3011 goto error;
3012 }
3013 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3014 goto error;
3015
3016 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3017 PyBuffer_Release(&wb);
3018 return;
3019
3020 error:
3021 PyBuffer_Release(&wb);
3022 Py_CLEAR(*pv);
3023 return;
3024 }
3025
3026 else {
3027 /* Multiple references, need to create new object */
3028 PyObject *v;
3029 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003030 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003031 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032}
3033
3034void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003035PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 PyBytes_Concat(pv, w);
3038 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039}
3040
3041
Ethan Furmanb95b5612015-01-23 20:05:18 -08003042/* The following function breaks the notion that bytes are immutable:
3043 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003045 as creating a new bytes object and destroying the old one, only
3046 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003047 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003048 Note that if there's not enough memory to resize the bytes object, the
3049 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003050 memory" exception is set, and -1 is returned. Else (on success) 0 is
3051 returned, and the value in *pv may or may not be the same as on input.
3052 As always, an extra byte is allocated for a trailing \0 byte (newsize
3053 does *not* include that), and a trailing \0 byte is stored.
3054*/
3055
3056int
3057_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3058{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003059 PyObject *v;
3060 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003062 if (!PyBytes_Check(v) || newsize < 0) {
3063 goto error;
3064 }
3065 if (Py_SIZE(v) == newsize) {
3066 /* return early if newsize equals to v->ob_size */
3067 return 0;
3068 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003069 if (Py_SIZE(v) == 0) {
3070 if (newsize == 0) {
3071 return 0;
3072 }
3073 *pv = _PyBytes_FromSize(newsize, 0);
3074 Py_DECREF(v);
3075 return (*pv == NULL) ? -1 : 0;
3076 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003077 if (Py_REFCNT(v) != 1) {
3078 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003080 if (newsize == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +02003081 *pv = bytes_new_empty();
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003082 Py_DECREF(v);
Victor Stinner91698d82020-06-25 14:07:40 +02003083 return 0;
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003084 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003085 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003086#ifdef Py_REF_DEBUG
3087 _Py_RefTotal--;
3088#endif
3089#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003091#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003093 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 if (*pv == NULL) {
3095 PyObject_Del(v);
3096 PyErr_NoMemory();
3097 return -1;
3098 }
3099 _Py_NewReference(*pv);
3100 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003101 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 sv->ob_sval[newsize] = '\0';
3103 sv->ob_shash = -1; /* invalidate cached hash value */
3104 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003105error:
3106 *pv = 0;
3107 Py_DECREF(v);
3108 PyErr_BadInternalCall();
3109 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003110}
3111
Victor Stinner91698d82020-06-25 14:07:40 +02003112
3113PyStatus
3114_PyBytes_Init(PyThreadState *tstate)
3115{
3116 struct _Py_bytes_state *state = &tstate->interp->bytes;
3117 if (bytes_create_empty_string_singleton(state) < 0) {
3118 return _PyStatus_NO_MEMORY();
3119 }
3120 return _PyStatus_OK();
3121}
3122
3123
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003124void
Victor Stinnerc41eed12020-06-23 15:54:35 +02003125_PyBytes_Fini(PyThreadState *tstate)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003126{
Victor Stinnerc41eed12020-06-23 15:54:35 +02003127 struct _Py_bytes_state* state = &tstate->interp->bytes;
3128 for (int i = 0; i < UCHAR_MAX + 1; i++) {
3129 Py_CLEAR(state->characters[i]);
3130 }
3131 Py_CLEAR(state->empty_string);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003132}
3133
Benjamin Peterson4116f362008-05-27 00:36:20 +00003134/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003135
3136typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003137 PyObject_HEAD
3138 Py_ssize_t it_index;
3139 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003140} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003141
3142static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003143striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003144{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003145 _PyObject_GC_UNTRACK(it);
3146 Py_XDECREF(it->it_seq);
3147 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003148}
3149
3150static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003151striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003152{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003153 Py_VISIT(it->it_seq);
3154 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003155}
3156
3157static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003158striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003159{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 PyBytesObject *seq;
3161 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 assert(it != NULL);
3164 seq = it->it_seq;
3165 if (seq == NULL)
3166 return NULL;
3167 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003169 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3170 item = PyLong_FromLong(
3171 (unsigned char)seq->ob_sval[it->it_index]);
3172 if (item != NULL)
3173 ++it->it_index;
3174 return item;
3175 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003177 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003178 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003179 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003180}
3181
3182static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303183striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003185 Py_ssize_t len = 0;
3186 if (it->it_seq)
3187 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3188 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003189}
3190
3191PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003193
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003194static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303195striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003196{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003197 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003198 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003199 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003200 it->it_seq, it->it_index);
3201 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003202 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003203 }
3204}
3205
3206PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3207
3208static PyObject *
3209striter_setstate(striterobject *it, PyObject *state)
3210{
3211 Py_ssize_t index = PyLong_AsSsize_t(state);
3212 if (index == -1 && PyErr_Occurred())
3213 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003214 if (it->it_seq != NULL) {
3215 if (index < 0)
3216 index = 0;
3217 else if (index > PyBytes_GET_SIZE(it->it_seq))
3218 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3219 it->it_index = index;
3220 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003221 Py_RETURN_NONE;
3222}
3223
3224PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3225
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003226static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003227 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3228 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003229 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3230 reduce_doc},
3231 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3232 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003233 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003234};
3235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003236PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003237 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3238 "bytes_iterator", /* tp_name */
3239 sizeof(striterobject), /* tp_basicsize */
3240 0, /* tp_itemsize */
3241 /* methods */
3242 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003243 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003244 0, /* tp_getattr */
3245 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003246 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003247 0, /* tp_repr */
3248 0, /* tp_as_number */
3249 0, /* tp_as_sequence */
3250 0, /* tp_as_mapping */
3251 0, /* tp_hash */
3252 0, /* tp_call */
3253 0, /* tp_str */
3254 PyObject_GenericGetAttr, /* tp_getattro */
3255 0, /* tp_setattro */
3256 0, /* tp_as_buffer */
3257 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3258 0, /* tp_doc */
3259 (traverseproc)striter_traverse, /* tp_traverse */
3260 0, /* tp_clear */
3261 0, /* tp_richcompare */
3262 0, /* tp_weaklistoffset */
3263 PyObject_SelfIter, /* tp_iter */
3264 (iternextfunc)striter_next, /* tp_iternext */
3265 striter_methods, /* tp_methods */
3266 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003267};
3268
3269static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003270bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003271{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003274 if (!PyBytes_Check(seq)) {
3275 PyErr_BadInternalCall();
3276 return NULL;
3277 }
3278 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3279 if (it == NULL)
3280 return NULL;
3281 it->it_index = 0;
3282 Py_INCREF(seq);
3283 it->it_seq = (PyBytesObject *)seq;
3284 _PyObject_GC_TRACK(it);
3285 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003286}
Victor Stinner00165072015-10-09 01:53:21 +02003287
3288
3289/* _PyBytesWriter API */
3290
/* Divisor controlling overallocation in _PyBytesWriter_Resize(): the
   requested size is increased by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3298
3299void
3300_PyBytesWriter_Init(_PyBytesWriter *writer)
3301{
Victor Stinner661aacc2015-10-14 09:41:48 +02003302 /* Set all attributes before small_buffer to 0 */
3303 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003304#ifndef NDEBUG
3305 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3306 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003307#endif
3308}
3309
3310void
3311_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3312{
3313 Py_CLEAR(writer->buffer);
3314}
3315
3316Py_LOCAL_INLINE(char*)
3317_PyBytesWriter_AsString(_PyBytesWriter *writer)
3318{
Victor Stinner661aacc2015-10-14 09:41:48 +02003319 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003320 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003321 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003322 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003323 else if (writer->use_bytearray) {
3324 assert(writer->buffer != NULL);
3325 return PyByteArray_AS_STRING(writer->buffer);
3326 }
3327 else {
3328 assert(writer->buffer != NULL);
3329 return PyBytes_AS_STRING(writer->buffer);
3330 }
Victor Stinner00165072015-10-09 01:53:21 +02003331}
3332
3333Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003334_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003335{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003336 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003337 assert(str != NULL);
3338 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003339 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003340 return str - start;
3341}
3342
Victor Stinner68762572019-10-07 18:42:01 +02003343#ifndef NDEBUG
3344Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003345_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3346{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003347 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003348
Victor Stinner661aacc2015-10-14 09:41:48 +02003349 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003350 assert(writer->buffer == NULL);
3351 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003352 else {
3353 assert(writer->buffer != NULL);
3354 if (writer->use_bytearray)
3355 assert(PyByteArray_CheckExact(writer->buffer));
3356 else
3357 assert(PyBytes_CheckExact(writer->buffer));
3358 assert(Py_REFCNT(writer->buffer) == 1);
3359 }
Victor Stinner00165072015-10-09 01:53:21 +02003360
Victor Stinner661aacc2015-10-14 09:41:48 +02003361 if (writer->use_bytearray) {
3362 /* bytearray has its own overallocation algorithm,
3363 writer overallocation must be disabled */
3364 assert(!writer->overallocate);
3365 }
3366
3367 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003368 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003369 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003370 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003371 assert(start[writer->allocated] == 0);
3372
3373 end = start + writer->allocated;
3374 assert(str != NULL);
3375 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003376 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003377}
Victor Stinner68762572019-10-07 18:42:01 +02003378#endif
Victor Stinner00165072015-10-09 01:53:21 +02003379
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003380void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003381_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003382{
3383 Py_ssize_t allocated, pos;
3384
Victor Stinner68762572019-10-07 18:42:01 +02003385 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003386 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003387
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003388 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003389 if (writer->overallocate
3390 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3391 /* overallocate to limit the number of realloc() */
3392 allocated += allocated / OVERALLOCATE_FACTOR;
3393 }
3394
Victor Stinner2bf89932015-10-14 11:25:33 +02003395 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003396 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003397 if (writer->use_bytearray) {
3398 if (PyByteArray_Resize(writer->buffer, allocated))
3399 goto error;
3400 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3401 but we cannot use ob_alloc because bytes may need to be moved
3402 to use the whole buffer. bytearray uses an internal optimization
3403 to avoid moving or copying bytes when bytes are removed at the
3404 beginning (ex: del bytearray[:1]). */
3405 }
3406 else {
3407 if (_PyBytes_Resize(&writer->buffer, allocated))
3408 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003409 }
3410 }
3411 else {
3412 /* convert from stack buffer to bytes object buffer */
3413 assert(writer->buffer == NULL);
3414
Victor Stinner661aacc2015-10-14 09:41:48 +02003415 if (writer->use_bytearray)
3416 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3417 else
3418 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003419 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003420 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003421
3422 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003423 char *dest;
3424 if (writer->use_bytearray)
3425 dest = PyByteArray_AS_STRING(writer->buffer);
3426 else
3427 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003428 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003429 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003430 pos);
3431 }
3432
Victor Stinnerb3653a32015-10-09 03:38:24 +02003433 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003434#ifndef NDEBUG
3435 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3436 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003437#endif
Victor Stinner00165072015-10-09 01:53:21 +02003438 }
3439 writer->allocated = allocated;
3440
3441 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003442 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003443 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003444
3445error:
3446 _PyBytesWriter_Dealloc(writer);
3447 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003448}
3449
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003450void*
3451_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3452{
3453 Py_ssize_t new_min_size;
3454
Victor Stinner68762572019-10-07 18:42:01 +02003455 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003456 assert(size >= 0);
3457
3458 if (size == 0) {
3459 /* nothing to do */
3460 return str;
3461 }
3462
3463 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3464 PyErr_NoMemory();
3465 _PyBytesWriter_Dealloc(writer);
3466 return NULL;
3467 }
3468 new_min_size = writer->min_size + size;
3469
3470 if (new_min_size > writer->allocated)
3471 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3472
3473 writer->min_size = new_min_size;
3474 return str;
3475}
3476
Victor Stinner00165072015-10-09 01:53:21 +02003477/* Allocate the buffer to write size bytes.
3478 Return the pointer to the beginning of buffer data.
3479 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003480void*
Victor Stinner00165072015-10-09 01:53:21 +02003481_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3482{
3483 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003484 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003485 assert(size >= 0);
3486
Victor Stinnerb3653a32015-10-09 03:38:24 +02003487 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003488#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003489 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003490 /* In debug mode, don't use the full small buffer because it is less
3491 efficient than bytes and bytearray objects to detect buffer underflow
3492 and buffer overflow. Use 10 bytes of the small buffer to test also
3493 code using the smaller buffer in debug mode.
3494
3495 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3496 in debug mode to also be able to detect stack overflow when running
3497 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3498 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3499 stack overflow. */
3500 writer->allocated = Py_MIN(writer->allocated, 10);
3501 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3502 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003503 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003504#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003505 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003506#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003507 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003508}
3509
3510PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003511_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003512{
Victor Stinner2bf89932015-10-14 11:25:33 +02003513 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003514 PyObject *result;
3515
Victor Stinner68762572019-10-07 18:42:01 +02003516 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003517
Victor Stinner2bf89932015-10-14 11:25:33 +02003518 size = _PyBytesWriter_GetSize(writer, str);
3519 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003520 Py_CLEAR(writer->buffer);
3521 /* Get the empty byte string singleton */
3522 result = PyBytes_FromStringAndSize(NULL, 0);
3523 }
3524 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003525 if (writer->use_bytearray) {
3526 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3527 }
3528 else {
3529 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3530 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003531 }
3532 else {
3533 result = writer->buffer;
3534 writer->buffer = NULL;
3535
Victor Stinner2bf89932015-10-14 11:25:33 +02003536 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003537 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003538 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003539 Py_DECREF(result);
3540 return NULL;
3541 }
3542 }
3543 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003544 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003545 assert(result == NULL);
3546 return NULL;
3547 }
Victor Stinner00165072015-10-09 01:53:21 +02003548 }
3549 }
Victor Stinner00165072015-10-09 01:53:21 +02003550 }
Victor Stinner00165072015-10-09 01:53:21 +02003551 return result;
3552}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003553
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003554void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003555_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003556 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003557{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003558 char *str = (char *)ptr;
3559
Victor Stinnerce179bf2015-10-09 12:57:22 +02003560 str = _PyBytesWriter_Prepare(writer, str, size);
3561 if (str == NULL)
3562 return NULL;
3563
Christian Heimesf051e432016-09-13 20:22:02 +02003564 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003565 str += size;
3566
3567 return str;
3568}