blob: 3a922d32b16e4ada9bc69803df258556ce9d838a [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner91698d82020-06-25 14:07:40 +02007#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8#include "pycore_initconfig.h" // _PyStatus_OK()
9#include "pycore_object.h" // _PyObject_GC_TRACK
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +020010#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000011
Gregory P. Smith8cb65692015-04-25 23:22:26 +000012#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000013#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030018/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020019
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030020#include "clinic/bytesobject.c.h"
21
Hai Shi46874c22020-01-30 17:20:25 -060022_Py_IDENTIFIER(__bytes__);
23
Mark Dickinsonfd24b322008-12-06 15:33:31 +000024/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
25 for a string of length n should request PyBytesObject_SIZE + n bytes.
26
27 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
28 3 bytes per string allocation on a typical system.
29*/
30#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
31
Victor Stinner2bf89932015-10-14 11:25:33 +020032/* Forward declaration */
33Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
34 char *str);
35
Victor Stinnerc41eed12020-06-23 15:54:35 +020036
37static struct _Py_bytes_state*
38get_bytes_state(void)
39{
40 PyInterpreterState *interp = _PyInterpreterState_GET();
41 return &interp->bytes;
42}
43
44
Victor Stinner91698d82020-06-25 14:07:40 +020045// Return a borrowed reference to the empty bytes string singleton.
46static inline PyObject* bytes_get_empty(void)
47{
48 struct _Py_bytes_state *state = get_bytes_state();
49 // bytes_get_empty() must not be called before _PyBytes_Init()
50 // or after _PyBytes_Fini()
51 assert(state->empty_string != NULL);
52 return state->empty_string;
53}
54
55
56// Return a strong reference to the empty bytes string singleton.
57static inline PyObject* bytes_new_empty(void)
58{
59 PyObject *empty = bytes_get_empty();
60 Py_INCREF(empty);
61 return (PyObject *)empty;
62}
63
64
65static int
66bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
67{
68 // Create the empty bytes string singleton
69 PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
70 if (op == NULL) {
71 return -1;
72 }
73 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
74 op->ob_shash = -1;
75 op->ob_sval[0] = '\0';
76
77 assert(state->empty_string == NULL);
78 state->empty_string = (PyObject *)op;
79 return 0;
80}
81
82
Christian Heimes2c9c7a52008-05-26 13:42:13 +000083/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084 For PyBytes_FromString(), the parameter `str' points to a null-terminated
85 string containing exactly `size' bytes.
86
Martin Pantera90a4a92016-05-30 04:04:50 +000087 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088 either NULL or else points to a string containing at least `size' bytes.
89 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
90 not have to be null-terminated. (Therefore it is safe to construct a
91 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
92 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
93 bytes (setting the last byte to the null terminating character) and you can
94 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000095 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000096 alter the data yourself, since the strings may be shared.
97
98 The PyObject member `op->ob_size', which denotes the number of "extra
99 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +0200100 allocated for string data, not counting the null terminating character.
101 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000102 PyBytes_FromStringAndSize()) or the length of the string in the `str'
103 parameter (for PyBytes_FromString()).
104*/
Victor Stinnerdb067af2014-05-02 22:31:14 +0200105static PyObject *
106_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +0000107{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200108 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +0200110
Victor Stinnerc41eed12020-06-23 15:54:35 +0200111 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200112 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000114
Victor Stinner049e5092014-08-17 22:20:00 +0200115 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 PyErr_SetString(PyExc_OverflowError,
117 "byte string is too large");
118 return NULL;
119 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200122 if (use_calloc)
123 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
124 else
125 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200126 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200128 }
129 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000130 op->ob_shash = -1;
Victor Stinner91698d82020-06-25 14:07:40 +0200131 if (!use_calloc) {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200132 op->ob_sval[size] = '\0';
Victor Stinnerdb067af2014-05-02 22:31:14 +0200133 }
134 return (PyObject *) op;
135}
136
137PyObject *
138PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
139{
140 PyBytesObject *op;
141 if (size < 0) {
142 PyErr_SetString(PyExc_SystemError,
143 "Negative size passed to PyBytes_FromStringAndSize");
144 return NULL;
145 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200146 if (size == 1 && str != NULL) {
147 struct _Py_bytes_state *state = get_bytes_state();
148 op = state->characters[*str & UCHAR_MAX];
149 if (op != NULL) {
150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200153 }
Victor Stinner91698d82020-06-25 14:07:40 +0200154 if (size == 0) {
155 return bytes_new_empty();
156 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200157
158 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
159 if (op == NULL)
160 return NULL;
161 if (str == NULL)
162 return (PyObject *) op;
163
Christian Heimesf051e432016-09-13 20:22:02 +0200164 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200165 /* share short strings */
166 if (size == 1) {
Victor Stinnerc41eed12020-06-23 15:54:35 +0200167 struct _Py_bytes_state *state = get_bytes_state();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200169 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 }
171 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000172}
173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000176{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200177 size_t size;
178 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 assert(str != NULL);
181 size = strlen(str);
182 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
183 PyErr_SetString(PyExc_OverflowError,
184 "byte string is too long");
185 return NULL;
186 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200187
188 struct _Py_bytes_state *state = get_bytes_state();
189 if (size == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +0200190 return bytes_new_empty();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200192 else if (size == 1) {
193 op = state->characters[*str & UCHAR_MAX];
194 if (op != NULL) {
195 Py_INCREF(op);
196 return (PyObject *)op;
197 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 /* Inline PyObject_NewVar */
201 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200202 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200204 }
205 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200207 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 /* share short strings */
Victor Stinner91698d82020-06-25 14:07:40 +0200209 if (size == 1) {
210 assert(state->characters[*str & UCHAR_MAX] == NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200212 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 }
214 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000215}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000216
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000217PyObject *
218PyBytes_FromFormatV(const char *format, va_list vargs)
219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200221 const char *f;
222 const char *p;
223 Py_ssize_t prec;
224 int longflag;
225 int size_tflag;
226 /* Longest 64-bit formatted numbers:
227 - "18446744073709551615\0" (21 bytes)
228 - "-9223372036854775808\0" (21 bytes)
229 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000230
Victor Stinner03dab782015-10-14 00:21:35 +0200231 Longest 64-bit pointer representation:
232 "0xffffffffffffffff\0" (19 bytes). */
233 char buffer[21];
234 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000235
Victor Stinner03dab782015-10-14 00:21:35 +0200236 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000237
Victor Stinner03dab782015-10-14 00:21:35 +0200238 s = _PyBytesWriter_Alloc(&writer, strlen(format));
239 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200241 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000242
Victor Stinner03dab782015-10-14 00:21:35 +0200243#define WRITE_BYTES(str) \
244 do { \
245 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
246 if (s == NULL) \
247 goto error; \
248 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200251 if (*f != '%') {
252 *s++ = *f;
253 continue;
254 }
255
256 p = f++;
257
258 /* ignore the width (ex: 10 in "%10s") */
259 while (Py_ISDIGIT(*f))
260 f++;
261
262 /* parse the precision (ex: 10 in "%.10s") */
263 prec = 0;
264 if (*f == '.') {
265 f++;
266 for (; Py_ISDIGIT(*f); f++) {
267 prec = (prec * 10) + (*f - '0');
268 }
269 }
270
271 while (*f && *f != '%' && !Py_ISALPHA(*f))
272 f++;
273
274 /* handle the long flag ('l'), but only for %ld and %lu.
275 others can be added when necessary. */
276 longflag = 0;
277 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
278 longflag = 1;
279 ++f;
280 }
281
282 /* handle the size_t flag ('z'). */
283 size_tflag = 0;
284 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
285 size_tflag = 1;
286 ++f;
287 }
288
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700289 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200290 (ex: 2 for "%s") */
291 writer.min_size -= (f - p + 1);
292
293 switch (*f) {
294 case 'c':
295 {
296 int c = va_arg(vargs, int);
297 if (c < 0 || c > 255) {
298 PyErr_SetString(PyExc_OverflowError,
299 "PyBytes_FromFormatV(): %c format "
300 "expects an integer in range [0; 255]");
301 goto error;
302 }
303 writer.min_size++;
304 *s++ = (unsigned char)c;
305 break;
306 }
307
308 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200309 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200310 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200311 }
312 else if (size_tflag) {
313 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
314 }
315 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200316 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200317 }
Victor Stinner03dab782015-10-14 00:21:35 +0200318 assert(strlen(buffer) < sizeof(buffer));
319 WRITE_BYTES(buffer);
320 break;
321
322 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200323 if (longflag) {
324 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
325 }
326 else if (size_tflag) {
327 sprintf(buffer, "%zu", va_arg(vargs, size_t));
328 }
329 else {
330 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 assert(strlen(buffer) < sizeof(buffer));
333 WRITE_BYTES(buffer);
334 break;
335
336 case 'i':
337 sprintf(buffer, "%i", va_arg(vargs, int));
338 assert(strlen(buffer) < sizeof(buffer));
339 WRITE_BYTES(buffer);
340 break;
341
342 case 'x':
343 sprintf(buffer, "%x", va_arg(vargs, int));
344 assert(strlen(buffer) < sizeof(buffer));
345 WRITE_BYTES(buffer);
346 break;
347
348 case 's':
349 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200351
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200352 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200353 if (prec <= 0) {
354 i = strlen(p);
355 }
356 else {
357 i = 0;
358 while (i < prec && p[i]) {
359 i++;
360 }
361 }
Victor Stinner03dab782015-10-14 00:21:35 +0200362 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
363 if (s == NULL)
364 goto error;
365 break;
366 }
367
368 case 'p':
369 sprintf(buffer, "%p", va_arg(vargs, void*));
370 assert(strlen(buffer) < sizeof(buffer));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (buffer[1] == 'X')
373 buffer[1] = 'x';
374 else if (buffer[1] != 'x') {
375 memmove(buffer+2, buffer, strlen(buffer)+1);
376 buffer[0] = '0';
377 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 }
Victor Stinner03dab782015-10-14 00:21:35 +0200379 WRITE_BYTES(buffer);
380 break;
381
382 case '%':
383 writer.min_size++;
384 *s++ = '%';
385 break;
386
387 default:
388 if (*f == 0) {
389 /* fix min_size if we reached the end of the format string */
390 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000392
Victor Stinner03dab782015-10-14 00:21:35 +0200393 /* invalid format string: copy unformatted string and exit */
394 WRITE_BYTES(p);
395 return _PyBytesWriter_Finish(&writer, s);
396 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000398
Victor Stinner03dab782015-10-14 00:21:35 +0200399#undef WRITE_BYTES
400
401 return _PyBytesWriter_Finish(&writer, s);
402
403 error:
404 _PyBytesWriter_Dealloc(&writer);
405 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000406}
407
408PyObject *
409PyBytes_FromFormat(const char *format, ...)
410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 PyObject* ret;
412 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
414#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000416#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000418#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 ret = PyBytes_FromFormatV(format, vargs);
420 va_end(vargs);
421 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000422}
423
Ethan Furmanb95b5612015-01-23 20:05:18 -0800424/* Helpers for formatstring */
425
426Py_LOCAL_INLINE(PyObject *)
427getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
428{
429 Py_ssize_t argidx = *p_argidx;
430 if (argidx < arglen) {
431 (*p_argidx)++;
432 if (arglen < 0)
433 return args;
434 else
435 return PyTuple_GetItem(args, argidx);
436 }
437 PyErr_SetString(PyExc_TypeError,
438 "not enough arguments for format string");
439 return NULL;
440}
441
/* Bit flags, one per flag character that may follow '%' in a format
 * specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
454
455/* Returns a new reference to a PyBytes object, or NULL on failure. */
456
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457static char*
458formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200459 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800460{
461 char *p;
462 PyObject *result;
463 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200464 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800465
466 x = PyFloat_AsDouble(v);
467 if (x == -1.0 && PyErr_Occurred()) {
468 PyErr_Format(PyExc_TypeError, "float argument required, "
469 "not %.200s", Py_TYPE(v)->tp_name);
470 return NULL;
471 }
472
473 if (prec < 0)
474 prec = 6;
475
476 p = PyOS_double_to_string(x, type, prec,
477 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
478
479 if (p == NULL)
480 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200481
482 len = strlen(p);
483 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200484 str = _PyBytesWriter_Prepare(writer, str, len);
485 if (str == NULL)
486 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200487 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200488 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200489 str += len;
490 return str;
491 }
492
493 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800494 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200495 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600496 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497}
498
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499static PyObject *
500formatlong(PyObject *v, int flags, int prec, int type)
501{
502 PyObject *result, *iobj;
503 if (type == 'i')
504 type = 'd';
505 if (PyLong_Check(v))
506 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
507 if (PyNumber_Check(v)) {
508 /* make sure number is a type of integer for o, x, and X */
509 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300510 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300511 else
512 iobj = PyNumber_Long(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300513 if (iobj != NULL) {
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300514 assert(PyLong_Check(iobj));
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300515 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
516 Py_DECREF(iobj);
517 return result;
518 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300519 if (!PyErr_ExceptionMatches(PyExc_TypeError))
520 return NULL;
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300521 }
522 PyErr_Format(PyExc_TypeError,
523 "%%%c format: %s is required, not %.200s", type,
524 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
525 : "a number",
526 Py_TYPE(v)->tp_name);
527 return NULL;
528}
529
530static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300533 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 *p = PyBytes_AS_STRING(arg)[0];
535 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300537 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *p = PyByteArray_AS_STRING(arg)[0];
539 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 }
541 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300542 int overflow;
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300543 long ival = PyLong_AsLongAndOverflow(arg, &overflow);
544 if (ival == -1 && PyErr_Occurred()) {
545 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300546 goto onError;
547 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300548 return 0;
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300549 }
Serhiy Storchakae67f7db2020-06-29 22:36:41 +0300550 if (!(0 <= ival && ival <= 255)) {
551 /* this includes an overflow in converting to C long */
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300552 PyErr_SetString(PyExc_OverflowError,
553 "%c arg not in range(256)");
554 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800555 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300556 *p = (char)ival;
557 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800558 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300559 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200560 PyErr_SetString(PyExc_TypeError,
561 "%c requires an integer in range(256) or a single byte");
562 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800563}
564
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800565static PyObject *_PyBytes_FromBuffer(PyObject *x);
566
Ethan Furmanb95b5612015-01-23 20:05:18 -0800567static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200568format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200570 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571 /* is it a bytes object? */
572 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200573 *pbuf = PyBytes_AS_STRING(v);
574 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800575 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200576 return v;
577 }
578 if (PyByteArray_Check(v)) {
579 *pbuf = PyByteArray_AS_STRING(v);
580 *plen = PyByteArray_GET_SIZE(v);
581 Py_INCREF(v);
582 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 }
584 /* does it support __bytes__? */
585 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
586 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100587 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 Py_DECREF(func);
589 if (result == NULL)
590 return NULL;
591 if (!PyBytes_Check(result)) {
592 PyErr_Format(PyExc_TypeError,
593 "__bytes__ returned non-bytes (type %.200s)",
594 Py_TYPE(result)->tp_name);
595 Py_DECREF(result);
596 return NULL;
597 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200598 *pbuf = PyBytes_AS_STRING(result);
599 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 return result;
601 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800602 /* does it support buffer protocol? */
603 if (PyObject_CheckBuffer(v)) {
604 /* maybe we can avoid making a copy of the buffer object here? */
605 result = _PyBytes_FromBuffer(v);
606 if (result == NULL)
607 return NULL;
608 *pbuf = PyBytes_AS_STRING(result);
609 *plen = PyBytes_GET_SIZE(result);
610 return result;
611 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800613 "%%b requires a bytes-like object, "
614 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800615 Py_TYPE(v)->tp_name);
616 return NULL;
617}
618
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800620
621PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200622_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
623 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800624{
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 const char *fmt;
626 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800627 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200628 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631 _PyBytesWriter writer;
632
Victor Stinner772b2b02015-10-14 09:56:53 +0200633 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800634 PyErr_BadInternalCall();
635 return NULL;
636 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200637 fmt = format;
638 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200639
640 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200641 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642
643 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
644 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800645 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200646 if (!use_bytearray)
647 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200648
Ethan Furmanb95b5612015-01-23 20:05:18 -0800649 if (PyTuple_Check(args)) {
650 arglen = PyTuple_GET_SIZE(args);
651 argidx = 0;
652 }
653 else {
654 arglen = -1;
655 argidx = -2;
656 }
657 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
658 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
659 !PyByteArray_Check(args)) {
660 dict = args;
661 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200662
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 while (--fmtcnt >= 0) {
664 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200665 Py_ssize_t len;
666 char *pos;
667
Xiang Zhangb76ad512017-03-06 17:17:05 +0800668 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 if (pos != NULL)
670 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200671 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800672 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200673 assert(len != 0);
674
Christian Heimesf051e432016-09-13 20:22:02 +0200675 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200676 res += len;
677 fmt += len;
678 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800679 }
680 else {
681 /* Got a format specifier */
682 int flags = 0;
683 Py_ssize_t width = -1;
684 int prec = -1;
685 int c = '\0';
686 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800687 PyObject *v = NULL;
688 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200689 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800690 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200691 Py_ssize_t len = 0;
692 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200693 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800694
Ethan Furmanb95b5612015-01-23 20:05:18 -0800695 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200696 if (*fmt == '%') {
697 *res++ = '%';
698 fmt++;
699 fmtcnt--;
700 continue;
701 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800702 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200703 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800704 Py_ssize_t keylen;
705 PyObject *key;
706 int pcount = 1;
707
708 if (dict == NULL) {
709 PyErr_SetString(PyExc_TypeError,
710 "format requires a mapping");
711 goto error;
712 }
713 ++fmt;
714 --fmtcnt;
715 keystart = fmt;
716 /* Skip over balanced parentheses */
717 while (pcount > 0 && --fmtcnt >= 0) {
718 if (*fmt == ')')
719 --pcount;
720 else if (*fmt == '(')
721 ++pcount;
722 fmt++;
723 }
724 keylen = fmt - keystart - 1;
725 if (fmtcnt < 0 || pcount > 0) {
726 PyErr_SetString(PyExc_ValueError,
727 "incomplete format key");
728 goto error;
729 }
730 key = PyBytes_FromStringAndSize(keystart,
731 keylen);
732 if (key == NULL)
733 goto error;
734 if (args_owned) {
735 Py_DECREF(args);
736 args_owned = 0;
737 }
738 args = PyObject_GetItem(dict, key);
739 Py_DECREF(key);
740 if (args == NULL) {
741 goto error;
742 }
743 args_owned = 1;
744 arglen = -1;
745 argidx = -2;
746 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200747
748 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800749 while (--fmtcnt >= 0) {
750 switch (c = *fmt++) {
751 case '-': flags |= F_LJUST; continue;
752 case '+': flags |= F_SIGN; continue;
753 case ' ': flags |= F_BLANK; continue;
754 case '#': flags |= F_ALT; continue;
755 case '0': flags |= F_ZERO; continue;
756 }
757 break;
758 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200759
760 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800761 if (c == '*') {
762 v = getnextarg(args, arglen, &argidx);
763 if (v == NULL)
764 goto error;
765 if (!PyLong_Check(v)) {
766 PyErr_SetString(PyExc_TypeError,
767 "* wants int");
768 goto error;
769 }
770 width = PyLong_AsSsize_t(v);
771 if (width == -1 && PyErr_Occurred())
772 goto error;
773 if (width < 0) {
774 flags |= F_LJUST;
775 width = -width;
776 }
777 if (--fmtcnt >= 0)
778 c = *fmt++;
779 }
780 else if (c >= 0 && isdigit(c)) {
781 width = c - '0';
782 while (--fmtcnt >= 0) {
783 c = Py_CHARMASK(*fmt++);
784 if (!isdigit(c))
785 break;
786 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
787 PyErr_SetString(
788 PyExc_ValueError,
789 "width too big");
790 goto error;
791 }
792 width = width*10 + (c - '0');
793 }
794 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200795
796 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (c == '.') {
798 prec = 0;
799 if (--fmtcnt >= 0)
800 c = *fmt++;
801 if (c == '*') {
802 v = getnextarg(args, arglen, &argidx);
803 if (v == NULL)
804 goto error;
805 if (!PyLong_Check(v)) {
806 PyErr_SetString(
807 PyExc_TypeError,
808 "* wants int");
809 goto error;
810 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200811 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800812 if (prec == -1 && PyErr_Occurred())
813 goto error;
814 if (prec < 0)
815 prec = 0;
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 else if (c >= 0 && isdigit(c)) {
820 prec = c - '0';
821 while (--fmtcnt >= 0) {
822 c = Py_CHARMASK(*fmt++);
823 if (!isdigit(c))
824 break;
825 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
826 PyErr_SetString(
827 PyExc_ValueError,
828 "prec too big");
829 goto error;
830 }
831 prec = prec*10 + (c - '0');
832 }
833 }
834 } /* prec */
835 if (fmtcnt >= 0) {
836 if (c == 'h' || c == 'l' || c == 'L') {
837 if (--fmtcnt >= 0)
838 c = *fmt++;
839 }
840 }
841 if (fmtcnt < 0) {
842 PyErr_SetString(PyExc_ValueError,
843 "incomplete format");
844 goto error;
845 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200846 v = getnextarg(args, arglen, &argidx);
847 if (v == NULL)
848 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200849
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300850 if (fmtcnt == 0) {
851 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200852 writer.overallocate = 0;
853 }
854
Ethan Furmanb95b5612015-01-23 20:05:18 -0800855 sign = 0;
856 fill = ' ';
857 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700858 case 'r':
859 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200861 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800862 if (temp == NULL)
863 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200864 assert(PyUnicode_IS_ASCII(temp));
865 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
866 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800867 if (prec >= 0 && len > prec)
868 len = prec;
869 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200870
Ethan Furmanb95b5612015-01-23 20:05:18 -0800871 case 's':
872 // %s is only for 2/3 code; 3 only code should use %b
873 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200874 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800875 if (temp == NULL)
876 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800877 if (prec >= 0 && len > prec)
878 len = prec;
879 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200880
Ethan Furmanb95b5612015-01-23 20:05:18 -0800881 case 'i':
882 case 'd':
883 case 'u':
884 case 'o':
885 case 'x':
886 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200887 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200888 && width == -1 && prec == -1
889 && !(flags & (F_SIGN | F_BLANK))
890 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200891 {
892 /* Fast path */
893 int alternate = flags & F_ALT;
894 int base;
895
896 switch(c)
897 {
898 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700899 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200900 case 'd':
901 case 'i':
902 case 'u':
903 base = 10;
904 break;
905 case 'o':
906 base = 8;
907 break;
908 case 'x':
909 case 'X':
910 base = 16;
911 break;
912 }
913
914 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200915 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200916 res = _PyLong_FormatBytesWriter(&writer, res,
917 v, base, alternate);
918 if (res == NULL)
919 goto error;
920 continue;
921 }
922
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300923 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200924 if (!temp)
925 goto error;
926 assert(PyUnicode_IS_ASCII(temp));
927 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
928 len = PyUnicode_GET_LENGTH(temp);
929 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800930 if (flags & F_ZERO)
931 fill = '0';
932 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200933
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 case 'e':
935 case 'E':
936 case 'f':
937 case 'F':
938 case 'g':
939 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200940 if (width == -1 && prec == -1
941 && !(flags & (F_SIGN | F_BLANK)))
942 {
943 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200944 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200945 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200946 if (res == NULL)
947 goto error;
948 continue;
949 }
950
Victor Stinnerad771582015-10-09 12:38:53 +0200951 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800952 goto error;
953 pbuf = PyBytes_AS_STRING(temp);
954 len = PyBytes_GET_SIZE(temp);
955 sign = 1;
956 if (flags & F_ZERO)
957 fill = '0';
958 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200961 pbuf = &onechar;
962 len = byte_converter(v, &onechar);
963 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800964 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200965 if (width == -1) {
966 /* Fast path */
967 *res++ = onechar;
968 continue;
969 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200971
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 default:
973 PyErr_Format(PyExc_ValueError,
974 "unsupported format character '%c' (0x%x) "
975 "at index %zd",
976 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200977 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800978 goto error;
979 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200980
Ethan Furmanb95b5612015-01-23 20:05:18 -0800981 if (sign) {
982 if (*pbuf == '-' || *pbuf == '+') {
983 sign = *pbuf++;
984 len--;
985 }
986 else if (flags & F_SIGN)
987 sign = '+';
988 else if (flags & F_BLANK)
989 sign = ' ';
990 else
991 sign = 0;
992 }
993 if (width < len)
994 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 alloc = width;
997 if (sign != 0 && len == width)
998 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200999 /* 2: size preallocated for %s */
1000 if (alloc > 2) {
1001 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001002 if (res == NULL)
1003 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001005#ifndef NDEBUG
1006 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001007#endif
1008
1009 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 if (sign) {
1011 if (fill != ' ')
1012 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001013 if (width > len)
1014 width--;
1015 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001016
1017 /* Write the numeric prefix for "x", "X" and "o" formats
1018 if the alternate form is used.
1019 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001020 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 assert(pbuf[0] == '0');
1022 assert(pbuf[1] == c);
1023 if (fill != ' ') {
1024 *res++ = *pbuf++;
1025 *res++ = *pbuf++;
1026 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 width -= 2;
1028 if (width < 0)
1029 width = 0;
1030 len -= 2;
1031 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
1033 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035 memset(res, fill, width - len);
1036 res += (width - len);
1037 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
1040 /* If padding with spaces: write sign if needed and/or numeric
1041 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 if (fill == ' ') {
1043 if (sign)
1044 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001045 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 assert(pbuf[0] == '0');
1047 assert(pbuf[1] == c);
1048 *res++ = *pbuf++;
1049 *res++ = *pbuf++;
1050 }
1051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052
1053 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001054 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001055 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001056
1057 /* Pad right with the fill character if needed */
1058 if (width > len) {
1059 memset(res, ' ', width - len);
1060 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001062
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001063 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064 PyErr_SetString(PyExc_TypeError,
1065 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001066 Py_XDECREF(temp);
1067 goto error;
1068 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001069 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001071#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001072 /* check that we computed the exact size for this write */
1073 assert((res - before) == alloc);
1074#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001075 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001076
1077 /* If overallocation was disabled, ensure that it was the last
1078 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001079 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001080 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001081
Ethan Furmanb95b5612015-01-23 20:05:18 -08001082 if (argidx < arglen && !dict) {
1083 PyErr_SetString(PyExc_TypeError,
1084 "not all arguments converted during bytes formatting");
1085 goto error;
1086 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001087
Ethan Furmanb95b5612015-01-23 20:05:18 -08001088 if (args_owned) {
1089 Py_DECREF(args);
1090 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001091 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001092
1093 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001094 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001095 if (args_owned) {
1096 Py_DECREF(args);
1097 }
1098 return NULL;
1099}
1100
Greg Price3a4f6672019-09-12 11:12:22 -07001101/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001102PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 Py_ssize_t len,
1104 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001105 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001108 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001110 _PyBytesWriter writer;
1111
1112 _PyBytesWriter_Init(&writer);
1113
1114 p = _PyBytesWriter_Alloc(&writer, len);
1115 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 writer.overallocate = 1;
1118
Eric V. Smith42454af2016-10-31 09:22:08 -04001119 *first_invalid_escape = NULL;
1120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 end = s + len;
1122 while (s < end) {
1123 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001124 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 continue;
1126 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 PyErr_SetString(PyExc_ValueError,
1131 "Trailing \\ in string");
1132 goto failed;
1133 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 switch (*s++) {
1136 /* XXX This assumes ASCII! */
1137 case '\n': break;
1138 case '\\': *p++ = '\\'; break;
1139 case '\'': *p++ = '\''; break;
1140 case '\"': *p++ = '\"'; break;
1141 case 'b': *p++ = '\b'; break;
1142 case 'f': *p++ = '\014'; break; /* FF */
1143 case 't': *p++ = '\t'; break;
1144 case 'n': *p++ = '\n'; break;
1145 case 'r': *p++ = '\r'; break;
1146 case 'v': *p++ = '\013'; break; /* VT */
1147 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1148 case '0': case '1': case '2': case '3':
1149 case '4': case '5': case '6': case '7':
1150 c = s[-1] - '0';
1151 if (s < end && '0' <= *s && *s <= '7') {
1152 c = (c<<3) + *s++ - '0';
1153 if (s < end && '0' <= *s && *s <= '7')
1154 c = (c<<3) + *s++ - '0';
1155 }
1156 *p++ = c;
1157 break;
1158 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001159 if (s+1 < end) {
1160 int digit1, digit2;
1161 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1162 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1163 if (digit1 < 16 && digit2 < 16) {
1164 *p++ = (unsigned char)((digit1 << 4) + digit2);
1165 s += 2;
1166 break;
1167 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001169 /* invalid hexadecimal digits */
1170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001172 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001173 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001174 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 goto failed;
1176 }
1177 if (strcmp(errors, "replace") == 0) {
1178 *p++ = '?';
1179 } else if (strcmp(errors, "ignore") == 0)
1180 /* do nothing */;
1181 else {
1182 PyErr_Format(PyExc_ValueError,
1183 "decoding error; unknown "
1184 "error handling code: %.400s",
1185 errors);
1186 goto failed;
1187 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001188 /* skip \x */
1189 if (s < end && Py_ISXDIGIT(s[0]))
1190 s++; /* and a hexdigit */
1191 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001194 if (*first_invalid_escape == NULL) {
1195 *first_invalid_escape = s-1; /* Back up one char, since we've
1196 already incremented s. */
1197 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001199 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 }
1201 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001202
1203 return _PyBytesWriter_Finish(&writer, p);
1204
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001206 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208}
1209
Eric V. Smith42454af2016-10-31 09:22:08 -04001210PyObject *PyBytes_DecodeEscape(const char *s,
1211 Py_ssize_t len,
1212 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001213 Py_ssize_t Py_UNUSED(unicode),
1214 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001215{
1216 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001217 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001218 &first_invalid_escape);
1219 if (result == NULL)
1220 return NULL;
1221 if (first_invalid_escape != NULL) {
1222 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1223 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001224 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001225 Py_DECREF(result);
1226 return NULL;
1227 }
1228 }
1229 return result;
1230
1231}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232/* -------------------------------------------------------------------- */
1233/* object api */
1234
1235Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001236PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 if (!PyBytes_Check(op)) {
1239 PyErr_Format(PyExc_TypeError,
1240 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1241 return -1;
1242 }
1243 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244}
1245
1246char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001247PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (!PyBytes_Check(op)) {
1250 PyErr_Format(PyExc_TypeError,
1251 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1252 return NULL;
1253 }
1254 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255}
1256
1257int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001258PyBytes_AsStringAndSize(PyObject *obj,
1259 char **s,
1260 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 if (s == NULL) {
1263 PyErr_BadInternalCall();
1264 return -1;
1265 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 if (!PyBytes_Check(obj)) {
1268 PyErr_Format(PyExc_TypeError,
1269 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1270 return -1;
1271 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 *s = PyBytes_AS_STRING(obj);
1274 if (len != NULL)
1275 *len = PyBytes_GET_SIZE(obj);
1276 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001277 PyErr_SetString(PyExc_ValueError,
1278 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 return -1;
1280 }
1281 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282}
Neal Norwitz6968b052007-02-27 19:02:19 +00001283
1284/* -------------------------------------------------------------------- */
1285/* Methods */
1286
Victor Stinner91698d82020-06-25 14:07:40 +02001287#define STRINGLIB_GET_EMPTY() bytes_get_empty()
Victor Stinnerc41eed12020-06-23 15:54:35 +02001288
Eric Smith0923d1d2009-04-16 20:16:10 +00001289#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
1291#include "stringlib/fastsearch.h"
1292#include "stringlib/count.h"
1293#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001294#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001295#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001296#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001297#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001298
Eric Smith0f78bff2009-11-30 01:01:42 +00001299#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001300
Victor Stinnerc41eed12020-06-23 15:54:35 +02001301#undef STRINGLIB_GET_EMPTY
1302
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303PyObject *
1304PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001305{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001306 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001308 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001310 unsigned char quote;
1311 const unsigned char *s;
1312 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313
1314 /* Compute size of output string */
1315 squotes = dquotes = 0;
1316 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001317 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001319 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001321 case '\'': squotes++; break;
1322 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001323 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001324 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 default:
1326 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001327 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001329 if (newsize > PY_SSIZE_T_MAX - incr)
1330 goto overflow;
1331 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 }
1333 quote = '\'';
1334 if (smartquotes && squotes && !dquotes)
1335 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001336 if (squotes && quote == '\'') {
1337 if (newsize > PY_SSIZE_T_MAX - squotes)
1338 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341
1342 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 if (v == NULL) {
1344 return NULL;
1345 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001348 *p++ = 'b', *p++ = quote;
1349 for (i = 0; i < length; i++) {
1350 unsigned char c = op->ob_sval[i];
1351 if (c == quote || c == '\\')
1352 *p++ = '\\', *p++ = c;
1353 else if (c == '\t')
1354 *p++ = '\\', *p++ = 't';
1355 else if (c == '\n')
1356 *p++ = '\\', *p++ = 'n';
1357 else if (c == '\r')
1358 *p++ = '\\', *p++ = 'r';
1359 else if (c < ' ' || c >= 0x7f) {
1360 *p++ = '\\';
1361 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001362 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1363 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 else
1366 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001368 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001369 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001371
1372 overflow:
1373 PyErr_SetString(PyExc_OverflowError,
1374 "bytes object is too large to make repr");
1375 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001376}
1377
Neal Norwitz6968b052007-02-27 19:02:19 +00001378static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001379bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001382}
1383
Neal Norwitz6968b052007-02-27 19:02:19 +00001384static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001385bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001386{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001387 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001389 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001391 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 }
1393 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001394}
1395
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001397bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001398{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400}
Neal Norwitz6968b052007-02-27 19:02:19 +00001401
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402/* This is also used by PyBytes_Concat() */
1403static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001404bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 Py_buffer va, vb;
1407 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 va.len = -1;
1410 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001411 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1412 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001414 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 goto done;
1416 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 /* Optimize end cases */
1419 if (va.len == 0 && PyBytes_CheckExact(b)) {
1420 result = b;
1421 Py_INCREF(result);
1422 goto done;
1423 }
1424 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1425 result = a;
1426 Py_INCREF(result);
1427 goto done;
1428 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001430 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 PyErr_NoMemory();
1432 goto done;
1433 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001435 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 if (result != NULL) {
1437 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1438 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1439 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440
1441 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (va.len != -1)
1443 PyBuffer_Release(&va);
1444 if (vb.len != -1)
1445 PyBuffer_Release(&vb);
1446 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447}
Neal Norwitz6968b052007-02-27 19:02:19 +00001448
1449static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001450bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001451{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001452 Py_ssize_t i;
1453 Py_ssize_t j;
1454 Py_ssize_t size;
1455 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 size_t nbytes;
1457 if (n < 0)
1458 n = 0;
1459 /* watch out for overflows: the size can overflow int,
1460 * and the # of bytes needed can overflow size_t
1461 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001462 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 PyErr_SetString(PyExc_OverflowError,
1464 "repeated bytes are too long");
1465 return NULL;
1466 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001467 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1469 Py_INCREF(a);
1470 return (PyObject *)a;
1471 }
1472 nbytes = (size_t)size;
1473 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1474 PyErr_SetString(PyExc_OverflowError,
1475 "repeated bytes are too long");
1476 return NULL;
1477 }
1478 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Victor Stinner04fc4f22020-06-16 01:28:07 +02001479 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +02001481 }
1482 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 op->ob_shash = -1;
1484 op->ob_sval[size] = '\0';
1485 if (Py_SIZE(a) == 1 && n > 0) {
1486 memset(op->ob_sval, a->ob_sval[0] , n);
1487 return (PyObject *) op;
1488 }
1489 i = 0;
1490 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001491 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 i = Py_SIZE(a);
1493 }
1494 while (i < size) {
1495 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001496 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 i += j;
1498 }
1499 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001500}
1501
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001502static int
1503bytes_contains(PyObject *self, PyObject *arg)
1504{
1505 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1506}
1507
Neal Norwitz6968b052007-02-27 19:02:19 +00001508static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001509bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001510{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 if (i < 0 || i >= Py_SIZE(a)) {
1512 PyErr_SetString(PyExc_IndexError, "index out of range");
1513 return NULL;
1514 }
1515 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001516}
1517
Benjamin Peterson621b4302016-09-09 13:54:34 -07001518static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001519bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1520{
1521 int cmp;
1522 Py_ssize_t len;
1523
1524 len = Py_SIZE(a);
1525 if (Py_SIZE(b) != len)
1526 return 0;
1527
1528 if (a->ob_sval[0] != b->ob_sval[0])
1529 return 0;
1530
1531 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1532 return (cmp == 0);
1533}
1534
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001535static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001536bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 int c;
1539 Py_ssize_t len_a, len_b;
1540 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001541 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 /* Make sure both arguments are strings. */
1544 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001545 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001546 rc = PyObject_IsInstance((PyObject*)a,
1547 (PyObject*)&PyUnicode_Type);
1548 if (!rc)
1549 rc = PyObject_IsInstance((PyObject*)b,
1550 (PyObject*)&PyUnicode_Type);
1551 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001553 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001554 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001555 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001556 return NULL;
1557 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001558 else {
1559 rc = PyObject_IsInstance((PyObject*)a,
1560 (PyObject*)&PyLong_Type);
1561 if (!rc)
1562 rc = PyObject_IsInstance((PyObject*)b,
1563 (PyObject*)&PyLong_Type);
1564 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001565 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001566 if (rc) {
1567 if (PyErr_WarnEx(PyExc_BytesWarning,
1568 "Comparison between bytes and int", 1))
1569 return NULL;
1570 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001571 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 }
stratakise8b19652017-11-02 11:32:54 +01001573 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001575 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001577 case Py_EQ:
1578 case Py_LE:
1579 case Py_GE:
1580 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001581 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001582 case Py_NE:
1583 case Py_LT:
1584 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001585 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 default:
1587 PyErr_BadArgument();
1588 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 }
1590 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 else if (op == Py_EQ || op == Py_NE) {
1592 int eq = bytes_compare_eq(a, b);
1593 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001594 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001595 }
1596 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001597 len_a = Py_SIZE(a);
1598 len_b = Py_SIZE(b);
1599 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001600 if (min_len > 0) {
1601 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001603 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 else
1606 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001607 if (c != 0)
1608 Py_RETURN_RICHCOMPARE(c, 0, op);
1609 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001611}
1612
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001613static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001614bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001615{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001616 if (a->ob_shash == -1) {
1617 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001618 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001619 }
1620 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001624bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001625{
Victor Stinnera15e2602020-04-08 02:01:56 +02001626 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1628 if (i == -1 && PyErr_Occurred())
1629 return NULL;
1630 if (i < 0)
1631 i += PyBytes_GET_SIZE(self);
1632 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1633 PyErr_SetString(PyExc_IndexError,
1634 "index out of range");
1635 return NULL;
1636 }
1637 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1638 }
1639 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001640 Py_ssize_t start, stop, step, slicelength, i;
1641 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001642 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 char* result_buf;
1644 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001645
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001646 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 return NULL;
1648 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001649 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1650 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 if (slicelength <= 0) {
1653 return PyBytes_FromStringAndSize("", 0);
1654 }
1655 else if (start == 0 && step == 1 &&
1656 slicelength == PyBytes_GET_SIZE(self) &&
1657 PyBytes_CheckExact(self)) {
1658 Py_INCREF(self);
1659 return (PyObject *)self;
1660 }
1661 else if (step == 1) {
1662 return PyBytes_FromStringAndSize(
1663 PyBytes_AS_STRING(self) + start,
1664 slicelength);
1665 }
1666 else {
1667 source_buf = PyBytes_AS_STRING(self);
1668 result = PyBytes_FromStringAndSize(NULL, slicelength);
1669 if (result == NULL)
1670 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 result_buf = PyBytes_AS_STRING(result);
1673 for (cur = start, i = 0; i < slicelength;
1674 cur += step, i++) {
1675 result_buf[i] = source_buf[cur];
1676 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 return result;
1679 }
1680 }
1681 else {
1682 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001683 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 Py_TYPE(item)->tp_name);
1685 return NULL;
1686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687}
1688
1689static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001690bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1693 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694}
1695
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001696static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 (lenfunc)bytes_length, /*sq_length*/
1698 (binaryfunc)bytes_concat, /*sq_concat*/
1699 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1700 (ssizeargfunc)bytes_item, /*sq_item*/
1701 0, /*sq_slice*/
1702 0, /*sq_ass_item*/
1703 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001704 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705};
1706
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001707static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 (lenfunc)bytes_length,
1709 (binaryfunc)bytes_subscript,
1710 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711};
1712
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001713static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 (getbufferproc)bytes_buffer_getbuffer,
1715 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716};
1717
1718
/* Values for the striptype argument of do_xstrip(), do_strip() and
   do_argstrip(). */
#define LEFTSTRIP  0    /* strip at the start only */
#define RIGHTSTRIP 1    /* strip at the end only */
#define BOTHSTRIP  2    /* strip at both ends */
1722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723/*[clinic input]
1724bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001726 sep: object = None
        The delimiter according to which to split the bytes.
1728 None (the default value) means split on ASCII whitespace characters
1729 (space, tab, return, newline, formfeed, vertical tab).
1730 maxsplit: Py_ssize_t = -1
1731 Maximum number of splits to do.
1732 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734Return a list of the sections in the bytes, using sep as the delimiter.
1735[clinic start generated code]*/
1736
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001737static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001738bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1739/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001740{
1741 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 const char *s = PyBytes_AS_STRING(self), *sub;
1743 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001744 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 if (maxsplit < 0)
1747 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001748 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001750 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 return NULL;
1752 sub = vsub.buf;
1753 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1756 PyBuffer_Release(&vsub);
1757 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001758}
1759
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760/*[clinic input]
1761bytes.partition
1762
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001763 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 /
1765
1766Partition the bytes into three parts using the given separator.
1767
1768This will search for the separator sep in the bytes. If the separator is found,
1769returns a 3-tuple containing the part before the separator, the separator
1770itself, and the part after it.
1771
1772If the separator is not found, returns a 3-tuple containing the original bytes
1773object and two empty bytes objects.
1774[clinic start generated code]*/
1775
Neal Norwitz6968b052007-02-27 19:02:19 +00001776static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001778/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001779{
Neal Norwitz6968b052007-02-27 19:02:19 +00001780 return stringlib_partition(
1781 (PyObject*) self,
1782 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001783 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001784 );
1785}
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787/*[clinic input]
1788bytes.rpartition
1789
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 /
1792
1793Partition the bytes into three parts using the given separator.
1794
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001795This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796the separator is found, returns a 3-tuple containing the part before the
1797separator, the separator itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing two empty bytes
1800objects and the original bytes object.
1801[clinic start generated code]*/
1802
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803static PyObject *
1804bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001805/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 return stringlib_rpartition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001812}
1813
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001814/*[clinic input]
1815bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001816
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817Return a list of the sections in the bytes, using sep as the delimiter.
1818
1819Splitting is done starting at the end of the bytes and working to the front.
1820[clinic start generated code]*/
1821
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001822static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001823bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1824/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825{
1826 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 const char *s = PyBytes_AS_STRING(self), *sub;
1828 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 if (maxsplit < 0)
1832 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001835 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 return NULL;
1837 sub = vsub.buf;
1838 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1841 PyBuffer_Release(&vsub);
1842 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001843}
1844
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846/*[clinic input]
1847bytes.join
1848
1849 iterable_of_bytes: object
1850 /
1851
1852Concatenate any number of bytes objects.
1853
1854The bytes whose method is called is inserted in between each pair.
1855
1856The result is returned as a new bytes object.
1857
1858Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1859[clinic start generated code]*/
1860
Neal Norwitz6968b052007-02-27 19:02:19 +00001861static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001862bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1863/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001864{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001865 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001866}
1867
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868PyObject *
1869_PyBytes_Join(PyObject *sep, PyObject *x)
1870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 assert(sep != NULL && PyBytes_Check(sep));
1872 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001873 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874}
1875
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001876static PyObject *
1877bytes_find(PyBytesObject *self, PyObject *args)
1878{
1879 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1880}
1881
1882static PyObject *
1883bytes_index(PyBytesObject *self, PyObject *args)
1884{
1885 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1886}
1887
1888
1889static PyObject *
1890bytes_rfind(PyBytesObject *self, PyObject *args)
1891{
1892 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1893}
1894
1895
1896static PyObject *
1897bytes_rindex(PyBytesObject *self, PyObject *args)
1898{
1899 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1900}
1901
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
1903Py_LOCAL_INLINE(PyObject *)
1904do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001907 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 Py_ssize_t len = PyBytes_GET_SIZE(self);
1909 char *sep;
1910 Py_ssize_t seplen;
1911 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001913 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 return NULL;
1915 sep = vsep.buf;
1916 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 i = 0;
1919 if (striptype != RIGHTSTRIP) {
1920 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1921 i++;
1922 }
1923 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 j = len;
1926 if (striptype != LEFTSTRIP) {
1927 do {
1928 j--;
1929 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1930 j++;
1931 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1936 Py_INCREF(self);
1937 return (PyObject*)self;
1938 }
1939 else
1940 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001941}
1942
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
1944Py_LOCAL_INLINE(PyObject *)
1945do_strip(PyBytesObject *self, int striptype)
1946{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001947 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 i = 0;
1951 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001952 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 i++;
1954 }
1955 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 j = len;
1958 if (striptype != LEFTSTRIP) {
1959 do {
1960 j--;
David Malcolm96960882010-11-05 17:23:41 +00001961 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 j++;
1963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1966 Py_INCREF(self);
1967 return (PyObject*)self;
1968 }
1969 else
1970 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001971}
1972
1973
1974Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001977 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001978 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 }
1980 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981}
1982
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983/*[clinic input]
1984bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 bytes: object = None
1987 /
1988
1989Strip leading and trailing bytes contained in the argument.
1990
1991If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1992[clinic start generated code]*/
1993
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001994static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001996/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001997{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001998 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001999}
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001/*[clinic input]
2002bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004 bytes: object = None
2005 /
2006
2007Strip leading bytes contained in the argument.
2008
2009If the argument is omitted or None, strip leading ASCII whitespace.
2010[clinic start generated code]*/
2011
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012static PyObject *
2013bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002014/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015{
2016 return do_argstrip(self, LEFTSTRIP, bytes);
2017}
2018
2019/*[clinic input]
2020bytes.rstrip
2021
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022 bytes: object = None
2023 /
2024
2025Strip trailing bytes contained in the argument.
2026
2027If the argument is omitted or None, strip trailing ASCII whitespace.
2028[clinic start generated code]*/
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030static PyObject *
2031bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002032/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033{
2034 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002035}
Neal Norwitz6968b052007-02-27 19:02:19 +00002036
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002037
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002038static PyObject *
2039bytes_count(PyBytesObject *self, PyObject *args)
2040{
2041 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2042}
2043
2044
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045/*[clinic input]
2046bytes.translate
2047
Victor Stinner049e5092014-08-17 22:20:00 +02002048 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002050 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002051 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002052
2053Return a copy with each character mapped by the given translation table.
2054
Martin Panter1b6c6da2016-08-27 08:35:02 +00002055All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002056The remaining characters are mapped through the given translation table.
2057[clinic start generated code]*/
2058
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002059static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002060bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002061 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002062/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002064 const char *input;
2065 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002066 Py_buffer table_view = {NULL, NULL};
2067 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002069 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002071 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 Py_ssize_t inlen, tablen, dellen = 0;
2073 PyObject *result;
2074 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076 if (PyBytes_Check(table)) {
2077 table_chars = PyBytes_AS_STRING(table);
2078 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002080 else if (table == Py_None) {
2081 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 tablen = 256;
2083 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002084 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002085 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002086 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002087 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002088 tablen = table_view.len;
2089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 if (tablen != 256) {
2092 PyErr_SetString(PyExc_ValueError,
2093 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002094 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 return NULL;
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098 if (deletechars != NULL) {
2099 if (PyBytes_Check(deletechars)) {
2100 del_table_chars = PyBytes_AS_STRING(deletechars);
2101 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002103 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002104 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002105 PyBuffer_Release(&table_view);
2106 return NULL;
2107 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002108 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002109 dellen = del_table_view.len;
2110 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 }
2112 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002113 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 dellen = 0;
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 inlen = PyBytes_GET_SIZE(input_obj);
2118 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002119 if (result == NULL) {
2120 PyBuffer_Release(&del_table_view);
2121 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002122 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002123 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002124 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 /* If no deletions are required, use faster code */
2129 for (i = inlen; --i >= 0; ) {
2130 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 changed = 1;
2133 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002134 if (!changed && PyBytes_CheckExact(input_obj)) {
2135 Py_INCREF(input_obj);
2136 Py_DECREF(result);
2137 result = input_obj;
2138 }
2139 PyBuffer_Release(&del_table_view);
2140 PyBuffer_Release(&table_view);
2141 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 for (i = 0; i < 256; i++)
2146 trans_table[i] = Py_CHARMASK(i);
2147 } else {
2148 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002151 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002155 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 for (i = inlen; --i >= 0; ) {
2158 c = Py_CHARMASK(*input++);
2159 if (trans_table[c] != -1)
2160 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2161 continue;
2162 changed = 1;
2163 }
2164 if (!changed && PyBytes_CheckExact(input_obj)) {
2165 Py_DECREF(result);
2166 Py_INCREF(input_obj);
2167 return input_obj;
2168 }
2169 /* Fix the size of the resulting string */
2170 if (inlen > 0)
2171 _PyBytes_Resize(&result, output - output_start);
2172 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173}
2174
2175
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002176/*[clinic input]
2177
2178@staticmethod
2179bytes.maketrans
2180
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002181 frm: Py_buffer
2182 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183 /
2184
2185Return a translation table useable for the bytes or bytearray translate method.
2186
2187The returned table will be one where each byte in frm is mapped to the byte at
2188the same position in to.
2189
2190The bytes objects frm and to must be of the same length.
2191[clinic start generated code]*/
2192
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002194bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002195/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002196{
2197 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002198}
2199
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002200
2201/*[clinic input]
2202bytes.replace
2203
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002204 old: Py_buffer
2205 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002206 count: Py_ssize_t = -1
2207 Maximum number of occurrences to replace.
2208 -1 (the default value) means replace all occurrences.
2209 /
2210
2211Return a copy with all occurrences of substring old replaced by new.
2212
2213If the optional argument count is given, only the first count occurrences are
2214replaced.
2215[clinic start generated code]*/
2216
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002218bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002219 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002220/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002222 return stringlib_replace((PyObject *)self,
2223 (const char *)old->buf, old->len,
2224 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002225}
2226
2227/** End DALKE **/
2228
sweeneydea81849b2020-04-22 17:05:48 -04002229/*[clinic input]
2230bytes.removeprefix as bytes_removeprefix
2231
2232 prefix: Py_buffer
2233 /
2234
2235Return a bytes object with the given prefix string removed if present.
2236
2237If the bytes starts with the prefix string, return bytes[len(prefix):].
2238Otherwise, return a copy of the original bytes.
2239[clinic start generated code]*/
2240
2241static PyObject *
2242bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2243/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2244{
2245 const char *self_start = PyBytes_AS_STRING(self);
2246 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2247 const char *prefix_start = prefix->buf;
2248 Py_ssize_t prefix_len = prefix->len;
2249
2250 if (self_len >= prefix_len
2251 && prefix_len > 0
2252 && memcmp(self_start, prefix_start, prefix_len) == 0)
2253 {
2254 return PyBytes_FromStringAndSize(self_start + prefix_len,
2255 self_len - prefix_len);
2256 }
2257
2258 if (PyBytes_CheckExact(self)) {
2259 Py_INCREF(self);
2260 return (PyObject *)self;
2261 }
2262
2263 return PyBytes_FromStringAndSize(self_start, self_len);
2264}
2265
2266/*[clinic input]
2267bytes.removesuffix as bytes_removesuffix
2268
2269 suffix: Py_buffer
2270 /
2271
2272Return a bytes object with the given suffix string removed if present.
2273
2274If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)]. Otherwise, return a copy of the original
2276bytes.
2277[clinic start generated code]*/
2278
2279static PyObject *
2280bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2281/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2282{
2283 const char *self_start = PyBytes_AS_STRING(self);
2284 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2285 const char *suffix_start = suffix->buf;
2286 Py_ssize_t suffix_len = suffix->len;
2287
2288 if (self_len >= suffix_len
2289 && suffix_len > 0
2290 && memcmp(self_start + self_len - suffix_len,
2291 suffix_start, suffix_len) == 0)
2292 {
2293 return PyBytes_FromStringAndSize(self_start,
2294 self_len - suffix_len);
2295 }
2296
2297 if (PyBytes_CheckExact(self)) {
2298 Py_INCREF(self);
2299 return (PyObject *)self;
2300 }
2301
2302 return PyBytes_FromStringAndSize(self_start, self_len);
2303}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002305static PyObject *
2306bytes_startswith(PyBytesObject *self, PyObject *args)
2307{
2308 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2309}
2310
2311static PyObject *
2312bytes_endswith(PyBytesObject *self, PyObject *args)
2313{
2314 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2315}
2316
2317
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002318/*[clinic input]
2319bytes.decode
2320
2321 encoding: str(c_default="NULL") = 'utf-8'
2322 The encoding with which to decode the bytes.
2323 errors: str(c_default="NULL") = 'strict'
2324 The error handling scheme to use for the handling of decoding errors.
2325 The default is 'strict' meaning that decoding errors raise a
2326 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2327 as well as any other name registered with codecs.register_error that
2328 can handle UnicodeDecodeErrors.
2329
2330Decode the bytes using the codec registered for encoding.
2331[clinic start generated code]*/
2332
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002333static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002334bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002335 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002336/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002337{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002338 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002339}
2340
Guido van Rossum20188312006-05-05 15:15:40 +00002341
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002342/*[clinic input]
2343bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002344
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002345 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002346
2347Return a list of the lines in the bytes, breaking at line boundaries.
2348
2349Line breaks are not included in the resulting list unless keepends is given and
2350true.
2351[clinic start generated code]*/
2352
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002353static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002354bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002355/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002357 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002358 (PyObject*) self, PyBytes_AS_STRING(self),
2359 PyBytes_GET_SIZE(self), keepends
2360 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002361}
2362
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002363/*[clinic input]
2364@classmethod
2365bytes.fromhex
2366
2367 string: unicode
2368 /
2369
2370Create a bytes object from a string of hexadecimal numbers.
2371
2372Spaces between two numbers are accepted.
2373Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2374[clinic start generated code]*/
2375
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002376static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002377bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002378/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002379{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002380 PyObject *result = _PyBytes_FromHex(string, 0);
2381 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002382 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002383 }
2384 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002385}
2386
2387PyObject*
2388_PyBytes_FromHex(PyObject *string, int use_bytearray)
2389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002391 Py_ssize_t hexlen, invalid_char;
2392 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002393 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002394 _PyBytesWriter writer;
2395
2396 _PyBytesWriter_Init(&writer);
2397 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002398
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002399 assert(PyUnicode_Check(string));
2400 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002402 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002403
Victor Stinner2bf89932015-10-14 11:25:33 +02002404 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002405 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002406 unsigned int kind = PyUnicode_KIND(string);
2407 Py_ssize_t i;
2408
2409 /* search for the first non-ASCII character */
2410 for (i = 0; i < hexlen; i++) {
2411 if (PyUnicode_READ(kind, data, i) >= 128)
2412 break;
2413 }
2414 invalid_char = i;
2415 goto error;
2416 }
2417
2418 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2419 str = PyUnicode_1BYTE_DATA(string);
2420
2421 /* This overestimates if there are spaces */
2422 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2423 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002425
2426 end = str + hexlen;
2427 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002429 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002430 do {
2431 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002432 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002433 if (str >= end)
2434 break;
2435 }
2436
2437 top = _PyLong_DigitValue[*str];
2438 if (top >= 16) {
2439 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 goto error;
2441 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002442 str++;
2443
2444 bot = _PyLong_DigitValue[*str];
2445 if (bot >= 16) {
2446 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2447 goto error;
2448 }
2449 str++;
2450
2451 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002452 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002453
2454 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002455
2456 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002457 PyErr_Format(PyExc_ValueError,
2458 "non-hexadecimal number found in "
2459 "fromhex() arg at position %zd", invalid_char);
2460 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002462}
2463
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002464/*[clinic input]
2465bytes.hex
2466
2467 sep: object = NULL
2468 An optional single character or byte to separate hex bytes.
2469 bytes_per_sep: int = 1
2470 How many bytes between separators. Positive values count from the
2471 right, negative values count from the left.
2472
2473Create a str of hexadecimal numbers from a bytes object.
2474
2475Example:
2476>>> value = b'\xb9\x01\xef'
2477>>> value.hex()
2478'b901ef'
2479>>> value.hex(':')
2480'b9:01:ef'
2481>>> value.hex(':', 2)
2482'b9:01ef'
2483>>> value.hex(':', -2)
2484'b901:ef'
2485[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002486
2487static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002488bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2489/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002490{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002491 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002492 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002493 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002494}
2495
2496static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302497bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002500}
2501
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002502
2503static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002504bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302506 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002508 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002509 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002510 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002511 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002512 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002513 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002514 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002515 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002516 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002517 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002518 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002519 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302520 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002521 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302522 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302524 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002525 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302526 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302528 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302530 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302532 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302534 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002536 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002537 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302538 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002539 BYTES_LSTRIP_METHODDEF
2540 BYTES_MAKETRANS_METHODDEF
2541 BYTES_PARTITION_METHODDEF
2542 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002543 BYTES_REMOVEPREFIX_METHODDEF
2544 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002545 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2546 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002547 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002548 BYTES_RPARTITION_METHODDEF
2549 BYTES_RSPLIT_METHODDEF
2550 BYTES_RSTRIP_METHODDEF
2551 BYTES_SPLIT_METHODDEF
2552 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002553 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002554 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002555 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302556 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302558 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002559 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302560 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002561 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002563};
2564
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002565static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002566bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002567{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002568 if (!PyBytes_Check(self)) {
2569 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002570 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002571 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002572 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002573}
2574
2575static PyNumberMethods bytes_as_number = {
2576 0, /*nb_add*/
2577 0, /*nb_subtract*/
2578 0, /*nb_multiply*/
2579 bytes_mod, /*nb_remainder*/
2580};
2581
2582static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002583bytes_subtype_new(PyTypeObject *, PyObject *);
2584
2585/*[clinic input]
2586@classmethod
2587bytes.__new__ as bytes_new
2588
2589 source as x: object = NULL
2590 encoding: str = NULL
2591 errors: str = NULL
2592
2593[clinic start generated code]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002594
2595static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002596bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2597 const char *errors)
2598/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599{
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002600 PyObject *bytes;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002601 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 Py_ssize_t size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 if (x == NULL) {
2605 if (encoding != NULL || errors != NULL) {
2606 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002607 encoding != NULL ?
2608 "encoding without a string argument" :
2609 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 return NULL;
2611 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002612 bytes = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002614 else if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002616 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002618 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 return NULL;
2620 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002621 bytes = PyUnicode_AsEncodedString(x, encoding, errors);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002623 else if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002624 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002625 PyUnicode_Check(x) ?
2626 "string argument without an encoding" :
2627 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002628 return NULL;
2629 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002630 /* We'd like to call PyObject_Bytes here, but we need to check for an
2631 integer argument before deferring to PyBytes_FromObject, something
2632 PyObject_Bytes doesn't do. */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002633 else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
2634 bytes = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002635 Py_DECREF(func);
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002636 if (bytes == NULL)
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002637 return NULL;
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002638 if (!PyBytes_Check(bytes)) {
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002639 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002640 "__bytes__ returned non-bytes (type %.200s)",
2641 Py_TYPE(bytes)->tp_name);
2642 Py_DECREF(bytes);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002643 return NULL;
2644 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002645 }
2646 else if (PyErr_Occurred())
2647 return NULL;
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002648 else if (PyUnicode_Check(x)) {
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002649 PyErr_SetString(PyExc_TypeError,
2650 "string argument without an encoding");
2651 return NULL;
2652 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 /* Is it an integer? */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002654 else if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002655 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2656 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002657 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002658 return NULL;
2659 PyErr_Clear(); /* fall through */
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002660 bytes = PyBytes_FromObject(x);
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002661 }
INADA Naokia634e232017-01-06 17:32:01 +09002662 else {
2663 if (size < 0) {
2664 PyErr_SetString(PyExc_ValueError, "negative count");
2665 return NULL;
2666 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002667 bytes = _PyBytes_FromSize(size, 1);
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002668 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 }
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002670 else {
2671 bytes = PyBytes_FromObject(x);
2672 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002674 if (bytes != NULL && type != &PyBytes_Type) {
2675 Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2676 }
2677
2678 return bytes;
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002679}
2680
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002681static PyObject*
2682_PyBytes_FromBuffer(PyObject *x)
2683{
2684 PyObject *new;
2685 Py_buffer view;
2686
2687 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2688 return NULL;
2689
2690 new = PyBytes_FromStringAndSize(NULL, view.len);
2691 if (!new)
2692 goto fail;
2693 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2694 &view, view.len, 'C') < 0)
2695 goto fail;
2696 PyBuffer_Release(&view);
2697 return new;
2698
2699fail:
2700 Py_XDECREF(new);
2701 PyBuffer_Release(&view);
2702 return NULL;
2703}
2704
2705static PyObject*
2706_PyBytes_FromList(PyObject *x)
2707{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002708 Py_ssize_t i, size = PyList_GET_SIZE(x);
2709 Py_ssize_t value;
2710 char *str;
2711 PyObject *item;
2712 _PyBytesWriter writer;
2713
2714 _PyBytesWriter_Init(&writer);
2715 str = _PyBytesWriter_Alloc(&writer, size);
2716 if (str == NULL)
2717 return NULL;
2718 writer.overallocate = 1;
2719 size = writer.allocated;
2720
2721 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2722 item = PyList_GET_ITEM(x, i);
2723 Py_INCREF(item);
2724 value = PyNumber_AsSsize_t(item, NULL);
2725 Py_DECREF(item);
2726 if (value == -1 && PyErr_Occurred())
2727 goto error;
2728
2729 if (value < 0 || value >= 256) {
2730 PyErr_SetString(PyExc_ValueError,
2731 "bytes must be in range(0, 256)");
2732 goto error;
2733 }
2734
2735 if (i >= size) {
2736 str = _PyBytesWriter_Resize(&writer, str, size+1);
2737 if (str == NULL)
2738 return NULL;
2739 size = writer.allocated;
2740 }
2741 *str++ = (char) value;
2742 }
2743 return _PyBytesWriter_Finish(&writer, str);
2744
2745 error:
2746 _PyBytesWriter_Dealloc(&writer);
2747 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002748}
2749
2750static PyObject*
2751_PyBytes_FromTuple(PyObject *x)
2752{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002753 PyObject *bytes;
2754 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2755 Py_ssize_t value;
2756 char *str;
2757 PyObject *item;
2758
2759 bytes = PyBytes_FromStringAndSize(NULL, size);
2760 if (bytes == NULL)
2761 return NULL;
2762 str = ((PyBytesObject *)bytes)->ob_sval;
2763
2764 for (i = 0; i < size; i++) {
2765 item = PyTuple_GET_ITEM(x, i);
2766 value = PyNumber_AsSsize_t(item, NULL);
2767 if (value == -1 && PyErr_Occurred())
2768 goto error;
2769
2770 if (value < 0 || value >= 256) {
2771 PyErr_SetString(PyExc_ValueError,
2772 "bytes must be in range(0, 256)");
2773 goto error;
2774 }
2775 *str++ = (char) value;
2776 }
2777 return bytes;
2778
2779 error:
2780 Py_DECREF(bytes);
2781 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002782}
2783
2784static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002785_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002786{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002787 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002789 _PyBytesWriter writer;
2790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002792 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 if (size == -1 && PyErr_Occurred())
2794 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002795
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002796 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002797 str = _PyBytesWriter_Alloc(&writer, size);
2798 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002800 writer.overallocate = 1;
2801 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 /* Run the iterator to exhaustion */
2804 for (i = 0; ; i++) {
2805 PyObject *item;
2806 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 /* Get the next item */
2809 item = PyIter_Next(it);
2810 if (item == NULL) {
2811 if (PyErr_Occurred())
2812 goto error;
2813 break;
2814 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002817 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 Py_DECREF(item);
2819 if (value == -1 && PyErr_Occurred())
2820 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002822 /* Range check */
2823 if (value < 0 || value >= 256) {
2824 PyErr_SetString(PyExc_ValueError,
2825 "bytes must be in range(0, 256)");
2826 goto error;
2827 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002829 /* Append the byte */
2830 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002831 str = _PyBytesWriter_Resize(&writer, str, size+1);
2832 if (str == NULL)
2833 return NULL;
2834 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002836 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002838
2839 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840
2841 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002842 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844}
2845
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002846PyObject *
2847PyBytes_FromObject(PyObject *x)
2848{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002849 PyObject *it, *result;
2850
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002851 if (x == NULL) {
2852 PyErr_BadInternalCall();
2853 return NULL;
2854 }
2855
2856 if (PyBytes_CheckExact(x)) {
2857 Py_INCREF(x);
2858 return x;
2859 }
2860
2861 /* Use the modern buffer interface */
2862 if (PyObject_CheckBuffer(x))
2863 return _PyBytes_FromBuffer(x);
2864
2865 if (PyList_CheckExact(x))
2866 return _PyBytes_FromList(x);
2867
2868 if (PyTuple_CheckExact(x))
2869 return _PyBytes_FromTuple(x);
2870
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002871 if (!PyUnicode_Check(x)) {
2872 it = PyObject_GetIter(x);
2873 if (it != NULL) {
2874 result = _PyBytes_FromIterator(it, x);
2875 Py_DECREF(it);
2876 return result;
2877 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002878 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2879 return NULL;
2880 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002881 }
2882
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002883 PyErr_Format(PyExc_TypeError,
2884 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002885 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002886 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002887}
2888
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889static PyObject *
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002890bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891{
Serhiy Storchaka12f43342020-07-20 15:53:55 +03002892 PyObject *pnew;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002893 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002895 assert(PyType_IsSubtype(type, &PyBytes_Type));
Serhiy Storchaka15095802015-11-25 15:47:01 +02002896 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 n = PyBytes_GET_SIZE(tmp);
2898 pnew = type->tp_alloc(type, n);
2899 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002900 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 PyBytes_AS_STRING(tmp), n+1);
2902 ((PyBytesObject *)pnew)->ob_shash =
2903 ((PyBytesObject *)tmp)->ob_shash;
2904 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906}
2907
/* Docstring of the bytes type; referenced as tp_doc in PyBytes_Type.  It
   lists the accepted constructor call forms followed by a short summary of
   the accepted source objects. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002908PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002909"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002911bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002912bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2913bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002914\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002916 - an iterable yielding integers in range(256)\n\
2917 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002918 - any object implementing the buffer API.\n\
2919 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002920
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002921static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002922
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002923PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2925 "bytes",
2926 PyBytesObject_SIZE,
2927 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002928 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002929 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 0, /* tp_getattr */
2931 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002932 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002934 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002935 &bytes_as_sequence, /* tp_as_sequence */
2936 &bytes_as_mapping, /* tp_as_mapping */
2937 (hashfunc)bytes_hash, /* tp_hash */
2938 0, /* tp_call */
2939 bytes_str, /* tp_str */
2940 PyObject_GenericGetAttr, /* tp_getattro */
2941 0, /* tp_setattro */
2942 &bytes_as_buffer, /* tp_as_buffer */
2943 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2944 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2945 bytes_doc, /* tp_doc */
2946 0, /* tp_traverse */
2947 0, /* tp_clear */
2948 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2949 0, /* tp_weaklistoffset */
2950 bytes_iter, /* tp_iter */
2951 0, /* tp_iternext */
2952 bytes_methods, /* tp_methods */
2953 0, /* tp_members */
2954 0, /* tp_getset */
2955 &PyBaseObject_Type, /* tp_base */
2956 0, /* tp_dict */
2957 0, /* tp_descr_get */
2958 0, /* tp_descr_set */
2959 0, /* tp_dictoffset */
2960 0, /* tp_init */
2961 0, /* tp_alloc */
2962 bytes_new, /* tp_new */
2963 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002964};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002965
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002967PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 assert(pv != NULL);
2970 if (*pv == NULL)
2971 return;
2972 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002973 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 return;
2975 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002976
2977 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2978 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002979 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002980 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002981
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002982 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002983 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2984 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2985 Py_CLEAR(*pv);
2986 return;
2987 }
2988
2989 oldsize = PyBytes_GET_SIZE(*pv);
2990 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2991 PyErr_NoMemory();
2992 goto error;
2993 }
2994 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2995 goto error;
2996
2997 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2998 PyBuffer_Release(&wb);
2999 return;
3000
3001 error:
3002 PyBuffer_Release(&wb);
3003 Py_CLEAR(*pv);
3004 return;
3005 }
3006
3007 else {
3008 /* Multiple references, need to create new object */
3009 PyObject *v;
3010 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003011 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003012 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013}
3014
3015void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003016PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 PyBytes_Concat(pv, w);
3019 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003020}
3021
3022
Ethan Furmanb95b5612015-01-23 20:05:18 -08003023/* The following function breaks the notion that bytes are immutable:
3024 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003026 as creating a new bytes object and destroying the old one, only
3027 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003028 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003029 Note that if there's not enough memory to resize the bytes object, the
3030 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003031 memory" exception is set, and -1 is returned. Else (on success) 0 is
3032 returned, and the value in *pv may or may not be the same as on input.
3033 As always, an extra byte is allocated for a trailing \0 byte (newsize
3034 does *not* include that), and a trailing \0 byte is stored.
3035*/
3036
3037int
3038_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3039{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003040 PyObject *v;
3041 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003043 if (!PyBytes_Check(v) || newsize < 0) {
3044 goto error;
3045 }
3046 if (Py_SIZE(v) == newsize) {
3047 /* return early if newsize equals to v->ob_size */
3048 return 0;
3049 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003050 if (Py_SIZE(v) == 0) {
3051 if (newsize == 0) {
3052 return 0;
3053 }
3054 *pv = _PyBytes_FromSize(newsize, 0);
3055 Py_DECREF(v);
3056 return (*pv == NULL) ? -1 : 0;
3057 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003058 if (Py_REFCNT(v) != 1) {
3059 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003061 if (newsize == 0) {
Victor Stinner91698d82020-06-25 14:07:40 +02003062 *pv = bytes_new_empty();
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003063 Py_DECREF(v);
Victor Stinner91698d82020-06-25 14:07:40 +02003064 return 0;
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003065 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003067#ifdef Py_REF_DEBUG
3068 _Py_RefTotal--;
3069#endif
3070#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003071 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003072#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003074 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003075 if (*pv == NULL) {
3076 PyObject_Del(v);
3077 PyErr_NoMemory();
3078 return -1;
3079 }
3080 _Py_NewReference(*pv);
3081 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003082 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 sv->ob_sval[newsize] = '\0';
3084 sv->ob_shash = -1; /* invalidate cached hash value */
3085 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003086error:
3087 *pv = 0;
3088 Py_DECREF(v);
3089 PyErr_BadInternalCall();
3090 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091}
3092
Victor Stinner91698d82020-06-25 14:07:40 +02003093
3094PyStatus
3095_PyBytes_Init(PyThreadState *tstate)
3096{
3097 struct _Py_bytes_state *state = &tstate->interp->bytes;
3098 if (bytes_create_empty_string_singleton(state) < 0) {
3099 return _PyStatus_NO_MEMORY();
3100 }
3101 return _PyStatus_OK();
3102}
3103
3104
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003105void
Victor Stinnerc41eed12020-06-23 15:54:35 +02003106_PyBytes_Fini(PyThreadState *tstate)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003107{
Victor Stinnerc41eed12020-06-23 15:54:35 +02003108 struct _Py_bytes_state* state = &tstate->interp->bytes;
3109 for (int i = 0; i < UCHAR_MAX + 1; i++) {
3110 Py_CLEAR(state->characters[i]);
3111 }
3112 Py_CLEAR(state->empty_string);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003113}
3114
Benjamin Peterson4116f362008-05-27 00:36:20 +00003115/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116
/* Iterator over the bytes of a bytes object; yields ints (see
   striter_next()). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* Index into it_seq of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003122
3123static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003124striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 _PyObject_GC_UNTRACK(it);
3127 Py_XDECREF(it->it_seq);
3128 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003129}
3130
3131static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003132striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003134 Py_VISIT(it->it_seq);
3135 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003136}
3137
3138static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003139striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003140{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 PyBytesObject *seq;
3142 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003144 assert(it != NULL);
3145 seq = it->it_seq;
3146 if (seq == NULL)
3147 return NULL;
3148 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003150 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3151 item = PyLong_FromLong(
3152 (unsigned char)seq->ob_sval[it->it_index]);
3153 if (item != NULL)
3154 ++it->it_index;
3155 return item;
3156 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003159 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003161}
3162
3163static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303164striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 Py_ssize_t len = 0;
3167 if (it->it_seq)
3168 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3169 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003170}
3171
3172PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003173 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003174
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003175static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303176striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003177{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003178 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003179 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003180 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003181 it->it_seq, it->it_index);
3182 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003183 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003184 }
3185}
3186
3187PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3188
3189static PyObject *
3190striter_setstate(striterobject *it, PyObject *state)
3191{
3192 Py_ssize_t index = PyLong_AsSsize_t(state);
3193 if (index == -1 && PyErr_Occurred())
3194 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003195 if (it->it_seq != NULL) {
3196 if (index < 0)
3197 index = 0;
3198 else if (index > PyBytes_GET_SIZE(it->it_seq))
3199 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3200 it->it_index = index;
3201 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003202 Py_RETURN_NONE;
3203}
3204
3205PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3206
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003207static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003208 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3209 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003210 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3211 reduce_doc},
3212 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3213 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003214 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003215};
3216
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003217PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003218 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3219 "bytes_iterator", /* tp_name */
3220 sizeof(striterobject), /* tp_basicsize */
3221 0, /* tp_itemsize */
3222 /* methods */
3223 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003224 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003225 0, /* tp_getattr */
3226 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003227 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 0, /* tp_repr */
3229 0, /* tp_as_number */
3230 0, /* tp_as_sequence */
3231 0, /* tp_as_mapping */
3232 0, /* tp_hash */
3233 0, /* tp_call */
3234 0, /* tp_str */
3235 PyObject_GenericGetAttr, /* tp_getattro */
3236 0, /* tp_setattro */
3237 0, /* tp_as_buffer */
3238 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3239 0, /* tp_doc */
3240 (traverseproc)striter_traverse, /* tp_traverse */
3241 0, /* tp_clear */
3242 0, /* tp_richcompare */
3243 0, /* tp_weaklistoffset */
3244 PyObject_SelfIter, /* tp_iter */
3245 (iternextfunc)striter_next, /* tp_iternext */
3246 striter_methods, /* tp_methods */
3247 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003248};
3249
3250static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003251bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003253 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003255 if (!PyBytes_Check(seq)) {
3256 PyErr_BadInternalCall();
3257 return NULL;
3258 }
3259 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3260 if (it == NULL)
3261 return NULL;
3262 it->it_index = 0;
3263 Py_INCREF(seq);
3264 it->it_seq = (PyBytesObject *)seq;
3265 _PyObject_GC_TRACK(it);
3266 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003267}
Victor Stinner00165072015-10-09 01:53:21 +02003268
3269
3270/* _PyBytesWriter API */
3271
/* Divisor controlling how much _PyBytesWriter_Resize() overallocates: when
   overallocation is enabled, a request for N bytes is grown to
   N + N / OVERALLOCATE_FACTOR (unless that would overflow). */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3279
3280void
3281_PyBytesWriter_Init(_PyBytesWriter *writer)
3282{
Victor Stinner661aacc2015-10-14 09:41:48 +02003283 /* Set all attributes before small_buffer to 0 */
3284 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003285#ifndef NDEBUG
3286 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3287 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003288#endif
3289}
3290
3291void
3292_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3293{
3294 Py_CLEAR(writer->buffer);
3295}
3296
3297Py_LOCAL_INLINE(char*)
3298_PyBytesWriter_AsString(_PyBytesWriter *writer)
3299{
Victor Stinner661aacc2015-10-14 09:41:48 +02003300 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003301 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003302 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003303 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003304 else if (writer->use_bytearray) {
3305 assert(writer->buffer != NULL);
3306 return PyByteArray_AS_STRING(writer->buffer);
3307 }
3308 else {
3309 assert(writer->buffer != NULL);
3310 return PyBytes_AS_STRING(writer->buffer);
3311 }
Victor Stinner00165072015-10-09 01:53:21 +02003312}
3313
3314Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003315_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003316{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003317 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003318 assert(str != NULL);
3319 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003320 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003321 return str - start;
3322}
3323
Victor Stinner68762572019-10-07 18:42:01 +02003324#ifndef NDEBUG
3325Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003326_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3327{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003328 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003329
Victor Stinner661aacc2015-10-14 09:41:48 +02003330 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003331 assert(writer->buffer == NULL);
3332 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003333 else {
3334 assert(writer->buffer != NULL);
3335 if (writer->use_bytearray)
3336 assert(PyByteArray_CheckExact(writer->buffer));
3337 else
3338 assert(PyBytes_CheckExact(writer->buffer));
3339 assert(Py_REFCNT(writer->buffer) == 1);
3340 }
Victor Stinner00165072015-10-09 01:53:21 +02003341
Victor Stinner661aacc2015-10-14 09:41:48 +02003342 if (writer->use_bytearray) {
3343 /* bytearray has its own overallocation algorithm,
3344 writer overallocation must be disabled */
3345 assert(!writer->overallocate);
3346 }
3347
3348 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003349 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003350 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003351 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003352 assert(start[writer->allocated] == 0);
3353
3354 end = start + writer->allocated;
3355 assert(str != NULL);
3356 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003357 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003358}
Victor Stinner68762572019-10-07 18:42:01 +02003359#endif
Victor Stinner00165072015-10-09 01:53:21 +02003360
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003361void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003362_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003363{
3364 Py_ssize_t allocated, pos;
3365
Victor Stinner68762572019-10-07 18:42:01 +02003366 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003367 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003368
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003369 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003370 if (writer->overallocate
3371 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3372 /* overallocate to limit the number of realloc() */
3373 allocated += allocated / OVERALLOCATE_FACTOR;
3374 }
3375
Victor Stinner2bf89932015-10-14 11:25:33 +02003376 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003377 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003378 if (writer->use_bytearray) {
3379 if (PyByteArray_Resize(writer->buffer, allocated))
3380 goto error;
3381 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3382 but we cannot use ob_alloc because bytes may need to be moved
3383 to use the whole buffer. bytearray uses an internal optimization
3384 to avoid moving or copying bytes when bytes are removed at the
3385 beginning (ex: del bytearray[:1]). */
3386 }
3387 else {
3388 if (_PyBytes_Resize(&writer->buffer, allocated))
3389 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003390 }
3391 }
3392 else {
3393 /* convert from stack buffer to bytes object buffer */
3394 assert(writer->buffer == NULL);
3395
Victor Stinner661aacc2015-10-14 09:41:48 +02003396 if (writer->use_bytearray)
3397 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3398 else
3399 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003400 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003402
3403 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003404 char *dest;
3405 if (writer->use_bytearray)
3406 dest = PyByteArray_AS_STRING(writer->buffer);
3407 else
3408 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003409 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003410 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003411 pos);
3412 }
3413
Victor Stinnerb3653a32015-10-09 03:38:24 +02003414 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003415#ifndef NDEBUG
3416 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3417 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003418#endif
Victor Stinner00165072015-10-09 01:53:21 +02003419 }
3420 writer->allocated = allocated;
3421
3422 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003423 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003424 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003425
3426error:
3427 _PyBytesWriter_Dealloc(writer);
3428 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003429}
3430
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003431void*
3432_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3433{
3434 Py_ssize_t new_min_size;
3435
Victor Stinner68762572019-10-07 18:42:01 +02003436 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003437 assert(size >= 0);
3438
3439 if (size == 0) {
3440 /* nothing to do */
3441 return str;
3442 }
3443
3444 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3445 PyErr_NoMemory();
3446 _PyBytesWriter_Dealloc(writer);
3447 return NULL;
3448 }
3449 new_min_size = writer->min_size + size;
3450
3451 if (new_min_size > writer->allocated)
3452 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3453
3454 writer->min_size = new_min_size;
3455 return str;
3456}
3457
Victor Stinner00165072015-10-09 01:53:21 +02003458/* Allocate the buffer to write size bytes.
3459 Return the pointer to the beginning of buffer data.
3460 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003461void*
Victor Stinner00165072015-10-09 01:53:21 +02003462_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3463{
3464 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003465 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003466 assert(size >= 0);
3467
Victor Stinnerb3653a32015-10-09 03:38:24 +02003468 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003469#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003470 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003471 /* In debug mode, don't use the full small buffer because it is less
3472 efficient than bytes and bytearray objects to detect buffer underflow
3473 and buffer overflow. Use 10 bytes of the small buffer to test also
3474 code using the smaller buffer in debug mode.
3475
3476 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3477 in debug mode to also be able to detect stack overflow when running
3478 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3479 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3480 stack overflow. */
3481 writer->allocated = Py_MIN(writer->allocated, 10);
3482 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3483 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003484 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003485#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003486 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003487#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003488 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003489}
3490
3491PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003492_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003493{
Victor Stinner2bf89932015-10-14 11:25:33 +02003494 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003495 PyObject *result;
3496
Victor Stinner68762572019-10-07 18:42:01 +02003497 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003498
Victor Stinner2bf89932015-10-14 11:25:33 +02003499 size = _PyBytesWriter_GetSize(writer, str);
3500 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003501 Py_CLEAR(writer->buffer);
3502 /* Get the empty byte string singleton */
3503 result = PyBytes_FromStringAndSize(NULL, 0);
3504 }
3505 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003506 if (writer->use_bytearray) {
3507 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3508 }
3509 else {
3510 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3511 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003512 }
3513 else {
3514 result = writer->buffer;
3515 writer->buffer = NULL;
3516
Victor Stinner2bf89932015-10-14 11:25:33 +02003517 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003518 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003519 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003520 Py_DECREF(result);
3521 return NULL;
3522 }
3523 }
3524 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003525 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003526 assert(result == NULL);
3527 return NULL;
3528 }
Victor Stinner00165072015-10-09 01:53:21 +02003529 }
3530 }
Victor Stinner00165072015-10-09 01:53:21 +02003531 }
Victor Stinner00165072015-10-09 01:53:21 +02003532 return result;
3533}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003534
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003535void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003536_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003537 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003538{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003539 char *str = (char *)ptr;
3540
Victor Stinnerce179bf2015-10-09 12:57:22 +02003541 str = _PyBytesWriter_Prepare(writer, str, size);
3542 if (str == NULL)
3543 return NULL;
3544
Christian Heimesf051e432016-09-13 20:22:02 +02003545 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003546 str += size;
3547
3548 return str;
3549}