blob: ce006e15dce9ea1db70f115f7d0224f5ee244752 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02009#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Hai Shi46874c22020-01-30 17:20:25 -060021_Py_IDENTIFIER(__bytes__);
22
Mark Dickinsonfd24b322008-12-06 15:33:31 +000023/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
24 for a string of length n should request PyBytesObject_SIZE + n bytes.
25
26 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
27 3 bytes per string allocation on a typical system.
28*/
29#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
30
Victor Stinner2bf89932015-10-14 11:25:33 +020031/* Forward declaration */
32Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
33 char *str);
34
Victor Stinnerc41eed12020-06-23 15:54:35 +020035
36static struct _Py_bytes_state*
37get_bytes_state(void)
38{
39 PyInterpreterState *interp = _PyInterpreterState_GET();
40 return &interp->bytes;
41}
42
43
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 For PyBytes_FromString(), the parameter `str' points to a null-terminated
46 string containing exactly `size' bytes.
47
Martin Pantera90a4a92016-05-30 04:04:50 +000048 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000049 either NULL or else points to a string containing at least `size' bytes.
50 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
51 not have to be null-terminated. (Therefore it is safe to construct a
52 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
53 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
54 bytes (setting the last byte to the null terminating character) and you can
55 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000056 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057 alter the data yourself, since the strings may be shared.
58
59 The PyObject member `op->ob_size', which denotes the number of "extra
60 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020061 allocated for string data, not counting the null terminating character.
62 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063 PyBytes_FromStringAndSize()) or the length of the string in the `str'
64 parameter (for PyBytes_FromString()).
65*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020066static PyObject *
67_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000068{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020069 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020070 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020071
Victor Stinnerc41eed12020-06-23 15:54:35 +020072 if (size == 0) {
73 struct _Py_bytes_state *state = get_bytes_state();
74 op = state->empty_string;
75 if (op != NULL) {
76 Py_INCREF(op);
77 return (PyObject *)op;
78 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000080
Victor Stinner049e5092014-08-17 22:20:00 +020081 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyErr_SetString(PyExc_OverflowError,
83 "byte string is too large");
84 return NULL;
85 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (use_calloc)
89 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
90 else
91 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +020092 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +020094 }
95 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 if (!use_calloc)
98 op->ob_sval[size] = '\0';
99 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size == 0) {
Victor Stinnerc41eed12020-06-23 15:54:35 +0200101 struct _Py_bytes_state *state = get_bytes_state();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200103 state->empty_string = op;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200104 }
105 return (PyObject *) op;
106}
107
108PyObject *
109PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
110{
111 PyBytesObject *op;
112 if (size < 0) {
113 PyErr_SetString(PyExc_SystemError,
114 "Negative size passed to PyBytes_FromStringAndSize");
115 return NULL;
116 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200117 if (size == 1 && str != NULL) {
118 struct _Py_bytes_state *state = get_bytes_state();
119 op = state->characters[*str & UCHAR_MAX];
120 if (op != NULL) {
121 Py_INCREF(op);
122 return (PyObject *)op;
123 }
Victor Stinnerdb067af2014-05-02 22:31:14 +0200124 }
125
126 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
127 if (op == NULL)
128 return NULL;
129 if (str == NULL)
130 return (PyObject *) op;
131
Christian Heimesf051e432016-09-13 20:22:02 +0200132 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200133 /* share short strings */
134 if (size == 1) {
Victor Stinnerc41eed12020-06-23 15:54:35 +0200135 struct _Py_bytes_state *state = get_bytes_state();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200137 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000140}
141
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142PyObject *
143PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000144{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200145 size_t size;
146 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 assert(str != NULL);
149 size = strlen(str);
150 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
151 PyErr_SetString(PyExc_OverflowError,
152 "byte string is too long");
153 return NULL;
154 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200155
156 struct _Py_bytes_state *state = get_bytes_state();
157 if (size == 0) {
158 op = state->empty_string;
159 if (op != NULL) {
160 Py_INCREF(op);
161 return (PyObject *)op;
162 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 }
Victor Stinnerc41eed12020-06-23 15:54:35 +0200164 else if (size == 1) {
165 op = state->characters[*str & UCHAR_MAX];
166 if (op != NULL) {
167 Py_INCREF(op);
168 return (PyObject *)op;
169 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 /* Inline PyObject_NewVar */
173 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200174 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200176 }
177 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200179 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* share short strings */
181 if (size == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200183 state->empty_string = op;
184 }
185 else if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 Py_INCREF(op);
Victor Stinnerc41eed12020-06-23 15:54:35 +0200187 state->characters[*str & UCHAR_MAX] = op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 }
189 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000190}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192PyObject *
193PyBytes_FromFormatV(const char *format, va_list vargs)
194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200196 const char *f;
197 const char *p;
198 Py_ssize_t prec;
199 int longflag;
200 int size_tflag;
201 /* Longest 64-bit formatted numbers:
202 - "18446744073709551615\0" (21 bytes)
203 - "-9223372036854775808\0" (21 bytes)
204 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000205
Victor Stinner03dab782015-10-14 00:21:35 +0200206 Longest 64-bit pointer representation:
207 "0xffffffffffffffff\0" (19 bytes). */
208 char buffer[21];
209 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000210
Victor Stinner03dab782015-10-14 00:21:35 +0200211 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000212
Victor Stinner03dab782015-10-14 00:21:35 +0200213 s = _PyBytesWriter_Alloc(&writer, strlen(format));
214 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200216 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000217
Victor Stinner03dab782015-10-14 00:21:35 +0200218#define WRITE_BYTES(str) \
219 do { \
220 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
221 if (s == NULL) \
222 goto error; \
223 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000225 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200226 if (*f != '%') {
227 *s++ = *f;
228 continue;
229 }
230
231 p = f++;
232
233 /* ignore the width (ex: 10 in "%10s") */
234 while (Py_ISDIGIT(*f))
235 f++;
236
237 /* parse the precision (ex: 10 in "%.10s") */
238 prec = 0;
239 if (*f == '.') {
240 f++;
241 for (; Py_ISDIGIT(*f); f++) {
242 prec = (prec * 10) + (*f - '0');
243 }
244 }
245
246 while (*f && *f != '%' && !Py_ISALPHA(*f))
247 f++;
248
249 /* handle the long flag ('l'), but only for %ld and %lu.
250 others can be added when necessary. */
251 longflag = 0;
252 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
253 longflag = 1;
254 ++f;
255 }
256
257 /* handle the size_t flag ('z'). */
258 size_tflag = 0;
259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
260 size_tflag = 1;
261 ++f;
262 }
263
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700264 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200265 (ex: 2 for "%s") */
266 writer.min_size -= (f - p + 1);
267
268 switch (*f) {
269 case 'c':
270 {
271 int c = va_arg(vargs, int);
272 if (c < 0 || c > 255) {
273 PyErr_SetString(PyExc_OverflowError,
274 "PyBytes_FromFormatV(): %c format "
275 "expects an integer in range [0; 255]");
276 goto error;
277 }
278 writer.min_size++;
279 *s++ = (unsigned char)c;
280 break;
281 }
282
283 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200284 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200285 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200286 }
287 else if (size_tflag) {
288 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
289 }
290 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200291 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200292 }
Victor Stinner03dab782015-10-14 00:21:35 +0200293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200298 if (longflag) {
299 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
300 }
301 else if (size_tflag) {
302 sprintf(buffer, "%zu", va_arg(vargs, size_t));
303 }
304 else {
305 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
306 }
Victor Stinner03dab782015-10-14 00:21:35 +0200307 assert(strlen(buffer) < sizeof(buffer));
308 WRITE_BYTES(buffer);
309 break;
310
311 case 'i':
312 sprintf(buffer, "%i", va_arg(vargs, int));
313 assert(strlen(buffer) < sizeof(buffer));
314 WRITE_BYTES(buffer);
315 break;
316
317 case 'x':
318 sprintf(buffer, "%x", va_arg(vargs, int));
319 assert(strlen(buffer) < sizeof(buffer));
320 WRITE_BYTES(buffer);
321 break;
322
323 case 's':
324 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200326
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200327 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200328 if (prec <= 0) {
329 i = strlen(p);
330 }
331 else {
332 i = 0;
333 while (i < prec && p[i]) {
334 i++;
335 }
336 }
Victor Stinner03dab782015-10-14 00:21:35 +0200337 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
338 if (s == NULL)
339 goto error;
340 break;
341 }
342
343 case 'p':
344 sprintf(buffer, "%p", va_arg(vargs, void*));
345 assert(strlen(buffer) < sizeof(buffer));
346 /* %p is ill-defined: ensure leading 0x. */
347 if (buffer[1] == 'X')
348 buffer[1] = 'x';
349 else if (buffer[1] != 'x') {
350 memmove(buffer+2, buffer, strlen(buffer)+1);
351 buffer[0] = '0';
352 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Victor Stinner03dab782015-10-14 00:21:35 +0200354 WRITE_BYTES(buffer);
355 break;
356
357 case '%':
358 writer.min_size++;
359 *s++ = '%';
360 break;
361
362 default:
363 if (*f == 0) {
364 /* fix min_size if we reached the end of the format string */
365 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000367
Victor Stinner03dab782015-10-14 00:21:35 +0200368 /* invalid format string: copy unformatted string and exit */
369 WRITE_BYTES(p);
370 return _PyBytesWriter_Finish(&writer, s);
371 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373
Victor Stinner03dab782015-10-14 00:21:35 +0200374#undef WRITE_BYTES
375
376 return _PyBytesWriter_Finish(&writer, s);
377
378 error:
379 _PyBytesWriter_Dealloc(&writer);
380 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000381}
382
383PyObject *
384PyBytes_FromFormat(const char *format, ...)
385{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 PyObject* ret;
387 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000388
389#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000391#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000393#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 ret = PyBytes_FromFormatV(format, vargs);
395 va_end(vargs);
396 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000397}
398
Ethan Furmanb95b5612015-01-23 20:05:18 -0800399/* Helpers for formatstring */
400
401Py_LOCAL_INLINE(PyObject *)
402getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
403{
404 Py_ssize_t argidx = *p_argidx;
405 if (argidx < arglen) {
406 (*p_argidx)++;
407 if (arglen < 0)
408 return args;
409 else
410 return PyTuple_GetItem(args, argidx);
411 }
412 PyErr_SetString(PyExc_TypeError,
413 "not enough arguments for format string");
414 return NULL;
415}
416
417/* Format codes
418 * F_LJUST '-'
419 * F_SIGN '+'
420 * F_BLANK ' '
421 * F_ALT '#'
422 * F_ZERO '0'
423 */
424#define F_LJUST (1<<0)
425#define F_SIGN (1<<1)
426#define F_BLANK (1<<2)
427#define F_ALT (1<<3)
428#define F_ZERO (1<<4)
429
430/* Returns a new reference to a PyBytes object, or NULL on failure. */
431
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200432static char*
433formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200434 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800435{
436 char *p;
437 PyObject *result;
438 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200439 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800440
441 x = PyFloat_AsDouble(v);
442 if (x == -1.0 && PyErr_Occurred()) {
443 PyErr_Format(PyExc_TypeError, "float argument required, "
444 "not %.200s", Py_TYPE(v)->tp_name);
445 return NULL;
446 }
447
448 if (prec < 0)
449 prec = 6;
450
451 p = PyOS_double_to_string(x, type, prec,
452 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
453
454 if (p == NULL)
455 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200456
457 len = strlen(p);
458 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200459 str = _PyBytesWriter_Prepare(writer, str, len);
460 if (str == NULL)
461 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200462 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200463 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200464 str += len;
465 return str;
466 }
467
468 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800469 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200470 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600471 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800472}
473
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300474static PyObject *
475formatlong(PyObject *v, int flags, int prec, int type)
476{
477 PyObject *result, *iobj;
478 if (type == 'i')
479 type = 'd';
480 if (PyLong_Check(v))
481 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
482 if (PyNumber_Check(v)) {
483 /* make sure number is a type of integer for o, x, and X */
484 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300485 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300486 else
487 iobj = PyNumber_Long(v);
488 if (iobj == NULL) {
489 if (!PyErr_ExceptionMatches(PyExc_TypeError))
490 return NULL;
491 }
492 else if (!PyLong_Check(iobj))
493 Py_CLEAR(iobj);
494 if (iobj != NULL) {
495 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
496 Py_DECREF(iobj);
497 return result;
498 }
499 }
500 PyErr_Format(PyExc_TypeError,
501 "%%%c format: %s is required, not %.200s", type,
502 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
503 : "a number",
504 Py_TYPE(v)->tp_name);
505 return NULL;
506}
507
508static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200509byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800510{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300511 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200512 *p = PyBytes_AS_STRING(arg)[0];
513 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800514 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300515 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200516 *p = PyByteArray_AS_STRING(arg)[0];
517 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800518 }
519 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300520 PyObject *iobj;
521 long ival;
522 int overflow;
523 /* make sure number is a type of integer */
524 if (PyLong_Check(arg)) {
525 ival = PyLong_AsLongAndOverflow(arg, &overflow);
526 }
527 else {
528 iobj = PyNumber_Index(arg);
529 if (iobj == NULL) {
530 if (!PyErr_ExceptionMatches(PyExc_TypeError))
531 return 0;
532 goto onError;
533 }
534 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
535 Py_DECREF(iobj);
536 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300537 if (!overflow && ival == -1 && PyErr_Occurred())
538 goto onError;
539 if (overflow || !(0 <= ival && ival <= 255)) {
540 PyErr_SetString(PyExc_OverflowError,
541 "%c arg not in range(256)");
542 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800543 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300544 *p = (char)ival;
545 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300547 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200548 PyErr_SetString(PyExc_TypeError,
549 "%c requires an integer in range(256) or a single byte");
550 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551}
552
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800553static PyObject *_PyBytes_FromBuffer(PyObject *x);
554
Ethan Furmanb95b5612015-01-23 20:05:18 -0800555static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200556format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800557{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200558 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 /* is it a bytes object? */
560 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200561 *pbuf = PyBytes_AS_STRING(v);
562 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800563 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 return v;
565 }
566 if (PyByteArray_Check(v)) {
567 *pbuf = PyByteArray_AS_STRING(v);
568 *plen = PyByteArray_GET_SIZE(v);
569 Py_INCREF(v);
570 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571 }
572 /* does it support __bytes__? */
573 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
574 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100575 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_DECREF(func);
577 if (result == NULL)
578 return NULL;
579 if (!PyBytes_Check(result)) {
580 PyErr_Format(PyExc_TypeError,
581 "__bytes__ returned non-bytes (type %.200s)",
582 Py_TYPE(result)->tp_name);
583 Py_DECREF(result);
584 return NULL;
585 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200586 *pbuf = PyBytes_AS_STRING(result);
587 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 return result;
589 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800590 /* does it support buffer protocol? */
591 if (PyObject_CheckBuffer(v)) {
592 /* maybe we can avoid making a copy of the buffer object here? */
593 result = _PyBytes_FromBuffer(v);
594 if (result == NULL)
595 return NULL;
596 *pbuf = PyBytes_AS_STRING(result);
597 *plen = PyBytes_GET_SIZE(result);
598 return result;
599 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800601 "%%b requires a bytes-like object, "
602 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_TYPE(v)->tp_name);
604 return NULL;
605}
606
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800608
609PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200610_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
611 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612{
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 const char *fmt;
614 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800615 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 _PyBytesWriter writer;
620
Victor Stinner772b2b02015-10-14 09:56:53 +0200621 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 PyErr_BadInternalCall();
623 return NULL;
624 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 fmt = format;
626 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627
628 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200629 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630
631 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
632 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200634 if (!use_bytearray)
635 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200636
Ethan Furmanb95b5612015-01-23 20:05:18 -0800637 if (PyTuple_Check(args)) {
638 arglen = PyTuple_GET_SIZE(args);
639 argidx = 0;
640 }
641 else {
642 arglen = -1;
643 argidx = -2;
644 }
645 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
646 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
647 !PyByteArray_Check(args)) {
648 dict = args;
649 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 while (--fmtcnt >= 0) {
652 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200653 Py_ssize_t len;
654 char *pos;
655
Xiang Zhangb76ad512017-03-06 17:17:05 +0800656 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200657 if (pos != NULL)
658 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200659 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800660 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200661 assert(len != 0);
662
Christian Heimesf051e432016-09-13 20:22:02 +0200663 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200664 res += len;
665 fmt += len;
666 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 }
668 else {
669 /* Got a format specifier */
670 int flags = 0;
671 Py_ssize_t width = -1;
672 int prec = -1;
673 int c = '\0';
674 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800675 PyObject *v = NULL;
676 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200677 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800678 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200679 Py_ssize_t len = 0;
680 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200681 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800682
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200684 if (*fmt == '%') {
685 *res++ = '%';
686 fmt++;
687 fmtcnt--;
688 continue;
689 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800690 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200691 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800692 Py_ssize_t keylen;
693 PyObject *key;
694 int pcount = 1;
695
696 if (dict == NULL) {
697 PyErr_SetString(PyExc_TypeError,
698 "format requires a mapping");
699 goto error;
700 }
701 ++fmt;
702 --fmtcnt;
703 keystart = fmt;
704 /* Skip over balanced parentheses */
705 while (pcount > 0 && --fmtcnt >= 0) {
706 if (*fmt == ')')
707 --pcount;
708 else if (*fmt == '(')
709 ++pcount;
710 fmt++;
711 }
712 keylen = fmt - keystart - 1;
713 if (fmtcnt < 0 || pcount > 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "incomplete format key");
716 goto error;
717 }
718 key = PyBytes_FromStringAndSize(keystart,
719 keylen);
720 if (key == NULL)
721 goto error;
722 if (args_owned) {
723 Py_DECREF(args);
724 args_owned = 0;
725 }
726 args = PyObject_GetItem(dict, key);
727 Py_DECREF(key);
728 if (args == NULL) {
729 goto error;
730 }
731 args_owned = 1;
732 arglen = -1;
733 argidx = -2;
734 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200735
736 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800737 while (--fmtcnt >= 0) {
738 switch (c = *fmt++) {
739 case '-': flags |= F_LJUST; continue;
740 case '+': flags |= F_SIGN; continue;
741 case ' ': flags |= F_BLANK; continue;
742 case '#': flags |= F_ALT; continue;
743 case '0': flags |= F_ZERO; continue;
744 }
745 break;
746 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200747
748 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800749 if (c == '*') {
750 v = getnextarg(args, arglen, &argidx);
751 if (v == NULL)
752 goto error;
753 if (!PyLong_Check(v)) {
754 PyErr_SetString(PyExc_TypeError,
755 "* wants int");
756 goto error;
757 }
758 width = PyLong_AsSsize_t(v);
759 if (width == -1 && PyErr_Occurred())
760 goto error;
761 if (width < 0) {
762 flags |= F_LJUST;
763 width = -width;
764 }
765 if (--fmtcnt >= 0)
766 c = *fmt++;
767 }
768 else if (c >= 0 && isdigit(c)) {
769 width = c - '0';
770 while (--fmtcnt >= 0) {
771 c = Py_CHARMASK(*fmt++);
772 if (!isdigit(c))
773 break;
774 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
775 PyErr_SetString(
776 PyExc_ValueError,
777 "width too big");
778 goto error;
779 }
780 width = width*10 + (c - '0');
781 }
782 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200783
784 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (c == '.') {
786 prec = 0;
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 if (c == '*') {
790 v = getnextarg(args, arglen, &argidx);
791 if (v == NULL)
792 goto error;
793 if (!PyLong_Check(v)) {
794 PyErr_SetString(
795 PyExc_TypeError,
796 "* wants int");
797 goto error;
798 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200799 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800800 if (prec == -1 && PyErr_Occurred())
801 goto error;
802 if (prec < 0)
803 prec = 0;
804 if (--fmtcnt >= 0)
805 c = *fmt++;
806 }
807 else if (c >= 0 && isdigit(c)) {
808 prec = c - '0';
809 while (--fmtcnt >= 0) {
810 c = Py_CHARMASK(*fmt++);
811 if (!isdigit(c))
812 break;
813 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
814 PyErr_SetString(
815 PyExc_ValueError,
816 "prec too big");
817 goto error;
818 }
819 prec = prec*10 + (c - '0');
820 }
821 }
822 } /* prec */
823 if (fmtcnt >= 0) {
824 if (c == 'h' || c == 'l' || c == 'L') {
825 if (--fmtcnt >= 0)
826 c = *fmt++;
827 }
828 }
829 if (fmtcnt < 0) {
830 PyErr_SetString(PyExc_ValueError,
831 "incomplete format");
832 goto error;
833 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200834 v = getnextarg(args, arglen, &argidx);
835 if (v == NULL)
836 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300838 if (fmtcnt == 0) {
839 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200840 writer.overallocate = 0;
841 }
842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 sign = 0;
844 fill = ' ';
845 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700846 case 'r':
847 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800848 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200849 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 if (temp == NULL)
851 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200852 assert(PyUnicode_IS_ASCII(temp));
853 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
854 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800855 if (prec >= 0 && len > prec)
856 len = prec;
857 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200858
Ethan Furmanb95b5612015-01-23 20:05:18 -0800859 case 's':
860 // %s is only for 2/3 code; 3 only code should use %b
861 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200862 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800863 if (temp == NULL)
864 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800865 if (prec >= 0 && len > prec)
866 len = prec;
867 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200868
Ethan Furmanb95b5612015-01-23 20:05:18 -0800869 case 'i':
870 case 'd':
871 case 'u':
872 case 'o':
873 case 'x':
874 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200875 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200876 && width == -1 && prec == -1
877 && !(flags & (F_SIGN | F_BLANK))
878 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 {
880 /* Fast path */
881 int alternate = flags & F_ALT;
882 int base;
883
884 switch(c)
885 {
886 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700887 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200888 case 'd':
889 case 'i':
890 case 'u':
891 base = 10;
892 break;
893 case 'o':
894 base = 8;
895 break;
896 case 'x':
897 case 'X':
898 base = 16;
899 break;
900 }
901
902 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200903 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200904 res = _PyLong_FormatBytesWriter(&writer, res,
905 v, base, alternate);
906 if (res == NULL)
907 goto error;
908 continue;
909 }
910
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300911 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200912 if (!temp)
913 goto error;
914 assert(PyUnicode_IS_ASCII(temp));
915 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
916 len = PyUnicode_GET_LENGTH(temp);
917 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800918 if (flags & F_ZERO)
919 fill = '0';
920 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 case 'e':
923 case 'E':
924 case 'f':
925 case 'F':
926 case 'g':
927 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928 if (width == -1 && prec == -1
929 && !(flags & (F_SIGN | F_BLANK)))
930 {
931 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200932 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200933 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200934 if (res == NULL)
935 goto error;
936 continue;
937 }
938
Victor Stinnerad771582015-10-09 12:38:53 +0200939 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
941 pbuf = PyBytes_AS_STRING(temp);
942 len = PyBytes_GET_SIZE(temp);
943 sign = 1;
944 if (flags & F_ZERO)
945 fill = '0';
946 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200947
Ethan Furmanb95b5612015-01-23 20:05:18 -0800948 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200949 pbuf = &onechar;
950 len = byte_converter(v, &onechar);
951 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800952 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200953 if (width == -1) {
954 /* Fast path */
955 *res++ = onechar;
956 continue;
957 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800958 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 default:
961 PyErr_Format(PyExc_ValueError,
962 "unsupported format character '%c' (0x%x) "
963 "at index %zd",
964 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200965 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 goto error;
967 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (sign) {
970 if (*pbuf == '-' || *pbuf == '+') {
971 sign = *pbuf++;
972 len--;
973 }
974 else if (flags & F_SIGN)
975 sign = '+';
976 else if (flags & F_BLANK)
977 sign = ' ';
978 else
979 sign = 0;
980 }
981 if (width < len)
982 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200983
984 alloc = width;
985 if (sign != 0 && len == width)
986 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200987 /* 2: size preallocated for %s */
988 if (alloc > 2) {
989 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200990 if (res == NULL)
991 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200993#ifndef NDEBUG
994 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995#endif
996
997 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (sign) {
999 if (fill != ' ')
1000 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001001 if (width > len)
1002 width--;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* Write the numeric prefix for "x", "X" and "o" formats
1006 if the alternate form is used.
1007 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001008 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001009 assert(pbuf[0] == '0');
1010 assert(pbuf[1] == c);
1011 if (fill != ' ') {
1012 *res++ = *pbuf++;
1013 *res++ = *pbuf++;
1014 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001015 width -= 2;
1016 if (width < 0)
1017 width = 0;
1018 len -= 2;
1019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
1021 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023 memset(res, fill, width - len);
1024 res += (width - len);
1025 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028 /* If padding with spaces: write sign if needed and/or numeric
1029 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001030 if (fill == ' ') {
1031 if (sign)
1032 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001033 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 assert(pbuf[0] == '0');
1035 assert(pbuf[1] == c);
1036 *res++ = *pbuf++;
1037 *res++ = *pbuf++;
1038 }
1039 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001040
1041 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001042 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
1045 /* Pad right with the fill character if needed */
1046 if (width > len) {
1047 memset(res, ' ', width - len);
1048 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001050
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001051 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 PyErr_SetString(PyExc_TypeError,
1053 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 Py_XDECREF(temp);
1055 goto error;
1056 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001058
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001059#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060 /* check that we computed the exact size for this write */
1061 assert((res - before) == alloc);
1062#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001063 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001064
1065 /* If overallocation was disabled, ensure that it was the last
1066 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001067 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001069
Ethan Furmanb95b5612015-01-23 20:05:18 -08001070 if (argidx < arglen && !dict) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "not all arguments converted during bytes formatting");
1073 goto error;
1074 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001075
Ethan Furmanb95b5612015-01-23 20:05:18 -08001076 if (args_owned) {
1077 Py_DECREF(args);
1078 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001079 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001080
1081 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001082 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001083 if (args_owned) {
1084 Py_DECREF(args);
1085 }
1086 return NULL;
1087}
1088
Greg Price3a4f6672019-09-12 11:12:22 -07001089/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001090PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 Py_ssize_t len,
1092 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001093 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001096 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001098 _PyBytesWriter writer;
1099
1100 _PyBytesWriter_Init(&writer);
1101
1102 p = _PyBytesWriter_Alloc(&writer, len);
1103 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001105 writer.overallocate = 1;
1106
Eric V. Smith42454af2016-10-31 09:22:08 -04001107 *first_invalid_escape = NULL;
1108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 end = s + len;
1110 while (s < end) {
1111 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001112 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 continue;
1114 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 PyErr_SetString(PyExc_ValueError,
1119 "Trailing \\ in string");
1120 goto failed;
1121 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 switch (*s++) {
1124 /* XXX This assumes ASCII! */
1125 case '\n': break;
1126 case '\\': *p++ = '\\'; break;
1127 case '\'': *p++ = '\''; break;
1128 case '\"': *p++ = '\"'; break;
1129 case 'b': *p++ = '\b'; break;
1130 case 'f': *p++ = '\014'; break; /* FF */
1131 case 't': *p++ = '\t'; break;
1132 case 'n': *p++ = '\n'; break;
1133 case 'r': *p++ = '\r'; break;
1134 case 'v': *p++ = '\013'; break; /* VT */
1135 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1136 case '0': case '1': case '2': case '3':
1137 case '4': case '5': case '6': case '7':
1138 c = s[-1] - '0';
1139 if (s < end && '0' <= *s && *s <= '7') {
1140 c = (c<<3) + *s++ - '0';
1141 if (s < end && '0' <= *s && *s <= '7')
1142 c = (c<<3) + *s++ - '0';
1143 }
1144 *p++ = c;
1145 break;
1146 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (s+1 < end) {
1148 int digit1, digit2;
1149 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1150 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1151 if (digit1 < 16 && digit2 < 16) {
1152 *p++ = (unsigned char)((digit1 << 4) + digit2);
1153 s += 2;
1154 break;
1155 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001157 /* invalid hexadecimal digits */
1158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001160 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001161 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001162 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 goto failed;
1164 }
1165 if (strcmp(errors, "replace") == 0) {
1166 *p++ = '?';
1167 } else if (strcmp(errors, "ignore") == 0)
1168 /* do nothing */;
1169 else {
1170 PyErr_Format(PyExc_ValueError,
1171 "decoding error; unknown "
1172 "error handling code: %.400s",
1173 errors);
1174 goto failed;
1175 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001176 /* skip \x */
1177 if (s < end && Py_ISXDIGIT(s[0]))
1178 s++; /* and a hexdigit */
1179 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001182 if (*first_invalid_escape == NULL) {
1183 *first_invalid_escape = s-1; /* Back up one char, since we've
1184 already incremented s. */
1185 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001187 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 }
1189 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001190
1191 return _PyBytesWriter_Finish(&writer, p);
1192
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001194 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001196}
1197
Eric V. Smith42454af2016-10-31 09:22:08 -04001198PyObject *PyBytes_DecodeEscape(const char *s,
1199 Py_ssize_t len,
1200 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001201 Py_ssize_t Py_UNUSED(unicode),
1202 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001203{
1204 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001205 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001206 &first_invalid_escape);
1207 if (result == NULL)
1208 return NULL;
1209 if (first_invalid_escape != NULL) {
1210 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1211 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001212 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001213 Py_DECREF(result);
1214 return NULL;
1215 }
1216 }
1217 return result;
1218
1219}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220/* -------------------------------------------------------------------- */
1221/* object api */
1222
1223Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001224PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return -1;
1230 }
1231 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232}
1233
1234char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001235PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 if (!PyBytes_Check(op)) {
1238 PyErr_Format(PyExc_TypeError,
1239 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1240 return NULL;
1241 }
1242 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
1244
1245int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001246PyBytes_AsStringAndSize(PyObject *obj,
1247 char **s,
1248 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (s == NULL) {
1251 PyErr_BadInternalCall();
1252 return -1;
1253 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 if (!PyBytes_Check(obj)) {
1256 PyErr_Format(PyExc_TypeError,
1257 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1258 return -1;
1259 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 *s = PyBytes_AS_STRING(obj);
1262 if (len != NULL)
1263 *len = PyBytes_GET_SIZE(obj);
1264 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001265 PyErr_SetString(PyExc_ValueError,
1266 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 return -1;
1268 }
1269 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270}
Neal Norwitz6968b052007-02-27 19:02:19 +00001271
1272/* -------------------------------------------------------------------- */
1273/* Methods */
1274
Victor Stinnerc41eed12020-06-23 15:54:35 +02001275#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
1276
Eric Smith0923d1d2009-04-16 20:16:10 +00001277#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
1279#include "stringlib/fastsearch.h"
1280#include "stringlib/count.h"
1281#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001282#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001283#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001284#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001285#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001286
Eric Smith0f78bff2009-11-30 01:01:42 +00001287#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001288
Victor Stinnerc41eed12020-06-23 15:54:35 +02001289#undef STRINGLIB_GET_EMPTY
1290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291PyObject *
1292PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001296 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001298 unsigned char quote;
1299 const unsigned char *s;
1300 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301
1302 /* Compute size of output string */
1303 squotes = dquotes = 0;
1304 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001305 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 case '\'': squotes++; break;
1310 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 default:
1314 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001315 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001316 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001317 if (newsize > PY_SSIZE_T_MAX - incr)
1318 goto overflow;
1319 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320 }
1321 quote = '\'';
1322 if (smartquotes && squotes && !dquotes)
1323 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001324 if (squotes && quote == '\'') {
1325 if (newsize > PY_SSIZE_T_MAX - squotes)
1326 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329
1330 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (v == NULL) {
1332 return NULL;
1333 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 *p++ = 'b', *p++ = quote;
1337 for (i = 0; i < length; i++) {
1338 unsigned char c = op->ob_sval[i];
1339 if (c == quote || c == '\\')
1340 *p++ = '\\', *p++ = c;
1341 else if (c == '\t')
1342 *p++ = '\\', *p++ = 't';
1343 else if (c == '\n')
1344 *p++ = '\\', *p++ = 'n';
1345 else if (c == '\r')
1346 *p++ = '\\', *p++ = 'r';
1347 else if (c < ' ' || c >= 0x7f) {
1348 *p++ = '\\';
1349 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001350 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1351 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 else
1354 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001357 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001359
1360 overflow:
1361 PyErr_SetString(PyExc_OverflowError,
1362 "bytes object is too large to make repr");
1363 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001364}
1365
Neal Norwitz6968b052007-02-27 19:02:19 +00001366static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001367bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001370}
1371
Neal Norwitz6968b052007-02-27 19:02:19 +00001372static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001373bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001374{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001375 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001377 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001379 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 }
1381 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001382}
1383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001385bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388}
Neal Norwitz6968b052007-02-27 19:02:19 +00001389
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390/* This is also used by PyBytes_Concat() */
1391static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001392bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 Py_buffer va, vb;
1395 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 va.len = -1;
1398 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001399 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1400 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001402 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 goto done;
1404 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 /* Optimize end cases */
1407 if (va.len == 0 && PyBytes_CheckExact(b)) {
1408 result = b;
1409 Py_INCREF(result);
1410 goto done;
1411 }
1412 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1413 result = a;
1414 Py_INCREF(result);
1415 goto done;
1416 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001418 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 PyErr_NoMemory();
1420 goto done;
1421 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001423 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 if (result != NULL) {
1425 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1426 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
1429 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (va.len != -1)
1431 PyBuffer_Release(&va);
1432 if (vb.len != -1)
1433 PyBuffer_Release(&vb);
1434 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435}
Neal Norwitz6968b052007-02-27 19:02:19 +00001436
1437static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001439{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001440 Py_ssize_t i;
1441 Py_ssize_t j;
1442 Py_ssize_t size;
1443 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 size_t nbytes;
1445 if (n < 0)
1446 n = 0;
1447 /* watch out for overflows: the size can overflow int,
1448 * and the # of bytes needed can overflow size_t
1449 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001450 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 PyErr_SetString(PyExc_OverflowError,
1452 "repeated bytes are too long");
1453 return NULL;
1454 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001455 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1457 Py_INCREF(a);
1458 return (PyObject *)a;
1459 }
1460 nbytes = (size_t)size;
1461 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1462 PyErr_SetString(PyExc_OverflowError,
1463 "repeated bytes are too long");
1464 return NULL;
1465 }
1466 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Victor Stinner04fc4f22020-06-16 01:28:07 +02001467 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +02001469 }
1470 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 op->ob_shash = -1;
1472 op->ob_sval[size] = '\0';
1473 if (Py_SIZE(a) == 1 && n > 0) {
1474 memset(op->ob_sval, a->ob_sval[0] , n);
1475 return (PyObject *) op;
1476 }
1477 i = 0;
1478 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001479 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 i = Py_SIZE(a);
1481 }
1482 while (i < size) {
1483 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001484 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 i += j;
1486 }
1487 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001488}
1489
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001490static int
1491bytes_contains(PyObject *self, PyObject *arg)
1492{
1493 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1494}
1495
Neal Norwitz6968b052007-02-27 19:02:19 +00001496static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001497bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 if (i < 0 || i >= Py_SIZE(a)) {
1500 PyErr_SetString(PyExc_IndexError, "index out of range");
1501 return NULL;
1502 }
1503 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Benjamin Peterson621b4302016-09-09 13:54:34 -07001506static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001507bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1508{
1509 int cmp;
1510 Py_ssize_t len;
1511
1512 len = Py_SIZE(a);
1513 if (Py_SIZE(b) != len)
1514 return 0;
1515
1516 if (a->ob_sval[0] != b->ob_sval[0])
1517 return 0;
1518
1519 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520 return (cmp == 0);
1521}
1522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001524bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001525{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 int c;
1527 Py_ssize_t len_a, len_b;
1528 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001529 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 /* Make sure both arguments are strings. */
1532 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001533 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001534 rc = PyObject_IsInstance((PyObject*)a,
1535 (PyObject*)&PyUnicode_Type);
1536 if (!rc)
1537 rc = PyObject_IsInstance((PyObject*)b,
1538 (PyObject*)&PyUnicode_Type);
1539 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001541 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001542 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001543 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001544 return NULL;
1545 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001546 else {
1547 rc = PyObject_IsInstance((PyObject*)a,
1548 (PyObject*)&PyLong_Type);
1549 if (!rc)
1550 rc = PyObject_IsInstance((PyObject*)b,
1551 (PyObject*)&PyLong_Type);
1552 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001553 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001554 if (rc) {
1555 if (PyErr_WarnEx(PyExc_BytesWarning,
1556 "Comparison between bytes and int", 1))
1557 return NULL;
1558 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
stratakise8b19652017-11-02 11:32:54 +01001561 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001563 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001565 case Py_EQ:
1566 case Py_LE:
1567 case Py_GE:
1568 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001569 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001570 case Py_NE:
1571 case Py_LT:
1572 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001573 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001574 default:
1575 PyErr_BadArgument();
1576 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
1578 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001579 else if (op == Py_EQ || op == Py_NE) {
1580 int eq = bytes_compare_eq(a, b);
1581 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001582 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001583 }
1584 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001585 len_a = Py_SIZE(a);
1586 len_b = Py_SIZE(b);
1587 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001588 if (min_len > 0) {
1589 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001590 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001593 else
1594 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001595 if (c != 0)
1596 Py_RETURN_RICHCOMPARE(c, 0, op);
1597 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001599}
1600
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001601static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001602bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001603{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001604 if (a->ob_shash == -1) {
1605 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001606 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001607 }
1608 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001609}
1610
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001611static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001612bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613{
Victor Stinnera15e2602020-04-08 02:01:56 +02001614 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1616 if (i == -1 && PyErr_Occurred())
1617 return NULL;
1618 if (i < 0)
1619 i += PyBytes_GET_SIZE(self);
1620 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1621 PyErr_SetString(PyExc_IndexError,
1622 "index out of range");
1623 return NULL;
1624 }
1625 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1626 }
1627 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001628 Py_ssize_t start, stop, step, slicelength, i;
1629 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001630 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 char* result_buf;
1632 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001633
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001634 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 return NULL;
1636 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001637 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1638 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (slicelength <= 0) {
1641 return PyBytes_FromStringAndSize("", 0);
1642 }
1643 else if (start == 0 && step == 1 &&
1644 slicelength == PyBytes_GET_SIZE(self) &&
1645 PyBytes_CheckExact(self)) {
1646 Py_INCREF(self);
1647 return (PyObject *)self;
1648 }
1649 else if (step == 1) {
1650 return PyBytes_FromStringAndSize(
1651 PyBytes_AS_STRING(self) + start,
1652 slicelength);
1653 }
1654 else {
1655 source_buf = PyBytes_AS_STRING(self);
1656 result = PyBytes_FromStringAndSize(NULL, slicelength);
1657 if (result == NULL)
1658 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 result_buf = PyBytes_AS_STRING(result);
1661 for (cur = start, i = 0; i < slicelength;
1662 cur += step, i++) {
1663 result_buf[i] = source_buf[cur];
1664 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 return result;
1667 }
1668 }
1669 else {
1670 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001671 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 Py_TYPE(item)->tp_name);
1673 return NULL;
1674 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675}
1676
1677static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001678bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1681 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682}
1683
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001684static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 (lenfunc)bytes_length, /*sq_length*/
1686 (binaryfunc)bytes_concat, /*sq_concat*/
1687 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1688 (ssizeargfunc)bytes_item, /*sq_item*/
1689 0, /*sq_slice*/
1690 0, /*sq_ass_item*/
1691 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001692 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693};
1694
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001695static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 (lenfunc)bytes_length,
1697 (binaryfunc)bytes_subscript,
1698 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699};
1700
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001701static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 (getbufferproc)bytes_buffer_getbuffer,
1703 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704};
1705
1706
/* Values for the `striptype` argument of the strip helpers: which end(s)
   of the bytes object to strip. */
#define LEFTSTRIP   0   /* leading bytes only */
#define RIGHTSTRIP  1   /* trailing bytes only */
#define BOTHSTRIP   2   /* both ends */
1710
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711/*[clinic input]
1712bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001714 sep: object = None
1715 The delimiter according which to split the bytes.
1716 None (the default value) means split on ASCII whitespace characters
1717 (space, tab, return, newline, formfeed, vertical tab).
1718 maxsplit: Py_ssize_t = -1
1719 Maximum number of splits to do.
1720 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001722Return a list of the sections in the bytes, using sep as the delimiter.
1723[clinic start generated code]*/
1724
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001726bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1727/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001728{
1729 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 const char *s = PyBytes_AS_STRING(self), *sub;
1731 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001732 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 if (maxsplit < 0)
1735 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001736 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001738 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 return NULL;
1740 sub = vsub.buf;
1741 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1744 PyBuffer_Release(&vsub);
1745 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001746}
1747
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001748/*[clinic input]
1749bytes.partition
1750
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001751 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752 /
1753
1754Partition the bytes into three parts using the given separator.
1755
1756This will search for the separator sep in the bytes. If the separator is found,
1757returns a 3-tuple containing the part before the separator, the separator
1758itself, and the part after it.
1759
1760If the separator is not found, returns a 3-tuple containing the original bytes
1761object and two empty bytes objects.
1762[clinic start generated code]*/
1763
Neal Norwitz6968b052007-02-27 19:02:19 +00001764static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001765bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001766/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001767{
Neal Norwitz6968b052007-02-27 19:02:19 +00001768 return stringlib_partition(
1769 (PyObject*) self,
1770 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001771 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001772 );
1773}
1774
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775/*[clinic input]
1776bytes.rpartition
1777
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001778 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779 /
1780
1781Partition the bytes into three parts using the given separator.
1782
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001783This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784the separator is found, returns a 3-tuple containing the part before the
1785separator, the separator itself, and the part after it.
1786
1787If the separator is not found, returns a 3-tuple containing two empty bytes
1788objects and the original bytes object.
1789[clinic start generated code]*/
1790
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001791static PyObject *
1792bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001793/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 return stringlib_rpartition(
1796 (PyObject*) self,
1797 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001798 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001800}
1801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001802/*[clinic input]
1803bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001804
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001805Return a list of the sections in the bytes, using sep as the delimiter.
1806
1807Splitting is done starting at the end of the bytes and working to the front.
1808[clinic start generated code]*/
1809
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001810static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001811bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1812/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001813{
1814 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 const char *s = PyBytes_AS_STRING(self), *sub;
1816 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 if (maxsplit < 0)
1820 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001823 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 return NULL;
1825 sub = vsub.buf;
1826 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1829 PyBuffer_Release(&vsub);
1830 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001831}
1832
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001834/*[clinic input]
1835bytes.join
1836
1837 iterable_of_bytes: object
1838 /
1839
1840Concatenate any number of bytes objects.
1841
1842The bytes whose method is called is inserted in between each pair.
1843
1844The result is returned as a new bytes object.
1845
1846Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1847[clinic start generated code]*/
1848
Neal Norwitz6968b052007-02-27 19:02:19 +00001849static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001850bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1851/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001852{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001853 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001854}
1855
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856PyObject *
1857_PyBytes_Join(PyObject *sep, PyObject *x)
1858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 assert(sep != NULL && PyBytes_Check(sep));
1860 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001861 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862}
1863
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001864static PyObject *
1865bytes_find(PyBytesObject *self, PyObject *args)
1866{
1867 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1868}
1869
1870static PyObject *
1871bytes_index(PyBytesObject *self, PyObject *args)
1872{
1873 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1874}
1875
1876
1877static PyObject *
1878bytes_rfind(PyBytesObject *self, PyObject *args)
1879{
1880 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1881}
1882
1883
1884static PyObject *
1885bytes_rindex(PyBytesObject *self, PyObject *args)
1886{
1887 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1888}
1889
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
1891Py_LOCAL_INLINE(PyObject *)
1892do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001893{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001895 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 Py_ssize_t len = PyBytes_GET_SIZE(self);
1897 char *sep;
1898 Py_ssize_t seplen;
1899 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001901 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 return NULL;
1903 sep = vsep.buf;
1904 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 i = 0;
1907 if (striptype != RIGHTSTRIP) {
1908 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1909 i++;
1910 }
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 j = len;
1914 if (striptype != LEFTSTRIP) {
1915 do {
1916 j--;
1917 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1918 j++;
1919 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1924 Py_INCREF(self);
1925 return (PyObject*)self;
1926 }
1927 else
1928 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001929}
1930
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
1932Py_LOCAL_INLINE(PyObject *)
1933do_strip(PyBytesObject *self, int striptype)
1934{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001935 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 i = 0;
1939 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001940 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 i++;
1942 }
1943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 j = len;
1946 if (striptype != LEFTSTRIP) {
1947 do {
1948 j--;
David Malcolm96960882010-11-05 17:23:41 +00001949 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 j++;
1951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1954 Py_INCREF(self);
1955 return (PyObject*)self;
1956 }
1957 else
1958 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959}
1960
1961
1962Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001963do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001965 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001966 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 }
1968 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969}
1970
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971/*[clinic input]
1972bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001974 bytes: object = None
1975 /
1976
1977Strip leading and trailing bytes contained in the argument.
1978
1979If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1980[clinic start generated code]*/
1981
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001982static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001984/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001985{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001987}
1988
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989/*[clinic input]
1990bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992 bytes: object = None
1993 /
1994
1995Strip leading bytes contained in the argument.
1996
1997If the argument is omitted or None, strip leading ASCII whitespace.
1998[clinic start generated code]*/
1999
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002000static PyObject *
2001bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002002/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002003{
2004 return do_argstrip(self, LEFTSTRIP, bytes);
2005}
2006
2007/*[clinic input]
2008bytes.rstrip
2009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010 bytes: object = None
2011 /
2012
2013Strip trailing bytes contained in the argument.
2014
2015If the argument is omitted or None, strip trailing ASCII whitespace.
2016[clinic start generated code]*/
2017
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002018static PyObject *
2019bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002020/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021{
2022 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002023}
Neal Norwitz6968b052007-02-27 19:02:19 +00002024
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002026static PyObject *
2027bytes_count(PyBytesObject *self, PyObject *args)
2028{
2029 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2030}
2031
2032
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033/*[clinic input]
2034bytes.translate
2035
Victor Stinner049e5092014-08-17 22:20:00 +02002036 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002037 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002039 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002040
2041Return a copy with each character mapped by the given translation table.
2042
Martin Panter1b6c6da2016-08-27 08:35:02 +00002043All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044The remaining characters are mapped through the given translation table.
2045[clinic start generated code]*/
2046
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002047static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002048bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002049 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002050/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002052 const char *input;
2053 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002054 Py_buffer table_view = {NULL, NULL};
2055 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002056 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002057 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002059 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 Py_ssize_t inlen, tablen, dellen = 0;
2061 PyObject *result;
2062 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002064 if (PyBytes_Check(table)) {
2065 table_chars = PyBytes_AS_STRING(table);
2066 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068 else if (table == Py_None) {
2069 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 tablen = 256;
2071 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002072 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002073 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002075 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002076 tablen = table_view.len;
2077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 if (tablen != 256) {
2080 PyErr_SetString(PyExc_ValueError,
2081 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002082 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 return NULL;
2084 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086 if (deletechars != NULL) {
2087 if (PyBytes_Check(deletechars)) {
2088 del_table_chars = PyBytes_AS_STRING(deletechars);
2089 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002091 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002092 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002093 PyBuffer_Release(&table_view);
2094 return NULL;
2095 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002096 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002097 dellen = del_table_view.len;
2098 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 }
2100 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002101 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 dellen = 0;
2103 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 inlen = PyBytes_GET_SIZE(input_obj);
2106 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 if (result == NULL) {
2108 PyBuffer_Release(&del_table_view);
2109 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002111 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002112 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 /* If no deletions are required, use faster code */
2117 for (i = inlen; --i >= 0; ) {
2118 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002119 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 changed = 1;
2121 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 if (!changed && PyBytes_CheckExact(input_obj)) {
2123 Py_INCREF(input_obj);
2124 Py_DECREF(result);
2125 result = input_obj;
2126 }
2127 PyBuffer_Release(&del_table_view);
2128 PyBuffer_Release(&table_view);
2129 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 for (i = 0; i < 256; i++)
2134 trans_table[i] = Py_CHARMASK(i);
2135 } else {
2136 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002139 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002143 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 for (i = inlen; --i >= 0; ) {
2146 c = Py_CHARMASK(*input++);
2147 if (trans_table[c] != -1)
2148 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2149 continue;
2150 changed = 1;
2151 }
2152 if (!changed && PyBytes_CheckExact(input_obj)) {
2153 Py_DECREF(result);
2154 Py_INCREF(input_obj);
2155 return input_obj;
2156 }
2157 /* Fix the size of the resulting string */
2158 if (inlen > 0)
2159 _PyBytes_Resize(&result, output - output_start);
2160 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002161}
2162
2163
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164/*[clinic input]
2165
2166@staticmethod
2167bytes.maketrans
2168
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002169 frm: Py_buffer
2170 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002171 /
2172
2173Return a translation table useable for the bytes or bytearray translate method.
2174
2175The returned table will be one where each byte in frm is mapped to the byte at
2176the same position in to.
2177
2178The bytes objects frm and to must be of the same length.
2179[clinic start generated code]*/
2180
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002182bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002183/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002184{
2185 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002186}
2187
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002188
2189/*[clinic input]
2190bytes.replace
2191
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002192 old: Py_buffer
2193 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194 count: Py_ssize_t = -1
2195 Maximum number of occurrences to replace.
2196 -1 (the default value) means replace all occurrences.
2197 /
2198
2199Return a copy with all occurrences of substring old replaced by new.
2200
2201If the optional argument count is given, only the first count occurrences are
2202replaced.
2203[clinic start generated code]*/
2204
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002206bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002207 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002208/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002210 return stringlib_replace((PyObject *)self,
2211 (const char *)old->buf, old->len,
2212 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213}
2214
2215/** End DALKE **/
2216
sweeneydea81849b2020-04-22 17:05:48 -04002217/*[clinic input]
2218bytes.removeprefix as bytes_removeprefix
2219
2220 prefix: Py_buffer
2221 /
2222
2223Return a bytes object with the given prefix string removed if present.
2224
2225If the bytes starts with the prefix string, return bytes[len(prefix):].
2226Otherwise, return a copy of the original bytes.
2227[clinic start generated code]*/
2228
2229static PyObject *
2230bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2231/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2232{
2233 const char *self_start = PyBytes_AS_STRING(self);
2234 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2235 const char *prefix_start = prefix->buf;
2236 Py_ssize_t prefix_len = prefix->len;
2237
2238 if (self_len >= prefix_len
2239 && prefix_len > 0
2240 && memcmp(self_start, prefix_start, prefix_len) == 0)
2241 {
2242 return PyBytes_FromStringAndSize(self_start + prefix_len,
2243 self_len - prefix_len);
2244 }
2245
2246 if (PyBytes_CheckExact(self)) {
2247 Py_INCREF(self);
2248 return (PyObject *)self;
2249 }
2250
2251 return PyBytes_FromStringAndSize(self_start, self_len);
2252}
2253
2254/*[clinic input]
2255bytes.removesuffix as bytes_removesuffix
2256
2257 suffix: Py_buffer
2258 /
2259
2260Return a bytes object with the given suffix string removed if present.
2261
2262If the bytes ends with the suffix string and that suffix is not empty,
2263return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2264bytes.
2265[clinic start generated code]*/
2266
2267static PyObject *
2268bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2269/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2270{
2271 const char *self_start = PyBytes_AS_STRING(self);
2272 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2273 const char *suffix_start = suffix->buf;
2274 Py_ssize_t suffix_len = suffix->len;
2275
2276 if (self_len >= suffix_len
2277 && suffix_len > 0
2278 && memcmp(self_start + self_len - suffix_len,
2279 suffix_start, suffix_len) == 0)
2280 {
2281 return PyBytes_FromStringAndSize(self_start,
2282 self_len - suffix_len);
2283 }
2284
2285 if (PyBytes_CheckExact(self)) {
2286 Py_INCREF(self);
2287 return (PyObject *)self;
2288 }
2289
2290 return PyBytes_FromStringAndSize(self_start, self_len);
2291}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002292
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002293static PyObject *
2294bytes_startswith(PyBytesObject *self, PyObject *args)
2295{
2296 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2297}
2298
2299static PyObject *
2300bytes_endswith(PyBytesObject *self, PyObject *args)
2301{
2302 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2303}
2304
2305
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002306/*[clinic input]
2307bytes.decode
2308
2309 encoding: str(c_default="NULL") = 'utf-8'
2310 The encoding with which to decode the bytes.
2311 errors: str(c_default="NULL") = 'strict'
2312 The error handling scheme to use for the handling of decoding errors.
2313 The default is 'strict' meaning that decoding errors raise a
2314 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2315 as well as any other name registered with codecs.register_error that
2316 can handle UnicodeDecodeErrors.
2317
2318Decode the bytes using the codec registered for encoding.
2319[clinic start generated code]*/
2320
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002321static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002322bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002323 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002324/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002325{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002326 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002327}
2328
Guido van Rossum20188312006-05-05 15:15:40 +00002329
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330/*[clinic input]
2331bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002332
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002333 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002334
2335Return a list of the lines in the bytes, breaking at line boundaries.
2336
2337Line breaks are not included in the resulting list unless keepends is given and
2338true.
2339[clinic start generated code]*/
2340
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002341static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002342bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002343/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002344{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002345 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002346 (PyObject*) self, PyBytes_AS_STRING(self),
2347 PyBytes_GET_SIZE(self), keepends
2348 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002349}
2350
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002351/*[clinic input]
2352@classmethod
2353bytes.fromhex
2354
2355 string: unicode
2356 /
2357
2358Create a bytes object from a string of hexadecimal numbers.
2359
2360Spaces between two numbers are accepted.
2361Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2362[clinic start generated code]*/
2363
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002364static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002365bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002366/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002368 PyObject *result = _PyBytes_FromHex(string, 0);
2369 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002370 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002371 }
2372 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002373}
2374
2375PyObject*
2376_PyBytes_FromHex(PyObject *string, int use_bytearray)
2377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002379 Py_ssize_t hexlen, invalid_char;
2380 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002381 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002382 _PyBytesWriter writer;
2383
2384 _PyBytesWriter_Init(&writer);
2385 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002386
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387 assert(PyUnicode_Check(string));
2388 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002390 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002391
Victor Stinner2bf89932015-10-14 11:25:33 +02002392 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002393 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002394 unsigned int kind = PyUnicode_KIND(string);
2395 Py_ssize_t i;
2396
2397 /* search for the first non-ASCII character */
2398 for (i = 0; i < hexlen; i++) {
2399 if (PyUnicode_READ(kind, data, i) >= 128)
2400 break;
2401 }
2402 invalid_char = i;
2403 goto error;
2404 }
2405
2406 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2407 str = PyUnicode_1BYTE_DATA(string);
2408
2409 /* This overestimates if there are spaces */
2410 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2411 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002413
2414 end = str + hexlen;
2415 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002417 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002418 do {
2419 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002420 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002421 if (str >= end)
2422 break;
2423 }
2424
2425 top = _PyLong_DigitValue[*str];
2426 if (top >= 16) {
2427 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 goto error;
2429 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002430 str++;
2431
2432 bot = _PyLong_DigitValue[*str];
2433 if (bot >= 16) {
2434 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2435 goto error;
2436 }
2437 str++;
2438
2439 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002441
2442 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002443
2444 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002445 PyErr_Format(PyExc_ValueError,
2446 "non-hexadecimal number found in "
2447 "fromhex() arg at position %zd", invalid_char);
2448 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002450}
2451
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002452/*[clinic input]
2453bytes.hex
2454
2455 sep: object = NULL
2456 An optional single character or byte to separate hex bytes.
2457 bytes_per_sep: int = 1
2458 How many bytes between separators. Positive values count from the
2459 right, negative values count from the left.
2460
2461Create a str of hexadecimal numbers from a bytes object.
2462
2463Example:
2464>>> value = b'\xb9\x01\xef'
2465>>> value.hex()
2466'b901ef'
2467>>> value.hex(':')
2468'b9:01:ef'
2469>>> value.hex(':', 2)
2470'b9:01ef'
2471>>> value.hex(':', -2)
2472'b901:ef'
2473[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002474
2475static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002476bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2477/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002478{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002479 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002480 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002481 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002482}
2483
2484static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302485bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002488}
2489
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002490
2491static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002492bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302494 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002495 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002496 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002497 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002498 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002499 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002500 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002501 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002502 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002503 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002504 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002505 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002506 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002507 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302508 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302510 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302512 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002513 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302514 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302516 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302518 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302520 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002521 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302522 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002524 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002525 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302526 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002527 BYTES_LSTRIP_METHODDEF
2528 BYTES_MAKETRANS_METHODDEF
2529 BYTES_PARTITION_METHODDEF
2530 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002531 BYTES_REMOVEPREFIX_METHODDEF
2532 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002533 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2534 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002535 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002536 BYTES_RPARTITION_METHODDEF
2537 BYTES_RSPLIT_METHODDEF
2538 BYTES_RSTRIP_METHODDEF
2539 BYTES_SPLIT_METHODDEF
2540 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002541 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002542 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002543 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302544 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302546 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002547 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302548 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002549 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002551};
2552
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002554bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002555{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002556 if (!PyBytes_Check(self)) {
2557 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002558 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002559 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002560 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002561}
2562
2563static PyNumberMethods bytes_as_number = {
2564 0, /*nb_add*/
2565 0, /*nb_subtract*/
2566 0, /*nb_multiply*/
2567 bytes_mod, /*nb_remainder*/
2568};
2569
2570static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002571bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
2573static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002574bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002575{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 PyObject *x = NULL;
2577 const char *encoding = NULL;
2578 const char *errors = NULL;
2579 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002580 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 Py_ssize_t size;
2582 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002585 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2587 &encoding, &errors))
2588 return NULL;
2589 if (x == NULL) {
2590 if (encoding != NULL || errors != NULL) {
2591 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002592 encoding != NULL ?
2593 "encoding without a string argument" :
2594 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002595 return NULL;
2596 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002597 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002600 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002602 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002604 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 return NULL;
2606 }
2607 new = PyUnicode_AsEncodedString(x, encoding, errors);
2608 if (new == NULL)
2609 return NULL;
2610 assert(PyBytes_Check(new));
2611 return new;
2612 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002613
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002614 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002615 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002616 PyUnicode_Check(x) ?
2617 "string argument without an encoding" :
2618 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002619 return NULL;
2620 }
2621
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002622 /* We'd like to call PyObject_Bytes here, but we need to check for an
2623 integer argument before deferring to PyBytes_FromObject, something
2624 PyObject_Bytes doesn't do. */
2625 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2626 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002627 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002628 Py_DECREF(func);
2629 if (new == NULL)
2630 return NULL;
2631 if (!PyBytes_Check(new)) {
2632 PyErr_Format(PyExc_TypeError,
2633 "__bytes__ returned non-bytes (type %.200s)",
2634 Py_TYPE(new)->tp_name);
2635 Py_DECREF(new);
2636 return NULL;
2637 }
2638 return new;
2639 }
2640 else if (PyErr_Occurred())
2641 return NULL;
2642
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002643 if (PyUnicode_Check(x)) {
2644 PyErr_SetString(PyExc_TypeError,
2645 "string argument without an encoding");
2646 return NULL;
2647 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002649 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002650 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2651 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002652 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002653 return NULL;
2654 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002655 }
INADA Naokia634e232017-01-06 17:32:01 +09002656 else {
2657 if (size < 0) {
2658 PyErr_SetString(PyExc_ValueError, "negative count");
2659 return NULL;
2660 }
2661 new = _PyBytes_FromSize(size, 1);
2662 if (new == NULL)
2663 return NULL;
2664 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002665 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002668 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002669}
2670
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002671static PyObject*
2672_PyBytes_FromBuffer(PyObject *x)
2673{
2674 PyObject *new;
2675 Py_buffer view;
2676
2677 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2678 return NULL;
2679
2680 new = PyBytes_FromStringAndSize(NULL, view.len);
2681 if (!new)
2682 goto fail;
2683 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2684 &view, view.len, 'C') < 0)
2685 goto fail;
2686 PyBuffer_Release(&view);
2687 return new;
2688
2689fail:
2690 Py_XDECREF(new);
2691 PyBuffer_Release(&view);
2692 return NULL;
2693}
2694
2695static PyObject*
2696_PyBytes_FromList(PyObject *x)
2697{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002698 Py_ssize_t i, size = PyList_GET_SIZE(x);
2699 Py_ssize_t value;
2700 char *str;
2701 PyObject *item;
2702 _PyBytesWriter writer;
2703
2704 _PyBytesWriter_Init(&writer);
2705 str = _PyBytesWriter_Alloc(&writer, size);
2706 if (str == NULL)
2707 return NULL;
2708 writer.overallocate = 1;
2709 size = writer.allocated;
2710
2711 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2712 item = PyList_GET_ITEM(x, i);
2713 Py_INCREF(item);
2714 value = PyNumber_AsSsize_t(item, NULL);
2715 Py_DECREF(item);
2716 if (value == -1 && PyErr_Occurred())
2717 goto error;
2718
2719 if (value < 0 || value >= 256) {
2720 PyErr_SetString(PyExc_ValueError,
2721 "bytes must be in range(0, 256)");
2722 goto error;
2723 }
2724
2725 if (i >= size) {
2726 str = _PyBytesWriter_Resize(&writer, str, size+1);
2727 if (str == NULL)
2728 return NULL;
2729 size = writer.allocated;
2730 }
2731 *str++ = (char) value;
2732 }
2733 return _PyBytesWriter_Finish(&writer, str);
2734
2735 error:
2736 _PyBytesWriter_Dealloc(&writer);
2737 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002738}
2739
2740static PyObject*
2741_PyBytes_FromTuple(PyObject *x)
2742{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002743 PyObject *bytes;
2744 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2745 Py_ssize_t value;
2746 char *str;
2747 PyObject *item;
2748
2749 bytes = PyBytes_FromStringAndSize(NULL, size);
2750 if (bytes == NULL)
2751 return NULL;
2752 str = ((PyBytesObject *)bytes)->ob_sval;
2753
2754 for (i = 0; i < size; i++) {
2755 item = PyTuple_GET_ITEM(x, i);
2756 value = PyNumber_AsSsize_t(item, NULL);
2757 if (value == -1 && PyErr_Occurred())
2758 goto error;
2759
2760 if (value < 0 || value >= 256) {
2761 PyErr_SetString(PyExc_ValueError,
2762 "bytes must be in range(0, 256)");
2763 goto error;
2764 }
2765 *str++ = (char) value;
2766 }
2767 return bytes;
2768
2769 error:
2770 Py_DECREF(bytes);
2771 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002772}
2773
2774static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002775_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002776{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002777 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002778 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002779 _PyBytesWriter writer;
2780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002782 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002783 if (size == -1 && PyErr_Occurred())
2784 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002785
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002786 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002787 str = _PyBytesWriter_Alloc(&writer, size);
2788 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002790 writer.overallocate = 1;
2791 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 /* Run the iterator to exhaustion */
2794 for (i = 0; ; i++) {
2795 PyObject *item;
2796 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 /* Get the next item */
2799 item = PyIter_Next(it);
2800 if (item == NULL) {
2801 if (PyErr_Occurred())
2802 goto error;
2803 break;
2804 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002807 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 Py_DECREF(item);
2809 if (value == -1 && PyErr_Occurred())
2810 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002812 /* Range check */
2813 if (value < 0 || value >= 256) {
2814 PyErr_SetString(PyExc_ValueError,
2815 "bytes must be in range(0, 256)");
2816 goto error;
2817 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 /* Append the byte */
2820 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002821 str = _PyBytesWriter_Resize(&writer, str, size+1);
2822 if (str == NULL)
2823 return NULL;
2824 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002826 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002827 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002828
2829 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830
2831 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002832 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002834}
2835
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002836PyObject *
2837PyBytes_FromObject(PyObject *x)
2838{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002839 PyObject *it, *result;
2840
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002841 if (x == NULL) {
2842 PyErr_BadInternalCall();
2843 return NULL;
2844 }
2845
2846 if (PyBytes_CheckExact(x)) {
2847 Py_INCREF(x);
2848 return x;
2849 }
2850
2851 /* Use the modern buffer interface */
2852 if (PyObject_CheckBuffer(x))
2853 return _PyBytes_FromBuffer(x);
2854
2855 if (PyList_CheckExact(x))
2856 return _PyBytes_FromList(x);
2857
2858 if (PyTuple_CheckExact(x))
2859 return _PyBytes_FromTuple(x);
2860
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002861 if (!PyUnicode_Check(x)) {
2862 it = PyObject_GetIter(x);
2863 if (it != NULL) {
2864 result = _PyBytes_FromIterator(it, x);
2865 Py_DECREF(it);
2866 return result;
2867 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002868 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2869 return NULL;
2870 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002871 }
2872
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002873 PyErr_Format(PyExc_TypeError,
2874 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002875 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002876 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002877}
2878
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002880bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 PyObject *tmp, *pnew;
2883 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 assert(PyType_IsSubtype(type, &PyBytes_Type));
2886 tmp = bytes_new(&PyBytes_Type, args, kwds);
2887 if (tmp == NULL)
2888 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002889 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 n = PyBytes_GET_SIZE(tmp);
2891 pnew = type->tp_alloc(type, n);
2892 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002893 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 PyBytes_AS_STRING(tmp), n+1);
2895 ((PyBytesObject *)pnew)->ob_shash =
2896 ((PyBytesObject *)tmp)->ob_shash;
2897 }
2898 Py_DECREF(tmp);
2899 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900}
2901
/* Docstring of the bytes type (used as the tp_doc slot of PyBytes_Type).
   It lists the accepted constructor call forms, then the kinds of source
   object a bytes object can be built from. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002902PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002903"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002905bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002906bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2907bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002908\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002910 - an iterable yielding integers in range(256)\n\
2911 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002912 - any object implementing the buffer API.\n\
2913 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002914
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002915static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002916
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2919 "bytes",
2920 PyBytesObject_SIZE,
2921 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002922 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002923 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 0, /* tp_getattr */
2925 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002926 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002928 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 &bytes_as_sequence, /* tp_as_sequence */
2930 &bytes_as_mapping, /* tp_as_mapping */
2931 (hashfunc)bytes_hash, /* tp_hash */
2932 0, /* tp_call */
2933 bytes_str, /* tp_str */
2934 PyObject_GenericGetAttr, /* tp_getattro */
2935 0, /* tp_setattro */
2936 &bytes_as_buffer, /* tp_as_buffer */
2937 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2938 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2939 bytes_doc, /* tp_doc */
2940 0, /* tp_traverse */
2941 0, /* tp_clear */
2942 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2943 0, /* tp_weaklistoffset */
2944 bytes_iter, /* tp_iter */
2945 0, /* tp_iternext */
2946 bytes_methods, /* tp_methods */
2947 0, /* tp_members */
2948 0, /* tp_getset */
2949 &PyBaseObject_Type, /* tp_base */
2950 0, /* tp_dict */
2951 0, /* tp_descr_get */
2952 0, /* tp_descr_set */
2953 0, /* tp_dictoffset */
2954 0, /* tp_init */
2955 0, /* tp_alloc */
2956 bytes_new, /* tp_new */
2957 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002958};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002959
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002960void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002961PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002962{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 assert(pv != NULL);
2964 if (*pv == NULL)
2965 return;
2966 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002967 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002968 return;
2969 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002970
2971 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2972 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002973 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002974 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002975
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002976 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002977 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2978 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2979 Py_CLEAR(*pv);
2980 return;
2981 }
2982
2983 oldsize = PyBytes_GET_SIZE(*pv);
2984 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2985 PyErr_NoMemory();
2986 goto error;
2987 }
2988 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2989 goto error;
2990
2991 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2992 PyBuffer_Release(&wb);
2993 return;
2994
2995 error:
2996 PyBuffer_Release(&wb);
2997 Py_CLEAR(*pv);
2998 return;
2999 }
3000
3001 else {
3002 /* Multiple references, need to create new object */
3003 PyObject *v;
3004 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003005 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003006 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007}
3008
3009void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003010PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 PyBytes_Concat(pv, w);
3013 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014}
3015
3016
Ethan Furmanb95b5612015-01-23 20:05:18 -08003017/* The following function breaks the notion that bytes are immutable:
3018 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003019 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003020 as creating a new bytes object and destroying the old one, only
3021 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003023 Note that if there's not enough memory to resize the bytes object, the
3024 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025 memory" exception is set, and -1 is returned. Else (on success) 0 is
3026 returned, and the value in *pv may or may not be the same as on input.
3027 As always, an extra byte is allocated for a trailing \0 byte (newsize
3028 does *not* include that), and a trailing \0 byte is stored.
3029*/
3030
3031int
3032_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3033{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003034 PyObject *v;
3035 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003037 if (!PyBytes_Check(v) || newsize < 0) {
3038 goto error;
3039 }
3040 if (Py_SIZE(v) == newsize) {
3041 /* return early if newsize equals to v->ob_size */
3042 return 0;
3043 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003044 if (Py_SIZE(v) == 0) {
3045 if (newsize == 0) {
3046 return 0;
3047 }
3048 *pv = _PyBytes_FromSize(newsize, 0);
3049 Py_DECREF(v);
3050 return (*pv == NULL) ? -1 : 0;
3051 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003052 if (Py_REFCNT(v) != 1) {
3053 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003054 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003055 if (newsize == 0) {
3056 *pv = _PyBytes_FromSize(0, 0);
3057 Py_DECREF(v);
3058 return (*pv == NULL) ? -1 : 0;
3059 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003061#ifdef Py_REF_DEBUG
3062 _Py_RefTotal--;
3063#endif
3064#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003065 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003066#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003067 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003068 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 if (*pv == NULL) {
3070 PyObject_Del(v);
3071 PyErr_NoMemory();
3072 return -1;
3073 }
3074 _Py_NewReference(*pv);
3075 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003076 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 sv->ob_sval[newsize] = '\0';
3078 sv->ob_shash = -1; /* invalidate cached hash value */
3079 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003080error:
3081 *pv = 0;
3082 Py_DECREF(v);
3083 PyErr_BadInternalCall();
3084 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003085}
3086
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003087void
Victor Stinnerc41eed12020-06-23 15:54:35 +02003088_PyBytes_Fini(PyThreadState *tstate)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089{
Victor Stinnerc41eed12020-06-23 15:54:35 +02003090 struct _Py_bytes_state* state = &tstate->interp->bytes;
3091 for (int i = 0; i < UCHAR_MAX + 1; i++) {
3092 Py_CLEAR(state->characters[i]);
3093 }
3094 Py_CLEAR(state->empty_string);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003095}
3096
Benjamin Peterson4116f362008-05-27 00:36:20 +00003097/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098
3099typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 PyObject_HEAD
3101 Py_ssize_t it_index;
3102 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003103} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104
3105static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003106striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003107{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003108 _PyObject_GC_UNTRACK(it);
3109 Py_XDECREF(it->it_seq);
3110 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003111}
3112
3113static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003114striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 Py_VISIT(it->it_seq);
3117 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003118}
3119
3120static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003121striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003122{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003123 PyBytesObject *seq;
3124 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 assert(it != NULL);
3127 seq = it->it_seq;
3128 if (seq == NULL)
3129 return NULL;
3130 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003132 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3133 item = PyLong_FromLong(
3134 (unsigned char)seq->ob_sval[it->it_index]);
3135 if (item != NULL)
3136 ++it->it_index;
3137 return item;
3138 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003140 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003141 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003142 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003143}
3144
3145static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303146striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003148 Py_ssize_t len = 0;
3149 if (it->it_seq)
3150 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3151 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003152}
3153
3154PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003155 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003156
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003157static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303158striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003159{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003160 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003161 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003162 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003163 it->it_seq, it->it_index);
3164 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003165 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003166 }
3167}
3168
3169PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3170
3171static PyObject *
3172striter_setstate(striterobject *it, PyObject *state)
3173{
3174 Py_ssize_t index = PyLong_AsSsize_t(state);
3175 if (index == -1 && PyErr_Occurred())
3176 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003177 if (it->it_seq != NULL) {
3178 if (index < 0)
3179 index = 0;
3180 else if (index > PyBytes_GET_SIZE(it->it_seq))
3181 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3182 it->it_index = index;
3183 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003184 Py_RETURN_NONE;
3185}
3186
3187PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3188
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003189static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3191 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003192 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3193 reduce_doc},
3194 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3195 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003196 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003197};
3198
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003199PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003200 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3201 "bytes_iterator", /* tp_name */
3202 sizeof(striterobject), /* tp_basicsize */
3203 0, /* tp_itemsize */
3204 /* methods */
3205 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003206 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 0, /* tp_getattr */
3208 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003209 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003210 0, /* tp_repr */
3211 0, /* tp_as_number */
3212 0, /* tp_as_sequence */
3213 0, /* tp_as_mapping */
3214 0, /* tp_hash */
3215 0, /* tp_call */
3216 0, /* tp_str */
3217 PyObject_GenericGetAttr, /* tp_getattro */
3218 0, /* tp_setattro */
3219 0, /* tp_as_buffer */
3220 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3221 0, /* tp_doc */
3222 (traverseproc)striter_traverse, /* tp_traverse */
3223 0, /* tp_clear */
3224 0, /* tp_richcompare */
3225 0, /* tp_weaklistoffset */
3226 PyObject_SelfIter, /* tp_iter */
3227 (iternextfunc)striter_next, /* tp_iternext */
3228 striter_methods, /* tp_methods */
3229 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003230};
3231
3232static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003233bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003234{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003237 if (!PyBytes_Check(seq)) {
3238 PyErr_BadInternalCall();
3239 return NULL;
3240 }
3241 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3242 if (it == NULL)
3243 return NULL;
3244 it->it_index = 0;
3245 Py_INCREF(seq);
3246 it->it_seq = (PyBytesObject *)seq;
3247 _PyObject_GC_TRACK(it);
3248 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003249}
Victor Stinner00165072015-10-09 01:53:21 +02003250
3251
3252/* _PyBytesWriter API */
3253
/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the buffer is grown by an extra (size / OVERALLOCATE_FACTOR) bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3261
3262void
3263_PyBytesWriter_Init(_PyBytesWriter *writer)
3264{
Victor Stinner661aacc2015-10-14 09:41:48 +02003265 /* Set all attributes before small_buffer to 0 */
3266 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003267#ifndef NDEBUG
3268 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3269 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003270#endif
3271}
3272
3273void
3274_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3275{
3276 Py_CLEAR(writer->buffer);
3277}
3278
3279Py_LOCAL_INLINE(char*)
3280_PyBytesWriter_AsString(_PyBytesWriter *writer)
3281{
Victor Stinner661aacc2015-10-14 09:41:48 +02003282 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003283 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003284 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003285 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003286 else if (writer->use_bytearray) {
3287 assert(writer->buffer != NULL);
3288 return PyByteArray_AS_STRING(writer->buffer);
3289 }
3290 else {
3291 assert(writer->buffer != NULL);
3292 return PyBytes_AS_STRING(writer->buffer);
3293 }
Victor Stinner00165072015-10-09 01:53:21 +02003294}
3295
3296Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003297_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003298{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003299 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003300 assert(str != NULL);
3301 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003302 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003303 return str - start;
3304}
3305
Victor Stinner68762572019-10-07 18:42:01 +02003306#ifndef NDEBUG
3307Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003308_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3309{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003310 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003311
Victor Stinner661aacc2015-10-14 09:41:48 +02003312 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003313 assert(writer->buffer == NULL);
3314 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003315 else {
3316 assert(writer->buffer != NULL);
3317 if (writer->use_bytearray)
3318 assert(PyByteArray_CheckExact(writer->buffer));
3319 else
3320 assert(PyBytes_CheckExact(writer->buffer));
3321 assert(Py_REFCNT(writer->buffer) == 1);
3322 }
Victor Stinner00165072015-10-09 01:53:21 +02003323
Victor Stinner661aacc2015-10-14 09:41:48 +02003324 if (writer->use_bytearray) {
3325 /* bytearray has its own overallocation algorithm,
3326 writer overallocation must be disabled */
3327 assert(!writer->overallocate);
3328 }
3329
3330 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003331 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003332 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003333 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003334 assert(start[writer->allocated] == 0);
3335
3336 end = start + writer->allocated;
3337 assert(str != NULL);
3338 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003339 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003340}
Victor Stinner68762572019-10-07 18:42:01 +02003341#endif
Victor Stinner00165072015-10-09 01:53:21 +02003342
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003343void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003344_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003345{
3346 Py_ssize_t allocated, pos;
3347
Victor Stinner68762572019-10-07 18:42:01 +02003348 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003349 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003350
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003351 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003352 if (writer->overallocate
3353 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3354 /* overallocate to limit the number of realloc() */
3355 allocated += allocated / OVERALLOCATE_FACTOR;
3356 }
3357
Victor Stinner2bf89932015-10-14 11:25:33 +02003358 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003359 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003360 if (writer->use_bytearray) {
3361 if (PyByteArray_Resize(writer->buffer, allocated))
3362 goto error;
3363 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3364 but we cannot use ob_alloc because bytes may need to be moved
3365 to use the whole buffer. bytearray uses an internal optimization
3366 to avoid moving or copying bytes when bytes are removed at the
3367 beginning (ex: del bytearray[:1]). */
3368 }
3369 else {
3370 if (_PyBytes_Resize(&writer->buffer, allocated))
3371 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003372 }
3373 }
3374 else {
3375 /* convert from stack buffer to bytes object buffer */
3376 assert(writer->buffer == NULL);
3377
Victor Stinner661aacc2015-10-14 09:41:48 +02003378 if (writer->use_bytearray)
3379 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3380 else
3381 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003382 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003383 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003384
3385 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003386 char *dest;
3387 if (writer->use_bytearray)
3388 dest = PyByteArray_AS_STRING(writer->buffer);
3389 else
3390 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003391 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003392 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003393 pos);
3394 }
3395
Victor Stinnerb3653a32015-10-09 03:38:24 +02003396 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003397#ifndef NDEBUG
3398 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3399 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003400#endif
Victor Stinner00165072015-10-09 01:53:21 +02003401 }
3402 writer->allocated = allocated;
3403
3404 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003405 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003406 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003407
3408error:
3409 _PyBytesWriter_Dealloc(writer);
3410 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003411}
3412
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003413void*
3414_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3415{
3416 Py_ssize_t new_min_size;
3417
Victor Stinner68762572019-10-07 18:42:01 +02003418 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003419 assert(size >= 0);
3420
3421 if (size == 0) {
3422 /* nothing to do */
3423 return str;
3424 }
3425
3426 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3427 PyErr_NoMemory();
3428 _PyBytesWriter_Dealloc(writer);
3429 return NULL;
3430 }
3431 new_min_size = writer->min_size + size;
3432
3433 if (new_min_size > writer->allocated)
3434 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3435
3436 writer->min_size = new_min_size;
3437 return str;
3438}
3439
Victor Stinner00165072015-10-09 01:53:21 +02003440/* Allocate the buffer to write size bytes.
3441 Return the pointer to the beginning of buffer data.
3442 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003443void*
Victor Stinner00165072015-10-09 01:53:21 +02003444_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3445{
3446 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003447 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003448 assert(size >= 0);
3449
Victor Stinnerb3653a32015-10-09 03:38:24 +02003450 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003451#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003452 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003453 /* In debug mode, don't use the full small buffer because it is less
3454 efficient than bytes and bytearray objects to detect buffer underflow
3455 and buffer overflow. Use 10 bytes of the small buffer to test also
3456 code using the smaller buffer in debug mode.
3457
3458 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3459 in debug mode to also be able to detect stack overflow when running
3460 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3461 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3462 stack overflow. */
3463 writer->allocated = Py_MIN(writer->allocated, 10);
3464 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3465 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003466 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003467#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003468 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003469#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003470 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003471}
3472
3473PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003474_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003475{
Victor Stinner2bf89932015-10-14 11:25:33 +02003476 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003477 PyObject *result;
3478
Victor Stinner68762572019-10-07 18:42:01 +02003479 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003480
Victor Stinner2bf89932015-10-14 11:25:33 +02003481 size = _PyBytesWriter_GetSize(writer, str);
3482 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003483 Py_CLEAR(writer->buffer);
3484 /* Get the empty byte string singleton */
3485 result = PyBytes_FromStringAndSize(NULL, 0);
3486 }
3487 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003488 if (writer->use_bytearray) {
3489 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3490 }
3491 else {
3492 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3493 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003494 }
3495 else {
3496 result = writer->buffer;
3497 writer->buffer = NULL;
3498
Victor Stinner2bf89932015-10-14 11:25:33 +02003499 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003500 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003501 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003502 Py_DECREF(result);
3503 return NULL;
3504 }
3505 }
3506 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003507 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003508 assert(result == NULL);
3509 return NULL;
3510 }
Victor Stinner00165072015-10-09 01:53:21 +02003511 }
3512 }
Victor Stinner00165072015-10-09 01:53:21 +02003513 }
Victor Stinner00165072015-10-09 01:53:21 +02003514 return result;
3515}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003516
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003517void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003518_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003519 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003520{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003521 char *str = (char *)ptr;
3522
Victor Stinnerce179bf2015-10-09 12:57:22 +02003523 str = _PyBytesWriter_Prepare(writer, str, size);
3524 if (str == NULL)
3525 return NULL;
3526
Christian Heimesf051e432016-09-13 20:22:02 +02003527 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003528 str += size;
3529
3530 return str;
3531}