blob: b79c2460409ebdbdf06dddefbbd7674651aeaca0 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02009#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021static PyBytesObject *characters[UCHAR_MAX + 1];
22static PyBytesObject *nullstring;
23
Hai Shi46874c22020-01-30 17:20:25 -060024_Py_IDENTIFIER(__bytes__);
25
Mark Dickinsonfd24b322008-12-06 15:33:31 +000026/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyBytesObject_SIZE + n bytes.
28
29 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33
Victor Stinner2bf89932015-10-14 11:25:33 +020034/* Forward declaration */
35Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36 char *str);
37
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
Martin Pantera90a4a92016-05-30 04:04:50 +000042 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000050 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000051 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020055 allocated for string data, not counting the null terminating character.
56 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020060static PyObject *
61_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020063 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020064 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 Py_INCREF(op);
68 return (PyObject *)op;
69 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070
Victor Stinner049e5092014-08-17 22:20:00 +020071 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 if (op == NULL)
83 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010084 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020086 if (!use_calloc)
87 op->ob_sval[size] = '\0';
88 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020092 }
93 return (PyObject *) op;
94}
95
96PyObject *
97PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98{
99 PyBytesObject *op;
100 if (size < 0) {
101 PyErr_SetString(PyExc_SystemError,
102 "Negative size passed to PyBytes_FromStringAndSize");
103 return NULL;
104 }
105 if (size == 1 && str != NULL &&
106 (op = characters[*str & UCHAR_MAX]) != NULL)
107 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
Christian Heimesf051e432016-09-13 20:22:02 +0200118 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
144 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100153 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200155 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200171 const char *f;
172 const char *p;
173 Py_ssize_t prec;
174 int longflag;
175 int size_tflag;
176 /* Longest 64-bit formatted numbers:
177 - "18446744073709551615\0" (21 bytes)
178 - "-9223372036854775808\0" (21 bytes)
179 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Victor Stinner03dab782015-10-14 00:21:35 +0200181 Longest 64-bit pointer representation:
182 "0xffffffffffffffff\0" (19 bytes). */
183 char buffer[21];
184 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000185
Victor Stinner03dab782015-10-14 00:21:35 +0200186 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 s = _PyBytesWriter_Alloc(&writer, strlen(format));
189 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200191 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192
Victor Stinner03dab782015-10-14 00:21:35 +0200193#define WRITE_BYTES(str) \
194 do { \
195 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196 if (s == NULL) \
197 goto error; \
198 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200201 if (*f != '%') {
202 *s++ = *f;
203 continue;
204 }
205
206 p = f++;
207
208 /* ignore the width (ex: 10 in "%10s") */
209 while (Py_ISDIGIT(*f))
210 f++;
211
212 /* parse the precision (ex: 10 in "%.10s") */
213 prec = 0;
214 if (*f == '.') {
215 f++;
216 for (; Py_ISDIGIT(*f); f++) {
217 prec = (prec * 10) + (*f - '0');
218 }
219 }
220
221 while (*f && *f != '%' && !Py_ISALPHA(*f))
222 f++;
223
224 /* handle the long flag ('l'), but only for %ld and %lu.
225 others can be added when necessary. */
226 longflag = 0;
227 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228 longflag = 1;
229 ++f;
230 }
231
232 /* handle the size_t flag ('z'). */
233 size_tflag = 0;
234 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235 size_tflag = 1;
236 ++f;
237 }
238
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700239 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200240 (ex: 2 for "%s") */
241 writer.min_size -= (f - p + 1);
242
243 switch (*f) {
244 case 'c':
245 {
246 int c = va_arg(vargs, int);
247 if (c < 0 || c > 255) {
248 PyErr_SetString(PyExc_OverflowError,
249 "PyBytes_FromFormatV(): %c format "
250 "expects an integer in range [0; 255]");
251 goto error;
252 }
253 writer.min_size++;
254 *s++ = (unsigned char)c;
255 break;
256 }
257
258 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200259 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200260 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200261 }
262 else if (size_tflag) {
263 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
264 }
265 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200266 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200267 }
Victor Stinner03dab782015-10-14 00:21:35 +0200268 assert(strlen(buffer) < sizeof(buffer));
269 WRITE_BYTES(buffer);
270 break;
271
272 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200273 if (longflag) {
274 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
275 }
276 else if (size_tflag) {
277 sprintf(buffer, "%zu", va_arg(vargs, size_t));
278 }
279 else {
280 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
281 }
Victor Stinner03dab782015-10-14 00:21:35 +0200282 assert(strlen(buffer) < sizeof(buffer));
283 WRITE_BYTES(buffer);
284 break;
285
286 case 'i':
287 sprintf(buffer, "%i", va_arg(vargs, int));
288 assert(strlen(buffer) < sizeof(buffer));
289 WRITE_BYTES(buffer);
290 break;
291
292 case 'x':
293 sprintf(buffer, "%x", va_arg(vargs, int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 's':
299 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200301
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200302 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200303 if (prec <= 0) {
304 i = strlen(p);
305 }
306 else {
307 i = 0;
308 while (i < prec && p[i]) {
309 i++;
310 }
311 }
Victor Stinner03dab782015-10-14 00:21:35 +0200312 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
313 if (s == NULL)
314 goto error;
315 break;
316 }
317
318 case 'p':
319 sprintf(buffer, "%p", va_arg(vargs, void*));
320 assert(strlen(buffer) < sizeof(buffer));
321 /* %p is ill-defined: ensure leading 0x. */
322 if (buffer[1] == 'X')
323 buffer[1] = 'x';
324 else if (buffer[1] != 'x') {
325 memmove(buffer+2, buffer, strlen(buffer)+1);
326 buffer[0] = '0';
327 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 }
Victor Stinner03dab782015-10-14 00:21:35 +0200329 WRITE_BYTES(buffer);
330 break;
331
332 case '%':
333 writer.min_size++;
334 *s++ = '%';
335 break;
336
337 default:
338 if (*f == 0) {
339 /* fix min_size if we reached the end of the format string */
340 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000342
Victor Stinner03dab782015-10-14 00:21:35 +0200343 /* invalid format string: copy unformatted string and exit */
344 WRITE_BYTES(p);
345 return _PyBytesWriter_Finish(&writer, s);
346 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
Victor Stinner03dab782015-10-14 00:21:35 +0200349#undef WRITE_BYTES
350
351 return _PyBytesWriter_Finish(&writer, s);
352
353 error:
354 _PyBytesWriter_Dealloc(&writer);
355 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356}
357
358PyObject *
359PyBytes_FromFormat(const char *format, ...)
360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyObject* ret;
362 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000363
364#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 ret = PyBytes_FromFormatV(format, vargs);
370 va_end(vargs);
371 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000372}
373
Ethan Furmanb95b5612015-01-23 20:05:18 -0800374/* Helpers for formatstring */
375
376Py_LOCAL_INLINE(PyObject *)
377getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
378{
379 Py_ssize_t argidx = *p_argidx;
380 if (argidx < arglen) {
381 (*p_argidx)++;
382 if (arglen < 0)
383 return args;
384 else
385 return PyTuple_GetItem(args, argidx);
386 }
387 PyErr_SetString(PyExc_TypeError,
388 "not enough arguments for format string");
389 return NULL;
390}
391
/* Format flags: one bit per flag character that may follow the '%' of a
   format specifier. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
404
405/* Returns a new reference to a PyBytes object, or NULL on failure. */
406
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200407static char*
408formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200409 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800410{
411 char *p;
412 PyObject *result;
413 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200414 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415
416 x = PyFloat_AsDouble(v);
417 if (x == -1.0 && PyErr_Occurred()) {
418 PyErr_Format(PyExc_TypeError, "float argument required, "
419 "not %.200s", Py_TYPE(v)->tp_name);
420 return NULL;
421 }
422
423 if (prec < 0)
424 prec = 6;
425
426 p = PyOS_double_to_string(x, type, prec,
427 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
428
429 if (p == NULL)
430 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200431
432 len = strlen(p);
433 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200434 str = _PyBytesWriter_Prepare(writer, str, len);
435 if (str == NULL)
436 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200437 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200438 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200439 str += len;
440 return str;
441 }
442
443 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800444 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200445 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600446 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447}
448
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300449static PyObject *
450formatlong(PyObject *v, int flags, int prec, int type)
451{
452 PyObject *result, *iobj;
453 if (type == 'i')
454 type = 'd';
455 if (PyLong_Check(v))
456 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
457 if (PyNumber_Check(v)) {
458 /* make sure number is a type of integer for o, x, and X */
459 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300460 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461 else
462 iobj = PyNumber_Long(v);
463 if (iobj == NULL) {
464 if (!PyErr_ExceptionMatches(PyExc_TypeError))
465 return NULL;
466 }
467 else if (!PyLong_Check(iobj))
468 Py_CLEAR(iobj);
469 if (iobj != NULL) {
470 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
471 Py_DECREF(iobj);
472 return result;
473 }
474 }
475 PyErr_Format(PyExc_TypeError,
476 "%%%c format: %s is required, not %.200s", type,
477 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
478 : "a number",
479 Py_TYPE(v)->tp_name);
480 return NULL;
481}
482
483static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200484byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800485{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300486 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487 *p = PyBytes_AS_STRING(arg)[0];
488 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800489 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300490 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200491 *p = PyByteArray_AS_STRING(arg)[0];
492 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 }
494 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300495 PyObject *iobj;
496 long ival;
497 int overflow;
498 /* make sure number is a type of integer */
499 if (PyLong_Check(arg)) {
500 ival = PyLong_AsLongAndOverflow(arg, &overflow);
501 }
502 else {
503 iobj = PyNumber_Index(arg);
504 if (iobj == NULL) {
505 if (!PyErr_ExceptionMatches(PyExc_TypeError))
506 return 0;
507 goto onError;
508 }
509 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
510 Py_DECREF(iobj);
511 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300512 if (!overflow && ival == -1 && PyErr_Occurred())
513 goto onError;
514 if (overflow || !(0 <= ival && ival <= 255)) {
515 PyErr_SetString(PyExc_OverflowError,
516 "%c arg not in range(256)");
517 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800518 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300519 *p = (char)ival;
520 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300522 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200523 PyErr_SetString(PyExc_TypeError,
524 "%c requires an integer in range(256) or a single byte");
525 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526}
527
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800528static PyObject *_PyBytes_FromBuffer(PyObject *x);
529
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 /* is it a bytes object? */
535 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536 *pbuf = PyBytes_AS_STRING(v);
537 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200539 return v;
540 }
541 if (PyByteArray_Check(v)) {
542 *pbuf = PyByteArray_AS_STRING(v);
543 *plen = PyByteArray_GET_SIZE(v);
544 Py_INCREF(v);
545 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 }
547 /* does it support __bytes__? */
548 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
549 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100550 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_DECREF(func);
552 if (result == NULL)
553 return NULL;
554 if (!PyBytes_Check(result)) {
555 PyErr_Format(PyExc_TypeError,
556 "__bytes__ returned non-bytes (type %.200s)",
557 Py_TYPE(result)->tp_name);
558 Py_DECREF(result);
559 return NULL;
560 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200561 *pbuf = PyBytes_AS_STRING(result);
562 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800563 return result;
564 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800565 /* does it support buffer protocol? */
566 if (PyObject_CheckBuffer(v)) {
567 /* maybe we can avoid making a copy of the buffer object here? */
568 result = _PyBytes_FromBuffer(v);
569 if (result == NULL)
570 return NULL;
571 *pbuf = PyBytes_AS_STRING(result);
572 *plen = PyBytes_GET_SIZE(result);
573 return result;
574 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800575 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800576 "%%b requires a bytes-like object, "
577 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 Py_TYPE(v)->tp_name);
579 return NULL;
580}
581
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200582/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583
584PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200585_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
586 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587{
Victor Stinner772b2b02015-10-14 09:56:53 +0200588 const char *fmt;
589 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800590 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200591 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800593 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200594 _PyBytesWriter writer;
595
Victor Stinner772b2b02015-10-14 09:56:53 +0200596 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 PyErr_BadInternalCall();
598 return NULL;
599 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 fmt = format;
601 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200602
603 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200604 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200605
606 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
607 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800608 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (!use_bytearray)
610 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200611
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612 if (PyTuple_Check(args)) {
613 arglen = PyTuple_GET_SIZE(args);
614 argidx = 0;
615 }
616 else {
617 arglen = -1;
618 argidx = -2;
619 }
620 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
621 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
622 !PyByteArray_Check(args)) {
623 dict = args;
624 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200625
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 while (--fmtcnt >= 0) {
627 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200628 Py_ssize_t len;
629 char *pos;
630
Xiang Zhangb76ad512017-03-06 17:17:05 +0800631 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200632 if (pos != NULL)
633 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200634 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800635 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200636 assert(len != 0);
637
Christian Heimesf051e432016-09-13 20:22:02 +0200638 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200639 res += len;
640 fmt += len;
641 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800642 }
643 else {
644 /* Got a format specifier */
645 int flags = 0;
646 Py_ssize_t width = -1;
647 int prec = -1;
648 int c = '\0';
649 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650 PyObject *v = NULL;
651 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200652 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800653 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200654 Py_ssize_t len = 0;
655 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200656 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657
Ethan Furmanb95b5612015-01-23 20:05:18 -0800658 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200659 if (*fmt == '%') {
660 *res++ = '%';
661 fmt++;
662 fmtcnt--;
663 continue;
664 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800665 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200666 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 Py_ssize_t keylen;
668 PyObject *key;
669 int pcount = 1;
670
671 if (dict == NULL) {
672 PyErr_SetString(PyExc_TypeError,
673 "format requires a mapping");
674 goto error;
675 }
676 ++fmt;
677 --fmtcnt;
678 keystart = fmt;
679 /* Skip over balanced parentheses */
680 while (pcount > 0 && --fmtcnt >= 0) {
681 if (*fmt == ')')
682 --pcount;
683 else if (*fmt == '(')
684 ++pcount;
685 fmt++;
686 }
687 keylen = fmt - keystart - 1;
688 if (fmtcnt < 0 || pcount > 0) {
689 PyErr_SetString(PyExc_ValueError,
690 "incomplete format key");
691 goto error;
692 }
693 key = PyBytes_FromStringAndSize(keystart,
694 keylen);
695 if (key == NULL)
696 goto error;
697 if (args_owned) {
698 Py_DECREF(args);
699 args_owned = 0;
700 }
701 args = PyObject_GetItem(dict, key);
702 Py_DECREF(key);
703 if (args == NULL) {
704 goto error;
705 }
706 args_owned = 1;
707 arglen = -1;
708 argidx = -2;
709 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200710
711 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800712 while (--fmtcnt >= 0) {
713 switch (c = *fmt++) {
714 case '-': flags |= F_LJUST; continue;
715 case '+': flags |= F_SIGN; continue;
716 case ' ': flags |= F_BLANK; continue;
717 case '#': flags |= F_ALT; continue;
718 case '0': flags |= F_ZERO; continue;
719 }
720 break;
721 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200722
723 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(PyExc_TypeError,
730 "* wants int");
731 goto error;
732 }
733 width = PyLong_AsSsize_t(v);
734 if (width == -1 && PyErr_Occurred())
735 goto error;
736 if (width < 0) {
737 flags |= F_LJUST;
738 width = -width;
739 }
740 if (--fmtcnt >= 0)
741 c = *fmt++;
742 }
743 else if (c >= 0 && isdigit(c)) {
744 width = c - '0';
745 while (--fmtcnt >= 0) {
746 c = Py_CHARMASK(*fmt++);
747 if (!isdigit(c))
748 break;
749 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
750 PyErr_SetString(
751 PyExc_ValueError,
752 "width too big");
753 goto error;
754 }
755 width = width*10 + (c - '0');
756 }
757 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200758
759 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800760 if (c == '.') {
761 prec = 0;
762 if (--fmtcnt >= 0)
763 c = *fmt++;
764 if (c == '*') {
765 v = getnextarg(args, arglen, &argidx);
766 if (v == NULL)
767 goto error;
768 if (!PyLong_Check(v)) {
769 PyErr_SetString(
770 PyExc_TypeError,
771 "* wants int");
772 goto error;
773 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200774 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800775 if (prec == -1 && PyErr_Occurred())
776 goto error;
777 if (prec < 0)
778 prec = 0;
779 if (--fmtcnt >= 0)
780 c = *fmt++;
781 }
782 else if (c >= 0 && isdigit(c)) {
783 prec = c - '0';
784 while (--fmtcnt >= 0) {
785 c = Py_CHARMASK(*fmt++);
786 if (!isdigit(c))
787 break;
788 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
789 PyErr_SetString(
790 PyExc_ValueError,
791 "prec too big");
792 goto error;
793 }
794 prec = prec*10 + (c - '0');
795 }
796 }
797 } /* prec */
798 if (fmtcnt >= 0) {
799 if (c == 'h' || c == 'l' || c == 'L') {
800 if (--fmtcnt >= 0)
801 c = *fmt++;
802 }
803 }
804 if (fmtcnt < 0) {
805 PyErr_SetString(PyExc_ValueError,
806 "incomplete format");
807 goto error;
808 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200809 v = getnextarg(args, arglen, &argidx);
810 if (v == NULL)
811 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200812
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300813 if (fmtcnt == 0) {
814 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200815 writer.overallocate = 0;
816 }
817
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 sign = 0;
819 fill = ' ';
820 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700821 case 'r':
822 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200824 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800825 if (temp == NULL)
826 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200827 assert(PyUnicode_IS_ASCII(temp));
828 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
829 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800830 if (prec >= 0 && len > prec)
831 len = prec;
832 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200833
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 case 's':
835 // %s is only for 2/3 code; 3 only code should use %b
836 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200837 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (temp == NULL)
839 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800840 if (prec >= 0 && len > prec)
841 len = prec;
842 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200843
Ethan Furmanb95b5612015-01-23 20:05:18 -0800844 case 'i':
845 case 'd':
846 case 'u':
847 case 'o':
848 case 'x':
849 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200850 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200851 && width == -1 && prec == -1
852 && !(flags & (F_SIGN | F_BLANK))
853 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200854 {
855 /* Fast path */
856 int alternate = flags & F_ALT;
857 int base;
858
859 switch(c)
860 {
861 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700862 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 case 'd':
864 case 'i':
865 case 'u':
866 base = 10;
867 break;
868 case 'o':
869 base = 8;
870 break;
871 case 'x':
872 case 'X':
873 base = 16;
874 break;
875 }
876
877 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200878 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 res = _PyLong_FormatBytesWriter(&writer, res,
880 v, base, alternate);
881 if (res == NULL)
882 goto error;
883 continue;
884 }
885
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300886 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200887 if (!temp)
888 goto error;
889 assert(PyUnicode_IS_ASCII(temp));
890 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
891 len = PyUnicode_GET_LENGTH(temp);
892 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800893 if (flags & F_ZERO)
894 fill = '0';
895 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896
Ethan Furmanb95b5612015-01-23 20:05:18 -0800897 case 'e':
898 case 'E':
899 case 'f':
900 case 'F':
901 case 'g':
902 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (width == -1 && prec == -1
904 && !(flags & (F_SIGN | F_BLANK)))
905 {
906 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200907 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200908 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200909 if (res == NULL)
910 goto error;
911 continue;
912 }
913
Victor Stinnerad771582015-10-09 12:38:53 +0200914 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800915 goto error;
916 pbuf = PyBytes_AS_STRING(temp);
917 len = PyBytes_GET_SIZE(temp);
918 sign = 1;
919 if (flags & F_ZERO)
920 fill = '0';
921 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200922
Ethan Furmanb95b5612015-01-23 20:05:18 -0800923 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200924 pbuf = &onechar;
925 len = byte_converter(v, &onechar);
926 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200928 if (width == -1) {
929 /* Fast path */
930 *res++ = onechar;
931 continue;
932 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200934
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 default:
936 PyErr_Format(PyExc_ValueError,
937 "unsupported format character '%c' (0x%x) "
938 "at index %zd",
939 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200940 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800941 goto error;
942 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200943
Ethan Furmanb95b5612015-01-23 20:05:18 -0800944 if (sign) {
945 if (*pbuf == '-' || *pbuf == '+') {
946 sign = *pbuf++;
947 len--;
948 }
949 else if (flags & F_SIGN)
950 sign = '+';
951 else if (flags & F_BLANK)
952 sign = ' ';
953 else
954 sign = 0;
955 }
956 if (width < len)
957 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200958
959 alloc = width;
960 if (sign != 0 && len == width)
961 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200962 /* 2: size preallocated for %s */
963 if (alloc > 2) {
964 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200965 if (res == NULL)
966 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200968#ifndef NDEBUG
969 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200970#endif
971
972 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800973 if (sign) {
974 if (fill != ' ')
975 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 if (width > len)
977 width--;
978 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200979
980 /* Write the numeric prefix for "x", "X" and "o" formats
981 if the alternate form is used.
982 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200983 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 assert(pbuf[0] == '0');
985 assert(pbuf[1] == c);
986 if (fill != ' ') {
987 *res++ = *pbuf++;
988 *res++ = *pbuf++;
989 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 width -= 2;
991 if (width < 0)
992 width = 0;
993 len -= 2;
994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800997 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200998 memset(res, fill, width - len);
999 res += (width - len);
1000 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001001 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001002
1003 /* If padding with spaces: write sign if needed and/or numeric
1004 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001005 if (fill == ' ') {
1006 if (sign)
1007 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001008 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001009 assert(pbuf[0] == '0');
1010 assert(pbuf[1] == c);
1011 *res++ = *pbuf++;
1012 *res++ = *pbuf++;
1013 }
1014 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015
1016 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001017 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
1020 /* Pad right with the fill character if needed */
1021 if (width > len) {
1022 memset(res, ' ', width - len);
1023 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001024 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001025
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001026 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 PyErr_SetString(PyExc_TypeError,
1028 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001029 Py_XDECREF(temp);
1030 goto error;
1031 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001034#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035 /* check that we computed the exact size for this write */
1036 assert((res - before) == alloc);
1037#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
1040 /* If overallocation was disabled, ensure that it was the last
1041 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001042 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (argidx < arglen && !dict) {
1046 PyErr_SetString(PyExc_TypeError,
1047 "not all arguments converted during bytes formatting");
1048 goto error;
1049 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001050
Ethan Furmanb95b5612015-01-23 20:05:18 -08001051 if (args_owned) {
1052 Py_DECREF(args);
1053 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001054 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001055
1056 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001057 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001058 if (args_owned) {
1059 Py_DECREF(args);
1060 }
1061 return NULL;
1062}
1063
Greg Price3a4f6672019-09-12 11:12:22 -07001064/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001065PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 Py_ssize_t len,
1067 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001068 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001071 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001073 _PyBytesWriter writer;
1074
1075 _PyBytesWriter_Init(&writer);
1076
1077 p = _PyBytesWriter_Alloc(&writer, len);
1078 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001080 writer.overallocate = 1;
1081
Eric V. Smith42454af2016-10-31 09:22:08 -04001082 *first_invalid_escape = NULL;
1083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 end = s + len;
1085 while (s < end) {
1086 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001087 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 continue;
1089 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001092 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 PyErr_SetString(PyExc_ValueError,
1094 "Trailing \\ in string");
1095 goto failed;
1096 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 switch (*s++) {
1099 /* XXX This assumes ASCII! */
1100 case '\n': break;
1101 case '\\': *p++ = '\\'; break;
1102 case '\'': *p++ = '\''; break;
1103 case '\"': *p++ = '\"'; break;
1104 case 'b': *p++ = '\b'; break;
1105 case 'f': *p++ = '\014'; break; /* FF */
1106 case 't': *p++ = '\t'; break;
1107 case 'n': *p++ = '\n'; break;
1108 case 'r': *p++ = '\r'; break;
1109 case 'v': *p++ = '\013'; break; /* VT */
1110 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1111 case '0': case '1': case '2': case '3':
1112 case '4': case '5': case '6': case '7':
1113 c = s[-1] - '0';
1114 if (s < end && '0' <= *s && *s <= '7') {
1115 c = (c<<3) + *s++ - '0';
1116 if (s < end && '0' <= *s && *s <= '7')
1117 c = (c<<3) + *s++ - '0';
1118 }
1119 *p++ = c;
1120 break;
1121 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001122 if (s+1 < end) {
1123 int digit1, digit2;
1124 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1125 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1126 if (digit1 < 16 && digit2 < 16) {
1127 *p++ = (unsigned char)((digit1 << 4) + digit2);
1128 s += 2;
1129 break;
1130 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 /* invalid hexadecimal digits */
1133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001135 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001136 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001137 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 goto failed;
1139 }
1140 if (strcmp(errors, "replace") == 0) {
1141 *p++ = '?';
1142 } else if (strcmp(errors, "ignore") == 0)
1143 /* do nothing */;
1144 else {
1145 PyErr_Format(PyExc_ValueError,
1146 "decoding error; unknown "
1147 "error handling code: %.400s",
1148 errors);
1149 goto failed;
1150 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001151 /* skip \x */
1152 if (s < end && Py_ISXDIGIT(s[0]))
1153 s++; /* and a hexdigit */
1154 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001157 if (*first_invalid_escape == NULL) {
1158 *first_invalid_escape = s-1; /* Back up one char, since we've
1159 already incremented s. */
1160 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001162 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 }
1164 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001165
1166 return _PyBytesWriter_Finish(&writer, p);
1167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001169 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001171}
1172
Eric V. Smith42454af2016-10-31 09:22:08 -04001173PyObject *PyBytes_DecodeEscape(const char *s,
1174 Py_ssize_t len,
1175 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001176 Py_ssize_t Py_UNUSED(unicode),
1177 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001178{
1179 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001180 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001181 &first_invalid_escape);
1182 if (result == NULL)
1183 return NULL;
1184 if (first_invalid_escape != NULL) {
1185 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1186 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001187 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001188 Py_DECREF(result);
1189 return NULL;
1190 }
1191 }
1192 return result;
1193
1194}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195/* -------------------------------------------------------------------- */
1196/* object api */
1197
1198Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001199PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 if (!PyBytes_Check(op)) {
1202 PyErr_Format(PyExc_TypeError,
1203 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1204 return -1;
1205 }
1206 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001207}
1208
1209char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001210PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001211{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 if (!PyBytes_Check(op)) {
1213 PyErr_Format(PyExc_TypeError,
1214 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1215 return NULL;
1216 }
1217 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218}
1219
1220int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001221PyBytes_AsStringAndSize(PyObject *obj,
1222 char **s,
1223 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 if (s == NULL) {
1226 PyErr_BadInternalCall();
1227 return -1;
1228 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 if (!PyBytes_Check(obj)) {
1231 PyErr_Format(PyExc_TypeError,
1232 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1233 return -1;
1234 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 *s = PyBytes_AS_STRING(obj);
1237 if (len != NULL)
1238 *len = PyBytes_GET_SIZE(obj);
1239 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001240 PyErr_SetString(PyExc_ValueError,
1241 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return -1;
1243 }
1244 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245}
Neal Norwitz6968b052007-02-27 19:02:19 +00001246
1247/* -------------------------------------------------------------------- */
1248/* Methods */
1249
Eric Smith0923d1d2009-04-16 20:16:10 +00001250#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001251
1252#include "stringlib/fastsearch.h"
1253#include "stringlib/count.h"
1254#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001255#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001256#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001257#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001258#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
Eric Smith0f78bff2009-11-30 01:01:42 +00001260#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001261
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262PyObject *
1263PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001264{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001265 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001266 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001267 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001269 unsigned char quote;
1270 const unsigned char *s;
1271 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272
1273 /* Compute size of output string */
1274 squotes = dquotes = 0;
1275 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001276 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001277 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001278 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001280 case '\'': squotes++; break;
1281 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001283 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 default:
1285 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001286 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001287 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001288 if (newsize > PY_SSIZE_T_MAX - incr)
1289 goto overflow;
1290 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 }
1292 quote = '\'';
1293 if (smartquotes && squotes && !dquotes)
1294 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001295 if (squotes && quote == '\'') {
1296 if (newsize > PY_SSIZE_T_MAX - squotes)
1297 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300
1301 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 if (v == NULL) {
1303 return NULL;
1304 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 *p++ = 'b', *p++ = quote;
1308 for (i = 0; i < length; i++) {
1309 unsigned char c = op->ob_sval[i];
1310 if (c == quote || c == '\\')
1311 *p++ = '\\', *p++ = c;
1312 else if (c == '\t')
1313 *p++ = '\\', *p++ = 't';
1314 else if (c == '\n')
1315 *p++ = '\\', *p++ = 'n';
1316 else if (c == '\r')
1317 *p++ = '\\', *p++ = 'r';
1318 else if (c < ' ' || c >= 0x7f) {
1319 *p++ = '\\';
1320 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001321 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1322 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 else
1325 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001328 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001330
1331 overflow:
1332 PyErr_SetString(PyExc_OverflowError,
1333 "bytes object is too large to make repr");
1334 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001335}
1336
Neal Norwitz6968b052007-02-27 19:02:19 +00001337static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001338bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001341}
1342
Neal Norwitz6968b052007-02-27 19:02:19 +00001343static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001344bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001345{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001346 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001350 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 }
1352 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001353}
1354
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001356bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359}
Neal Norwitz6968b052007-02-27 19:02:19 +00001360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361/* This is also used by PyBytes_Concat() */
1362static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001363bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 Py_buffer va, vb;
1366 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 va.len = -1;
1369 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001370 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1371 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001373 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 goto done;
1375 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 /* Optimize end cases */
1378 if (va.len == 0 && PyBytes_CheckExact(b)) {
1379 result = b;
1380 Py_INCREF(result);
1381 goto done;
1382 }
1383 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1384 result = a;
1385 Py_INCREF(result);
1386 goto done;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001389 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyErr_NoMemory();
1391 goto done;
1392 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001394 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (result != NULL) {
1396 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1397 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
1400 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (va.len != -1)
1402 PyBuffer_Release(&va);
1403 if (vb.len != -1)
1404 PyBuffer_Release(&vb);
1405 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406}
Neal Norwitz6968b052007-02-27 19:02:19 +00001407
1408static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001411 Py_ssize_t i;
1412 Py_ssize_t j;
1413 Py_ssize_t size;
1414 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size_t nbytes;
1416 if (n < 0)
1417 n = 0;
1418 /* watch out for overflows: the size can overflow int,
1419 * and the # of bytes needed can overflow size_t
1420 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001421 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyErr_SetString(PyExc_OverflowError,
1423 "repeated bytes are too long");
1424 return NULL;
1425 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001426 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1428 Py_INCREF(a);
1429 return (PyObject *)a;
1430 }
1431 nbytes = (size_t)size;
1432 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1433 PyErr_SetString(PyExc_OverflowError,
1434 "repeated bytes are too long");
1435 return NULL;
1436 }
1437 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1438 if (op == NULL)
1439 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001440 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 op->ob_shash = -1;
1442 op->ob_sval[size] = '\0';
1443 if (Py_SIZE(a) == 1 && n > 0) {
1444 memset(op->ob_sval, a->ob_sval[0] , n);
1445 return (PyObject *) op;
1446 }
1447 i = 0;
1448 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001449 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 i = Py_SIZE(a);
1451 }
1452 while (i < size) {
1453 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001454 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 i += j;
1456 }
1457 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001458}
1459
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001460static int
1461bytes_contains(PyObject *self, PyObject *arg)
1462{
1463 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1464}
1465
Neal Norwitz6968b052007-02-27 19:02:19 +00001466static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001467bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (i < 0 || i >= Py_SIZE(a)) {
1470 PyErr_SetString(PyExc_IndexError, "index out of range");
1471 return NULL;
1472 }
1473 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001474}
1475
Benjamin Peterson621b4302016-09-09 13:54:34 -07001476static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001477bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1478{
1479 int cmp;
1480 Py_ssize_t len;
1481
1482 len = Py_SIZE(a);
1483 if (Py_SIZE(b) != len)
1484 return 0;
1485
1486 if (a->ob_sval[0] != b->ob_sval[0])
1487 return 0;
1488
1489 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1490 return (cmp == 0);
1491}
1492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001494bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 int c;
1497 Py_ssize_t len_a, len_b;
1498 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001499 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 /* Make sure both arguments are strings. */
1502 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001503 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001504 rc = PyObject_IsInstance((PyObject*)a,
1505 (PyObject*)&PyUnicode_Type);
1506 if (!rc)
1507 rc = PyObject_IsInstance((PyObject*)b,
1508 (PyObject*)&PyUnicode_Type);
1509 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001511 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001512 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001513 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001514 return NULL;
1515 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001516 else {
1517 rc = PyObject_IsInstance((PyObject*)a,
1518 (PyObject*)&PyLong_Type);
1519 if (!rc)
1520 rc = PyObject_IsInstance((PyObject*)b,
1521 (PyObject*)&PyLong_Type);
1522 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001523 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001524 if (rc) {
1525 if (PyErr_WarnEx(PyExc_BytesWarning,
1526 "Comparison between bytes and int", 1))
1527 return NULL;
1528 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001529 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 }
stratakise8b19652017-11-02 11:32:54 +01001531 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001533 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001535 case Py_EQ:
1536 case Py_LE:
1537 case Py_GE:
1538 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001539 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001540 case Py_NE:
1541 case Py_LT:
1542 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001543 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001544 default:
1545 PyErr_BadArgument();
1546 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 }
1548 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001549 else if (op == Py_EQ || op == Py_NE) {
1550 int eq = bytes_compare_eq(a, b);
1551 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001552 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001553 }
1554 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001555 len_a = Py_SIZE(a);
1556 len_b = Py_SIZE(b);
1557 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001558 if (min_len > 0) {
1559 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001560 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001563 else
1564 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001565 if (c != 0)
1566 Py_RETURN_RICHCOMPARE(c, 0, op);
1567 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001569}
1570
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001571static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001572bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001573{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001574 if (a->ob_shash == -1) {
1575 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001576 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001577 }
1578 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001579}
1580
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001582bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583{
Victor Stinnera15e2602020-04-08 02:01:56 +02001584 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1586 if (i == -1 && PyErr_Occurred())
1587 return NULL;
1588 if (i < 0)
1589 i += PyBytes_GET_SIZE(self);
1590 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1591 PyErr_SetString(PyExc_IndexError,
1592 "index out of range");
1593 return NULL;
1594 }
1595 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1596 }
1597 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001598 Py_ssize_t start, stop, step, slicelength, i;
1599 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001600 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 char* result_buf;
1602 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001603
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001604 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 return NULL;
1606 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001607 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1608 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 if (slicelength <= 0) {
1611 return PyBytes_FromStringAndSize("", 0);
1612 }
1613 else if (start == 0 && step == 1 &&
1614 slicelength == PyBytes_GET_SIZE(self) &&
1615 PyBytes_CheckExact(self)) {
1616 Py_INCREF(self);
1617 return (PyObject *)self;
1618 }
1619 else if (step == 1) {
1620 return PyBytes_FromStringAndSize(
1621 PyBytes_AS_STRING(self) + start,
1622 slicelength);
1623 }
1624 else {
1625 source_buf = PyBytes_AS_STRING(self);
1626 result = PyBytes_FromStringAndSize(NULL, slicelength);
1627 if (result == NULL)
1628 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 result_buf = PyBytes_AS_STRING(result);
1631 for (cur = start, i = 0; i < slicelength;
1632 cur += step, i++) {
1633 result_buf[i] = source_buf[cur];
1634 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 return result;
1637 }
1638 }
1639 else {
1640 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001641 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 Py_TYPE(item)->tp_name);
1643 return NULL;
1644 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645}
1646
1647static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001648bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1651 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652}
1653
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 (lenfunc)bytes_length, /*sq_length*/
1656 (binaryfunc)bytes_concat, /*sq_concat*/
1657 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1658 (ssizeargfunc)bytes_item, /*sq_item*/
1659 0, /*sq_slice*/
1660 0, /*sq_ass_item*/
1661 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001662 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663};
1664
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001665static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 (lenfunc)bytes_length,
1667 (binaryfunc)bytes_subscript,
1668 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669};
1670
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001671static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 (getbufferproc)bytes_buffer_getbuffer,
1673 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674};
1675
1676
/* Values for the `striptype` argument of the strip helpers below. */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1680
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001681/*[clinic input]
1682bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001684 sep: object = None
1685 The delimiter according which to split the bytes.
1686 None (the default value) means split on ASCII whitespace characters
1687 (space, tab, return, newline, formfeed, vertical tab).
1688 maxsplit: Py_ssize_t = -1
1689 Maximum number of splits to do.
1690 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001692Return a list of the sections in the bytes, using sep as the delimiter.
1693[clinic start generated code]*/
1694
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001695static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001696bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1697/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698{
1699 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 const char *s = PyBytes_AS_STRING(self), *sub;
1701 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001702 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 if (maxsplit < 0)
1705 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001706 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001708 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 return NULL;
1710 sub = vsub.buf;
1711 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1714 PyBuffer_Release(&vsub);
1715 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001716}
1717
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718/*[clinic input]
1719bytes.partition
1720
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001721 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001722 /
1723
1724Partition the bytes into three parts using the given separator.
1725
1726This will search for the separator sep in the bytes. If the separator is found,
1727returns a 3-tuple containing the part before the separator, the separator
1728itself, and the part after it.
1729
1730If the separator is not found, returns a 3-tuple containing the original bytes
1731object and two empty bytes objects.
1732[clinic start generated code]*/
1733
Neal Norwitz6968b052007-02-27 19:02:19 +00001734static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001735bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001736/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001737{
Neal Norwitz6968b052007-02-27 19:02:19 +00001738 return stringlib_partition(
1739 (PyObject*) self,
1740 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001741 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001742 );
1743}
1744
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001745/*[clinic input]
1746bytes.rpartition
1747
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001748 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001749 /
1750
1751Partition the bytes into three parts using the given separator.
1752
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001753This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001754the separator is found, returns a 3-tuple containing the part before the
1755separator, the separator itself, and the part after it.
1756
1757If the separator is not found, returns a 3-tuple containing two empty bytes
1758objects and the original bytes object.
1759[clinic start generated code]*/
1760
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761static PyObject *
1762bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001763/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001764{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return stringlib_rpartition(
1766 (PyObject*) self,
1767 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001768 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001770}
1771
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001772/*[clinic input]
1773bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001774
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775Return a list of the sections in the bytes, using sep as the delimiter.
1776
1777Splitting is done starting at the end of the bytes and working to the front.
1778[clinic start generated code]*/
1779
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001780static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001781bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1782/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001783{
1784 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 const char *s = PyBytes_AS_STRING(self), *sub;
1786 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 if (maxsplit < 0)
1790 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001793 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 return NULL;
1795 sub = vsub.buf;
1796 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1799 PyBuffer_Release(&vsub);
1800 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001801}
1802
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.join
1806
1807 iterable_of_bytes: object
1808 /
1809
1810Concatenate any number of bytes objects.
1811
1812The bytes whose method is called is inserted in between each pair.
1813
1814The result is returned as a new bytes object.
1815
1816Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1817[clinic start generated code]*/
1818
Neal Norwitz6968b052007-02-27 19:02:19 +00001819static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001820bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1821/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001822{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001823 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001824}
1825
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826PyObject *
1827_PyBytes_Join(PyObject *sep, PyObject *x)
1828{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 assert(sep != NULL && PyBytes_Check(sep));
1830 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001831 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832}
1833
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001834static PyObject *
1835bytes_find(PyBytesObject *self, PyObject *args)
1836{
1837 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1838}
1839
1840static PyObject *
1841bytes_index(PyBytesObject *self, PyObject *args)
1842{
1843 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1844}
1845
1846
1847static PyObject *
1848bytes_rfind(PyBytesObject *self, PyObject *args)
1849{
1850 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851}
1852
1853
1854static PyObject *
1855bytes_rindex(PyBytesObject *self, PyObject *args)
1856{
1857 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1858}
1859
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860
1861Py_LOCAL_INLINE(PyObject *)
1862do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001865 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 Py_ssize_t len = PyBytes_GET_SIZE(self);
1867 char *sep;
1868 Py_ssize_t seplen;
1869 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001871 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 return NULL;
1873 sep = vsep.buf;
1874 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 i = 0;
1877 if (striptype != RIGHTSTRIP) {
1878 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1879 i++;
1880 }
1881 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 j = len;
1884 if (striptype != LEFTSTRIP) {
1885 do {
1886 j--;
1887 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1888 j++;
1889 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1894 Py_INCREF(self);
1895 return (PyObject*)self;
1896 }
1897 else
1898 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001899}
1900
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
1902Py_LOCAL_INLINE(PyObject *)
1903do_strip(PyBytesObject *self, int striptype)
1904{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001905 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 i = 0;
1909 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001910 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 i++;
1912 }
1913 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 j = len;
1916 if (striptype != LEFTSTRIP) {
1917 do {
1918 j--;
David Malcolm96960882010-11-05 17:23:41 +00001919 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 j++;
1921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1924 Py_INCREF(self);
1925 return (PyObject*)self;
1926 }
1927 else
1928 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929}
1930
1931
1932Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001933do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001935 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001936 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 }
1938 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939}
1940
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001941/*[clinic input]
1942bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001944 bytes: object = None
1945 /
1946
1947Strip leading and trailing bytes contained in the argument.
1948
1949If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1950[clinic start generated code]*/
1951
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001952static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001953bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001954/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001955{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001956 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001957}
1958
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001959/*[clinic input]
1960bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001962 bytes: object = None
1963 /
1964
1965Strip leading bytes contained in the argument.
1966
1967If the argument is omitted or None, strip leading ASCII whitespace.
1968[clinic start generated code]*/
1969
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001970static PyObject *
1971bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001972/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001973{
1974 return do_argstrip(self, LEFTSTRIP, bytes);
1975}
1976
1977/*[clinic input]
1978bytes.rstrip
1979
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001980 bytes: object = None
1981 /
1982
1983Strip trailing bytes contained in the argument.
1984
1985If the argument is omitted or None, strip trailing ASCII whitespace.
1986[clinic start generated code]*/
1987
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001988static PyObject *
1989bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001990/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001991{
1992 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001993}
Neal Norwitz6968b052007-02-27 19:02:19 +00001994
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001996static PyObject *
1997bytes_count(PyBytesObject *self, PyObject *args)
1998{
1999 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2000}
2001
2002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002003/*[clinic input]
2004bytes.translate
2005
Victor Stinner049e5092014-08-17 22:20:00 +02002006 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002009 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010
2011Return a copy with each character mapped by the given translation table.
2012
Martin Panter1b6c6da2016-08-27 08:35:02 +00002013All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014The remaining characters are mapped through the given translation table.
2015[clinic start generated code]*/
2016
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002018bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002019 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002020/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002022 const char *input;
2023 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002024 Py_buffer table_view = {NULL, NULL};
2025 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002027 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002029 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 Py_ssize_t inlen, tablen, dellen = 0;
2031 PyObject *result;
2032 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002034 if (PyBytes_Check(table)) {
2035 table_chars = PyBytes_AS_STRING(table);
2036 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 else if (table == Py_None) {
2039 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 tablen = 256;
2041 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002042 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002043 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002044 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002045 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002046 tablen = table_view.len;
2047 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 if (tablen != 256) {
2050 PyErr_SetString(PyExc_ValueError,
2051 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002052 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 return NULL;
2054 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002056 if (deletechars != NULL) {
2057 if (PyBytes_Check(deletechars)) {
2058 del_table_chars = PyBytes_AS_STRING(deletechars);
2059 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002062 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002063 PyBuffer_Release(&table_view);
2064 return NULL;
2065 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002066 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002067 dellen = del_table_view.len;
2068 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 }
2070 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002071 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 dellen = 0;
2073 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 inlen = PyBytes_GET_SIZE(input_obj);
2076 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002077 if (result == NULL) {
2078 PyBuffer_Release(&del_table_view);
2079 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002081 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002082 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 /* If no deletions are required, use faster code */
2087 for (i = inlen; --i >= 0; ) {
2088 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 changed = 1;
2091 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 if (!changed && PyBytes_CheckExact(input_obj)) {
2093 Py_INCREF(input_obj);
2094 Py_DECREF(result);
2095 result = input_obj;
2096 }
2097 PyBuffer_Release(&del_table_view);
2098 PyBuffer_Release(&table_view);
2099 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 for (i = 0; i < 256; i++)
2104 trans_table[i] = Py_CHARMASK(i);
2105 } else {
2106 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002109 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002112 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002113 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 for (i = inlen; --i >= 0; ) {
2116 c = Py_CHARMASK(*input++);
2117 if (trans_table[c] != -1)
2118 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2119 continue;
2120 changed = 1;
2121 }
2122 if (!changed && PyBytes_CheckExact(input_obj)) {
2123 Py_DECREF(result);
2124 Py_INCREF(input_obj);
2125 return input_obj;
2126 }
2127 /* Fix the size of the resulting string */
2128 if (inlen > 0)
2129 _PyBytes_Resize(&result, output - output_start);
2130 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131}
2132
2133
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002134/*[clinic input]
2135
2136@staticmethod
2137bytes.maketrans
2138
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002139 frm: Py_buffer
2140 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141 /
2142
2143Return a translation table useable for the bytes or bytearray translate method.
2144
2145The returned table will be one where each byte in frm is mapped to the byte at
2146the same position in to.
2147
2148The bytes objects frm and to must be of the same length.
2149[clinic start generated code]*/
2150
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002151static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002152bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002153/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154{
2155 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002156}
2157
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002158
2159/*[clinic input]
2160bytes.replace
2161
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002162 old: Py_buffer
2163 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164 count: Py_ssize_t = -1
2165 Maximum number of occurrences to replace.
2166 -1 (the default value) means replace all occurrences.
2167 /
2168
2169Return a copy with all occurrences of substring old replaced by new.
2170
2171If the optional argument count is given, only the first count occurrences are
2172replaced.
2173[clinic start generated code]*/
2174
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002176bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002177 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002178/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002180 return stringlib_replace((PyObject *)self,
2181 (const char *)old->buf, old->len,
2182 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183}
2184
2185/** End DALKE **/
2186
sweeneydea81849b2020-04-22 17:05:48 -04002187/*[clinic input]
2188bytes.removeprefix as bytes_removeprefix
2189
2190 prefix: Py_buffer
2191 /
2192
2193Return a bytes object with the given prefix string removed if present.
2194
2195If the bytes starts with the prefix string, return bytes[len(prefix):].
2196Otherwise, return a copy of the original bytes.
2197[clinic start generated code]*/
2198
2199static PyObject *
2200bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2201/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2202{
2203 const char *self_start = PyBytes_AS_STRING(self);
2204 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2205 const char *prefix_start = prefix->buf;
2206 Py_ssize_t prefix_len = prefix->len;
2207
2208 if (self_len >= prefix_len
2209 && prefix_len > 0
2210 && memcmp(self_start, prefix_start, prefix_len) == 0)
2211 {
2212 return PyBytes_FromStringAndSize(self_start + prefix_len,
2213 self_len - prefix_len);
2214 }
2215
2216 if (PyBytes_CheckExact(self)) {
2217 Py_INCREF(self);
2218 return (PyObject *)self;
2219 }
2220
2221 return PyBytes_FromStringAndSize(self_start, self_len);
2222}
2223
2224/*[clinic input]
2225bytes.removesuffix as bytes_removesuffix
2226
2227 suffix: Py_buffer
2228 /
2229
2230Return a bytes object with the given suffix string removed if present.
2231
2232If the bytes ends with the suffix string and that suffix is not empty,
2233return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2234bytes.
2235[clinic start generated code]*/
2236
2237static PyObject *
2238bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2239/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2240{
2241 const char *self_start = PyBytes_AS_STRING(self);
2242 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2243 const char *suffix_start = suffix->buf;
2244 Py_ssize_t suffix_len = suffix->len;
2245
2246 if (self_len >= suffix_len
2247 && suffix_len > 0
2248 && memcmp(self_start + self_len - suffix_len,
2249 suffix_start, suffix_len) == 0)
2250 {
2251 return PyBytes_FromStringAndSize(self_start,
2252 self_len - suffix_len);
2253 }
2254
2255 if (PyBytes_CheckExact(self)) {
2256 Py_INCREF(self);
2257 return (PyObject *)self;
2258 }
2259
2260 return PyBytes_FromStringAndSize(self_start, self_len);
2261}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002263static PyObject *
2264bytes_startswith(PyBytesObject *self, PyObject *args)
2265{
2266 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2267}
2268
2269static PyObject *
2270bytes_endswith(PyBytesObject *self, PyObject *args)
2271{
2272 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2273}
2274
2275
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002276/*[clinic input]
2277bytes.decode
2278
2279 encoding: str(c_default="NULL") = 'utf-8'
2280 The encoding with which to decode the bytes.
2281 errors: str(c_default="NULL") = 'strict'
2282 The error handling scheme to use for the handling of decoding errors.
2283 The default is 'strict' meaning that decoding errors raise a
2284 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2285 as well as any other name registered with codecs.register_error that
2286 can handle UnicodeDecodeErrors.
2287
2288Decode the bytes using the codec registered for encoding.
2289[clinic start generated code]*/
2290
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002291static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002292bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002293 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002294/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002295{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002296 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002297}
2298
Guido van Rossum20188312006-05-05 15:15:40 +00002299
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300/*[clinic input]
2301bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002302
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002303 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304
2305Return a list of the lines in the bytes, breaking at line boundaries.
2306
2307Line breaks are not included in the resulting list unless keepends is given and
2308true.
2309[clinic start generated code]*/
2310
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002311static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002312bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002313/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002314{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002315 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002316 (PyObject*) self, PyBytes_AS_STRING(self),
2317 PyBytes_GET_SIZE(self), keepends
2318 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002319}
2320
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002321/*[clinic input]
2322@classmethod
2323bytes.fromhex
2324
2325 string: unicode
2326 /
2327
2328Create a bytes object from a string of hexadecimal numbers.
2329
2330Spaces between two numbers are accepted.
2331Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2332[clinic start generated code]*/
2333
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002334static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002335bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002336/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002337{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002338 PyObject *result = _PyBytes_FromHex(string, 0);
2339 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002340 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002341 }
2342 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002343}
2344
2345PyObject*
2346_PyBytes_FromHex(PyObject *string, int use_bytearray)
2347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002349 Py_ssize_t hexlen, invalid_char;
2350 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002351 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002352 _PyBytesWriter writer;
2353
2354 _PyBytesWriter_Init(&writer);
2355 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002357 assert(PyUnicode_Check(string));
2358 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002360 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002361
Victor Stinner2bf89932015-10-14 11:25:33 +02002362 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002363 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002364 unsigned int kind = PyUnicode_KIND(string);
2365 Py_ssize_t i;
2366
2367 /* search for the first non-ASCII character */
2368 for (i = 0; i < hexlen; i++) {
2369 if (PyUnicode_READ(kind, data, i) >= 128)
2370 break;
2371 }
2372 invalid_char = i;
2373 goto error;
2374 }
2375
2376 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2377 str = PyUnicode_1BYTE_DATA(string);
2378
2379 /* This overestimates if there are spaces */
2380 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2381 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002383
2384 end = str + hexlen;
2385 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002387 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002388 do {
2389 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002390 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002391 if (str >= end)
2392 break;
2393 }
2394
2395 top = _PyLong_DigitValue[*str];
2396 if (top >= 16) {
2397 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002398 goto error;
2399 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002400 str++;
2401
2402 bot = _PyLong_DigitValue[*str];
2403 if (bot >= 16) {
2404 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2405 goto error;
2406 }
2407 str++;
2408
2409 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002411
2412 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002413
2414 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002415 PyErr_Format(PyExc_ValueError,
2416 "non-hexadecimal number found in "
2417 "fromhex() arg at position %zd", invalid_char);
2418 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002420}
2421
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002422/*[clinic input]
2423bytes.hex
2424
2425 sep: object = NULL
2426 An optional single character or byte to separate hex bytes.
2427 bytes_per_sep: int = 1
2428 How many bytes between separators. Positive values count from the
2429 right, negative values count from the left.
2430
2431Create a str of hexadecimal numbers from a bytes object.
2432
2433Example:
2434>>> value = b'\xb9\x01\xef'
2435>>> value.hex()
2436'b901ef'
2437>>> value.hex(':')
2438'b9:01:ef'
2439>>> value.hex(':', 2)
2440'b9:01ef'
2441>>> value.hex(':', -2)
2442'b901:ef'
2443[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002444
2445static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002446bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2447/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002448{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002449 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002450 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002451 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002452}
2453
2454static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302455bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002456{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002457 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002458}
2459
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002460
2461static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002462bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302464 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002466 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002467 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002468 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002469 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002470 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002471 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002472 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002473 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002474 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002475 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002476 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002477 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302478 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302480 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302482 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002483 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302484 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302486 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302488 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302490 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302492 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002494 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002495 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302496 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002497 BYTES_LSTRIP_METHODDEF
2498 BYTES_MAKETRANS_METHODDEF
2499 BYTES_PARTITION_METHODDEF
2500 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002501 BYTES_REMOVEPREFIX_METHODDEF
2502 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002503 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2504 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002505 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002506 BYTES_RPARTITION_METHODDEF
2507 BYTES_RSPLIT_METHODDEF
2508 BYTES_RSTRIP_METHODDEF
2509 BYTES_SPLIT_METHODDEF
2510 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002511 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002512 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002513 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302514 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302516 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002517 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302518 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002519 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002521};
2522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002524bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002525{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002526 if (!PyBytes_Check(self)) {
2527 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002528 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002529 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002530 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002531}
2532
2533static PyNumberMethods bytes_as_number = {
2534 0, /*nb_add*/
2535 0, /*nb_subtract*/
2536 0, /*nb_multiply*/
2537 bytes_mod, /*nb_remainder*/
2538};
2539
2540static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002541bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
2543static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002544bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 PyObject *x = NULL;
2547 const char *encoding = NULL;
2548 const char *errors = NULL;
2549 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002550 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 Py_ssize_t size;
2552 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002555 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2557 &encoding, &errors))
2558 return NULL;
2559 if (x == NULL) {
2560 if (encoding != NULL || errors != NULL) {
2561 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002562 encoding != NULL ?
2563 "encoding without a string argument" :
2564 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 return NULL;
2566 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002567 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002570 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002572 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002574 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 return NULL;
2576 }
2577 new = PyUnicode_AsEncodedString(x, encoding, errors);
2578 if (new == NULL)
2579 return NULL;
2580 assert(PyBytes_Check(new));
2581 return new;
2582 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002583
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002584 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002585 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002586 PyUnicode_Check(x) ?
2587 "string argument without an encoding" :
2588 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002589 return NULL;
2590 }
2591
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002592 /* We'd like to call PyObject_Bytes here, but we need to check for an
2593 integer argument before deferring to PyBytes_FromObject, something
2594 PyObject_Bytes doesn't do. */
2595 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2596 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002597 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002598 Py_DECREF(func);
2599 if (new == NULL)
2600 return NULL;
2601 if (!PyBytes_Check(new)) {
2602 PyErr_Format(PyExc_TypeError,
2603 "__bytes__ returned non-bytes (type %.200s)",
2604 Py_TYPE(new)->tp_name);
2605 Py_DECREF(new);
2606 return NULL;
2607 }
2608 return new;
2609 }
2610 else if (PyErr_Occurred())
2611 return NULL;
2612
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002613 if (PyUnicode_Check(x)) {
2614 PyErr_SetString(PyExc_TypeError,
2615 "string argument without an encoding");
2616 return NULL;
2617 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002618 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002619 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002620 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2621 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002622 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002623 return NULL;
2624 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002625 }
INADA Naokia634e232017-01-06 17:32:01 +09002626 else {
2627 if (size < 0) {
2628 PyErr_SetString(PyExc_ValueError, "negative count");
2629 return NULL;
2630 }
2631 new = _PyBytes_FromSize(size, 1);
2632 if (new == NULL)
2633 return NULL;
2634 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002635 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002638 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002639}
2640
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002641static PyObject*
2642_PyBytes_FromBuffer(PyObject *x)
2643{
2644 PyObject *new;
2645 Py_buffer view;
2646
2647 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2648 return NULL;
2649
2650 new = PyBytes_FromStringAndSize(NULL, view.len);
2651 if (!new)
2652 goto fail;
2653 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2654 &view, view.len, 'C') < 0)
2655 goto fail;
2656 PyBuffer_Release(&view);
2657 return new;
2658
2659fail:
2660 Py_XDECREF(new);
2661 PyBuffer_Release(&view);
2662 return NULL;
2663}
2664
2665static PyObject*
2666_PyBytes_FromList(PyObject *x)
2667{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002668 Py_ssize_t i, size = PyList_GET_SIZE(x);
2669 Py_ssize_t value;
2670 char *str;
2671 PyObject *item;
2672 _PyBytesWriter writer;
2673
2674 _PyBytesWriter_Init(&writer);
2675 str = _PyBytesWriter_Alloc(&writer, size);
2676 if (str == NULL)
2677 return NULL;
2678 writer.overallocate = 1;
2679 size = writer.allocated;
2680
2681 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2682 item = PyList_GET_ITEM(x, i);
2683 Py_INCREF(item);
2684 value = PyNumber_AsSsize_t(item, NULL);
2685 Py_DECREF(item);
2686 if (value == -1 && PyErr_Occurred())
2687 goto error;
2688
2689 if (value < 0 || value >= 256) {
2690 PyErr_SetString(PyExc_ValueError,
2691 "bytes must be in range(0, 256)");
2692 goto error;
2693 }
2694
2695 if (i >= size) {
2696 str = _PyBytesWriter_Resize(&writer, str, size+1);
2697 if (str == NULL)
2698 return NULL;
2699 size = writer.allocated;
2700 }
2701 *str++ = (char) value;
2702 }
2703 return _PyBytesWriter_Finish(&writer, str);
2704
2705 error:
2706 _PyBytesWriter_Dealloc(&writer);
2707 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002708}
2709
2710static PyObject*
2711_PyBytes_FromTuple(PyObject *x)
2712{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002713 PyObject *bytes;
2714 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2715 Py_ssize_t value;
2716 char *str;
2717 PyObject *item;
2718
2719 bytes = PyBytes_FromStringAndSize(NULL, size);
2720 if (bytes == NULL)
2721 return NULL;
2722 str = ((PyBytesObject *)bytes)->ob_sval;
2723
2724 for (i = 0; i < size; i++) {
2725 item = PyTuple_GET_ITEM(x, i);
2726 value = PyNumber_AsSsize_t(item, NULL);
2727 if (value == -1 && PyErr_Occurred())
2728 goto error;
2729
2730 if (value < 0 || value >= 256) {
2731 PyErr_SetString(PyExc_ValueError,
2732 "bytes must be in range(0, 256)");
2733 goto error;
2734 }
2735 *str++ = (char) value;
2736 }
2737 return bytes;
2738
2739 error:
2740 Py_DECREF(bytes);
2741 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002742}
2743
2744static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002745_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002746{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002747 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002749 _PyBytesWriter writer;
2750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002752 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002753 if (size == -1 && PyErr_Occurred())
2754 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002755
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002756 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002757 str = _PyBytesWriter_Alloc(&writer, size);
2758 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002759 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002760 writer.overallocate = 1;
2761 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002763 /* Run the iterator to exhaustion */
2764 for (i = 0; ; i++) {
2765 PyObject *item;
2766 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002768 /* Get the next item */
2769 item = PyIter_Next(it);
2770 if (item == NULL) {
2771 if (PyErr_Occurred())
2772 goto error;
2773 break;
2774 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002777 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002778 Py_DECREF(item);
2779 if (value == -1 && PyErr_Occurred())
2780 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 /* Range check */
2783 if (value < 0 || value >= 256) {
2784 PyErr_SetString(PyExc_ValueError,
2785 "bytes must be in range(0, 256)");
2786 goto error;
2787 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 /* Append the byte */
2790 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002791 str = _PyBytesWriter_Resize(&writer, str, size+1);
2792 if (str == NULL)
2793 return NULL;
2794 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002796 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002798
2799 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800
2801 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002802 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002804}
2805
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002806PyObject *
2807PyBytes_FromObject(PyObject *x)
2808{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002809 PyObject *it, *result;
2810
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002811 if (x == NULL) {
2812 PyErr_BadInternalCall();
2813 return NULL;
2814 }
2815
2816 if (PyBytes_CheckExact(x)) {
2817 Py_INCREF(x);
2818 return x;
2819 }
2820
2821 /* Use the modern buffer interface */
2822 if (PyObject_CheckBuffer(x))
2823 return _PyBytes_FromBuffer(x);
2824
2825 if (PyList_CheckExact(x))
2826 return _PyBytes_FromList(x);
2827
2828 if (PyTuple_CheckExact(x))
2829 return _PyBytes_FromTuple(x);
2830
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002831 if (!PyUnicode_Check(x)) {
2832 it = PyObject_GetIter(x);
2833 if (it != NULL) {
2834 result = _PyBytes_FromIterator(it, x);
2835 Py_DECREF(it);
2836 return result;
2837 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002838 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2839 return NULL;
2840 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002841 }
2842
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002843 PyErr_Format(PyExc_TypeError,
2844 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002845 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002846 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002847}
2848
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002850bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 PyObject *tmp, *pnew;
2853 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 assert(PyType_IsSubtype(type, &PyBytes_Type));
2856 tmp = bytes_new(&PyBytes_Type, args, kwds);
2857 if (tmp == NULL)
2858 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002859 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 n = PyBytes_GET_SIZE(tmp);
2861 pnew = type->tp_alloc(type, n);
2862 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002863 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 PyBytes_AS_STRING(tmp), n+1);
2865 ((PyBytesObject *)pnew)->ob_shash =
2866 ((PyBytesObject *)tmp)->ob_shash;
2867 }
2868 Py_DECREF(tmp);
2869 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002870}
2871
/* Docstring for the bytes type (used as tp_doc of PyBytes_Type): lists the
   accepted constructor call forms followed by the kinds of source objects.
   The text is a runtime string; its wording and spacing are left untouched. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002872PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002873"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002875bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002876bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2877bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002878\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002880 - an iterable yielding integers in range(256)\n\
2881 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002882 - any object implementing the buffer API.\n\
2883 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002884
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002885static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002886
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2889 "bytes",
2890 PyBytesObject_SIZE,
2891 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002892 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002893 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 0, /* tp_getattr */
2895 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002896 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002898 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002899 &bytes_as_sequence, /* tp_as_sequence */
2900 &bytes_as_mapping, /* tp_as_mapping */
2901 (hashfunc)bytes_hash, /* tp_hash */
2902 0, /* tp_call */
2903 bytes_str, /* tp_str */
2904 PyObject_GenericGetAttr, /* tp_getattro */
2905 0, /* tp_setattro */
2906 &bytes_as_buffer, /* tp_as_buffer */
2907 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2908 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2909 bytes_doc, /* tp_doc */
2910 0, /* tp_traverse */
2911 0, /* tp_clear */
2912 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2913 0, /* tp_weaklistoffset */
2914 bytes_iter, /* tp_iter */
2915 0, /* tp_iternext */
2916 bytes_methods, /* tp_methods */
2917 0, /* tp_members */
2918 0, /* tp_getset */
2919 &PyBaseObject_Type, /* tp_base */
2920 0, /* tp_dict */
2921 0, /* tp_descr_get */
2922 0, /* tp_descr_set */
2923 0, /* tp_dictoffset */
2924 0, /* tp_init */
2925 0, /* tp_alloc */
2926 bytes_new, /* tp_new */
2927 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002928};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002929
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002931PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 assert(pv != NULL);
2934 if (*pv == NULL)
2935 return;
2936 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002937 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 return;
2939 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002940
2941 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2942 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002943 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002944 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002945
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002946 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002947 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2948 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2949 Py_CLEAR(*pv);
2950 return;
2951 }
2952
2953 oldsize = PyBytes_GET_SIZE(*pv);
2954 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2955 PyErr_NoMemory();
2956 goto error;
2957 }
2958 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2959 goto error;
2960
2961 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2962 PyBuffer_Release(&wb);
2963 return;
2964
2965 error:
2966 PyBuffer_Release(&wb);
2967 Py_CLEAR(*pv);
2968 return;
2969 }
2970
2971 else {
2972 /* Multiple references, need to create new object */
2973 PyObject *v;
2974 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002975 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977}
2978
2979void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002980PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002982 PyBytes_Concat(pv, w);
2983 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984}
2985
2986
Ethan Furmanb95b5612015-01-23 20:05:18 -08002987/* The following function breaks the notion that bytes are immutable:
2988 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002990 as creating a new bytes object and destroying the old one, only
2991 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002993 Note that if there's not enough memory to resize the bytes object, the
2994 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995 memory" exception is set, and -1 is returned. Else (on success) 0 is
2996 returned, and the value in *pv may or may not be the same as on input.
2997 As always, an extra byte is allocated for a trailing \0 byte (newsize
2998 does *not* include that), and a trailing \0 byte is stored.
2999*/
3000
3001int
3002_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3003{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003004 PyObject *v;
3005 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003007 if (!PyBytes_Check(v) || newsize < 0) {
3008 goto error;
3009 }
3010 if (Py_SIZE(v) == newsize) {
3011 /* return early if newsize equals to v->ob_size */
3012 return 0;
3013 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003014 if (Py_SIZE(v) == 0) {
3015 if (newsize == 0) {
3016 return 0;
3017 }
3018 *pv = _PyBytes_FromSize(newsize, 0);
3019 Py_DECREF(v);
3020 return (*pv == NULL) ? -1 : 0;
3021 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003022 if (Py_REFCNT(v) != 1) {
3023 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003025 if (newsize == 0) {
3026 *pv = _PyBytes_FromSize(0, 0);
3027 Py_DECREF(v);
3028 return (*pv == NULL) ? -1 : 0;
3029 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003031#ifdef Py_REF_DEBUG
3032 _Py_RefTotal--;
3033#endif
3034#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003036#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003038 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003039 if (*pv == NULL) {
3040 PyObject_Del(v);
3041 PyErr_NoMemory();
3042 return -1;
3043 }
3044 _Py_NewReference(*pv);
3045 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003046 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 sv->ob_sval[newsize] = '\0';
3048 sv->ob_shash = -1; /* invalidate cached hash value */
3049 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003050error:
3051 *pv = 0;
3052 Py_DECREF(v);
3053 PyErr_BadInternalCall();
3054 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003055}
3056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057void
Victor Stinnerbed48172019-08-27 00:12:32 +02003058_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003061 for (i = 0; i < UCHAR_MAX + 1; i++)
3062 Py_CLEAR(characters[i]);
3063 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064}
3065
Benjamin Peterson4116f362008-05-27 00:36:20 +00003066/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003067
3068typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 PyObject_HEAD
3070 Py_ssize_t it_index;
3071 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003073
3074static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 _PyObject_GC_UNTRACK(it);
3078 Py_XDECREF(it->it_seq);
3079 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003080}
3081
3082static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003084{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003085 Py_VISIT(it->it_seq);
3086 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087}
3088
3089static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003090striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 PyBytesObject *seq;
3093 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003095 assert(it != NULL);
3096 seq = it->it_seq;
3097 if (seq == NULL)
3098 return NULL;
3099 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3102 item = PyLong_FromLong(
3103 (unsigned char)seq->ob_sval[it->it_index]);
3104 if (item != NULL)
3105 ++it->it_index;
3106 return item;
3107 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003110 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003112}
3113
3114static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303115striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003117 Py_ssize_t len = 0;
3118 if (it->it_seq)
3119 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3120 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003121}
3122
3123PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003126static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303127striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003128{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003129 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003130 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003131 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003132 it->it_seq, it->it_index);
3133 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003134 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003135 }
3136}
3137
3138PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3139
3140static PyObject *
3141striter_setstate(striterobject *it, PyObject *state)
3142{
3143 Py_ssize_t index = PyLong_AsSsize_t(state);
3144 if (index == -1 && PyErr_Occurred())
3145 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003146 if (it->it_seq != NULL) {
3147 if (index < 0)
3148 index = 0;
3149 else if (index > PyBytes_GET_SIZE(it->it_seq))
3150 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3151 it->it_index = index;
3152 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003153 Py_RETURN_NONE;
3154}
3155
3156PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3157
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003158static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003159 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3160 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003161 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3162 reduce_doc},
3163 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3164 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003166};
3167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003168PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003169 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3170 "bytes_iterator", /* tp_name */
3171 sizeof(striterobject), /* tp_basicsize */
3172 0, /* tp_itemsize */
3173 /* methods */
3174 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003175 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003176 0, /* tp_getattr */
3177 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003178 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003179 0, /* tp_repr */
3180 0, /* tp_as_number */
3181 0, /* tp_as_sequence */
3182 0, /* tp_as_mapping */
3183 0, /* tp_hash */
3184 0, /* tp_call */
3185 0, /* tp_str */
3186 PyObject_GenericGetAttr, /* tp_getattro */
3187 0, /* tp_setattro */
3188 0, /* tp_as_buffer */
3189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3190 0, /* tp_doc */
3191 (traverseproc)striter_traverse, /* tp_traverse */
3192 0, /* tp_clear */
3193 0, /* tp_richcompare */
3194 0, /* tp_weaklistoffset */
3195 PyObject_SelfIter, /* tp_iter */
3196 (iternextfunc)striter_next, /* tp_iternext */
3197 striter_methods, /* tp_methods */
3198 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003199};
3200
3201static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003202bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003203{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003204 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003206 if (!PyBytes_Check(seq)) {
3207 PyErr_BadInternalCall();
3208 return NULL;
3209 }
3210 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3211 if (it == NULL)
3212 return NULL;
3213 it->it_index = 0;
3214 Py_INCREF(seq);
3215 it->it_seq = (PyBytesObject *)seq;
3216 _PyObject_GC_TRACK(it);
3217 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003218}
Victor Stinner00165072015-10-09 01:53:21 +02003219
3220
3221/* _PyBytesWriter API */
3222
/* Divisor used when overallocating: a request for `size` bytes is grown
   by `size / OVERALLOCATE_FACTOR`. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3230
3231void
3232_PyBytesWriter_Init(_PyBytesWriter *writer)
3233{
Victor Stinner661aacc2015-10-14 09:41:48 +02003234 /* Set all attributes before small_buffer to 0 */
3235 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003236#ifndef NDEBUG
3237 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3238 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003239#endif
3240}
3241
3242void
3243_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3244{
3245 Py_CLEAR(writer->buffer);
3246}
3247
3248Py_LOCAL_INLINE(char*)
3249_PyBytesWriter_AsString(_PyBytesWriter *writer)
3250{
Victor Stinner661aacc2015-10-14 09:41:48 +02003251 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003252 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003253 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003254 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003255 else if (writer->use_bytearray) {
3256 assert(writer->buffer != NULL);
3257 return PyByteArray_AS_STRING(writer->buffer);
3258 }
3259 else {
3260 assert(writer->buffer != NULL);
3261 return PyBytes_AS_STRING(writer->buffer);
3262 }
Victor Stinner00165072015-10-09 01:53:21 +02003263}
3264
3265Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003266_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003267{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003268 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003269 assert(str != NULL);
3270 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003271 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003272 return str - start;
3273}
3274
Victor Stinner68762572019-10-07 18:42:01 +02003275#ifndef NDEBUG
3276Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003277_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3278{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003279 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003280
Victor Stinner661aacc2015-10-14 09:41:48 +02003281 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003282 assert(writer->buffer == NULL);
3283 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003284 else {
3285 assert(writer->buffer != NULL);
3286 if (writer->use_bytearray)
3287 assert(PyByteArray_CheckExact(writer->buffer));
3288 else
3289 assert(PyBytes_CheckExact(writer->buffer));
3290 assert(Py_REFCNT(writer->buffer) == 1);
3291 }
Victor Stinner00165072015-10-09 01:53:21 +02003292
Victor Stinner661aacc2015-10-14 09:41:48 +02003293 if (writer->use_bytearray) {
3294 /* bytearray has its own overallocation algorithm,
3295 writer overallocation must be disabled */
3296 assert(!writer->overallocate);
3297 }
3298
3299 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003300 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003301 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003302 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003303 assert(start[writer->allocated] == 0);
3304
3305 end = start + writer->allocated;
3306 assert(str != NULL);
3307 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003308 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003309}
Victor Stinner68762572019-10-07 18:42:01 +02003310#endif
Victor Stinner00165072015-10-09 01:53:21 +02003311
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003312void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003313_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003314{
3315 Py_ssize_t allocated, pos;
3316
Victor Stinner68762572019-10-07 18:42:01 +02003317 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003318 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003319
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003320 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003321 if (writer->overallocate
3322 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3323 /* overallocate to limit the number of realloc() */
3324 allocated += allocated / OVERALLOCATE_FACTOR;
3325 }
3326
Victor Stinner2bf89932015-10-14 11:25:33 +02003327 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003328 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003329 if (writer->use_bytearray) {
3330 if (PyByteArray_Resize(writer->buffer, allocated))
3331 goto error;
3332 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3333 but we cannot use ob_alloc because bytes may need to be moved
3334 to use the whole buffer. bytearray uses an internal optimization
3335 to avoid moving or copying bytes when bytes are removed at the
3336 beginning (ex: del bytearray[:1]). */
3337 }
3338 else {
3339 if (_PyBytes_Resize(&writer->buffer, allocated))
3340 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003341 }
3342 }
3343 else {
3344 /* convert from stack buffer to bytes object buffer */
3345 assert(writer->buffer == NULL);
3346
Victor Stinner661aacc2015-10-14 09:41:48 +02003347 if (writer->use_bytearray)
3348 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3349 else
3350 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003351 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003352 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003353
3354 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003355 char *dest;
3356 if (writer->use_bytearray)
3357 dest = PyByteArray_AS_STRING(writer->buffer);
3358 else
3359 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003360 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003361 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003362 pos);
3363 }
3364
Victor Stinnerb3653a32015-10-09 03:38:24 +02003365 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003366#ifndef NDEBUG
3367 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3368 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003369#endif
Victor Stinner00165072015-10-09 01:53:21 +02003370 }
3371 writer->allocated = allocated;
3372
3373 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003374 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003375 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003376
3377error:
3378 _PyBytesWriter_Dealloc(writer);
3379 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003380}
3381
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003382void*
3383_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3384{
3385 Py_ssize_t new_min_size;
3386
Victor Stinner68762572019-10-07 18:42:01 +02003387 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003388 assert(size >= 0);
3389
3390 if (size == 0) {
3391 /* nothing to do */
3392 return str;
3393 }
3394
3395 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3396 PyErr_NoMemory();
3397 _PyBytesWriter_Dealloc(writer);
3398 return NULL;
3399 }
3400 new_min_size = writer->min_size + size;
3401
3402 if (new_min_size > writer->allocated)
3403 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3404
3405 writer->min_size = new_min_size;
3406 return str;
3407}
3408
Victor Stinner00165072015-10-09 01:53:21 +02003409/* Allocate the buffer to write size bytes.
3410 Return the pointer to the beginning of buffer data.
3411 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003412void*
Victor Stinner00165072015-10-09 01:53:21 +02003413_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3414{
3415 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003416 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003417 assert(size >= 0);
3418
Victor Stinnerb3653a32015-10-09 03:38:24 +02003419 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003420#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003421 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003422 /* In debug mode, don't use the full small buffer because it is less
3423 efficient than bytes and bytearray objects to detect buffer underflow
3424 and buffer overflow. Use 10 bytes of the small buffer to test also
3425 code using the smaller buffer in debug mode.
3426
3427 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3428 in debug mode to also be able to detect stack overflow when running
3429 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3430 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3431 stack overflow. */
3432 writer->allocated = Py_MIN(writer->allocated, 10);
3433 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3434 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003435 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003436#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003437 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003438#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003439 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003440}
3441
3442PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003443_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003444{
Victor Stinner2bf89932015-10-14 11:25:33 +02003445 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003446 PyObject *result;
3447
Victor Stinner68762572019-10-07 18:42:01 +02003448 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003449
Victor Stinner2bf89932015-10-14 11:25:33 +02003450 size = _PyBytesWriter_GetSize(writer, str);
3451 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003452 Py_CLEAR(writer->buffer);
3453 /* Get the empty byte string singleton */
3454 result = PyBytes_FromStringAndSize(NULL, 0);
3455 }
3456 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003457 if (writer->use_bytearray) {
3458 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3459 }
3460 else {
3461 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3462 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003463 }
3464 else {
3465 result = writer->buffer;
3466 writer->buffer = NULL;
3467
Victor Stinner2bf89932015-10-14 11:25:33 +02003468 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003469 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003470 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003471 Py_DECREF(result);
3472 return NULL;
3473 }
3474 }
3475 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003476 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003477 assert(result == NULL);
3478 return NULL;
3479 }
Victor Stinner00165072015-10-09 01:53:21 +02003480 }
3481 }
Victor Stinner00165072015-10-09 01:53:21 +02003482 }
Victor Stinner00165072015-10-09 01:53:21 +02003483 return result;
3484}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003485
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003486void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003487_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003488 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003489{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003490 char *str = (char *)ptr;
3491
Victor Stinnerce179bf2015-10-09 12:57:22 +02003492 str = _PyBytesWriter_Prepare(writer, str, size);
3493 if (str == NULL)
3494 return NULL;
3495
Christian Heimesf051e432016-09-13 20:22:02 +02003496 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003497 str += size;
3498
3499 return str;
3500}