blob: d39721428634fcb269a8537cc7f36e6f81d83dbf [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02009#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021static PyBytesObject *characters[UCHAR_MAX + 1];
22static PyBytesObject *nullstring;
23
Hai Shi46874c22020-01-30 17:20:25 -060024_Py_IDENTIFIER(__bytes__);
25
Mark Dickinsonfd24b322008-12-06 15:33:31 +000026/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyBytesObject_SIZE + n bytes.
28
29 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33
Victor Stinner2bf89932015-10-14 11:25:33 +020034/* Forward declaration */
35Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36 char *str);
37
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the number of bytes before the terminator, as returned by
   strlen(), becomes the size of the new object.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020060static PyObject *
61_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020063 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020064 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 Py_INCREF(op);
68 return (PyObject *)op;
69 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070
Victor Stinner049e5092014-08-17 22:20:00 +020071 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +020082 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +020084 }
85 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 if (!use_calloc)
88 op->ob_sval[size] = '\0';
89 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 if (size == 0) {
91 nullstring = op;
92 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020093 }
94 return (PyObject *) op;
95}
96
97PyObject *
98PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
99{
100 PyBytesObject *op;
101 if (size < 0) {
102 PyErr_SetString(PyExc_SystemError,
103 "Negative size passed to PyBytes_FromStringAndSize");
104 return NULL;
105 }
106 if (size == 1 && str != NULL &&
107 (op = characters[*str & UCHAR_MAX]) != NULL)
108 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
Christian Heimesf051e432016-09-13 20:22:02 +0200119 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 Py_INCREF(op);
147 return (PyObject *)op;
148 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 /* Inline PyObject_NewVar */
151 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Victor Stinner04fc4f22020-06-16 01:28:07 +0200152 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +0200154 }
155 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200157 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 /* share short strings */
159 if (size == 0) {
160 nullstring = op;
161 Py_INCREF(op);
162 } else if (size == 1) {
163 characters[*str & UCHAR_MAX] = op;
164 Py_INCREF(op);
165 }
166 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000168
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169PyObject *
170PyBytes_FromFormatV(const char *format, va_list vargs)
171{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200173 const char *f;
174 const char *p;
175 Py_ssize_t prec;
176 int longflag;
177 int size_tflag;
178 /* Longest 64-bit formatted numbers:
179 - "18446744073709551615\0" (21 bytes)
180 - "-9223372036854775808\0" (21 bytes)
181 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Victor Stinner03dab782015-10-14 00:21:35 +0200183 Longest 64-bit pointer representation:
184 "0xffffffffffffffff\0" (19 bytes). */
185 char buffer[21];
186 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000189
Victor Stinner03dab782015-10-14 00:21:35 +0200190 s = _PyBytesWriter_Alloc(&writer, strlen(format));
191 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200193 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195#define WRITE_BYTES(str) \
196 do { \
197 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
198 if (s == NULL) \
199 goto error; \
200 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200203 if (*f != '%') {
204 *s++ = *f;
205 continue;
206 }
207
208 p = f++;
209
210 /* ignore the width (ex: 10 in "%10s") */
211 while (Py_ISDIGIT(*f))
212 f++;
213
214 /* parse the precision (ex: 10 in "%.10s") */
215 prec = 0;
216 if (*f == '.') {
217 f++;
218 for (; Py_ISDIGIT(*f); f++) {
219 prec = (prec * 10) + (*f - '0');
220 }
221 }
222
223 while (*f && *f != '%' && !Py_ISALPHA(*f))
224 f++;
225
226 /* handle the long flag ('l'), but only for %ld and %lu.
227 others can be added when necessary. */
228 longflag = 0;
229 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
230 longflag = 1;
231 ++f;
232 }
233
234 /* handle the size_t flag ('z'). */
235 size_tflag = 0;
236 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
237 size_tflag = 1;
238 ++f;
239 }
240
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700241 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200242 (ex: 2 for "%s") */
243 writer.min_size -= (f - p + 1);
244
245 switch (*f) {
246 case 'c':
247 {
248 int c = va_arg(vargs, int);
249 if (c < 0 || c > 255) {
250 PyErr_SetString(PyExc_OverflowError,
251 "PyBytes_FromFormatV(): %c format "
252 "expects an integer in range [0; 255]");
253 goto error;
254 }
255 writer.min_size++;
256 *s++ = (unsigned char)c;
257 break;
258 }
259
260 case 'd':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200261 if (longflag) {
Victor Stinner03dab782015-10-14 00:21:35 +0200262 sprintf(buffer, "%ld", va_arg(vargs, long));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200263 }
264 else if (size_tflag) {
265 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
266 }
267 else {
Victor Stinner03dab782015-10-14 00:21:35 +0200268 sprintf(buffer, "%d", va_arg(vargs, int));
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200269 }
Victor Stinner03dab782015-10-14 00:21:35 +0200270 assert(strlen(buffer) < sizeof(buffer));
271 WRITE_BYTES(buffer);
272 break;
273
274 case 'u':
Victor Stinnerd36cf5f2020-06-10 18:38:05 +0200275 if (longflag) {
276 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
277 }
278 else if (size_tflag) {
279 sprintf(buffer, "%zu", va_arg(vargs, size_t));
280 }
281 else {
282 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
283 }
Victor Stinner03dab782015-10-14 00:21:35 +0200284 assert(strlen(buffer) < sizeof(buffer));
285 WRITE_BYTES(buffer);
286 break;
287
288 case 'i':
289 sprintf(buffer, "%i", va_arg(vargs, int));
290 assert(strlen(buffer) < sizeof(buffer));
291 WRITE_BYTES(buffer);
292 break;
293
294 case 'x':
295 sprintf(buffer, "%x", va_arg(vargs, int));
296 assert(strlen(buffer) < sizeof(buffer));
297 WRITE_BYTES(buffer);
298 break;
299
300 case 's':
301 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200303
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200304 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200305 if (prec <= 0) {
306 i = strlen(p);
307 }
308 else {
309 i = 0;
310 while (i < prec && p[i]) {
311 i++;
312 }
313 }
Victor Stinner03dab782015-10-14 00:21:35 +0200314 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
315 if (s == NULL)
316 goto error;
317 break;
318 }
319
320 case 'p':
321 sprintf(buffer, "%p", va_arg(vargs, void*));
322 assert(strlen(buffer) < sizeof(buffer));
323 /* %p is ill-defined: ensure leading 0x. */
324 if (buffer[1] == 'X')
325 buffer[1] = 'x';
326 else if (buffer[1] != 'x') {
327 memmove(buffer+2, buffer, strlen(buffer)+1);
328 buffer[0] = '0';
329 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 }
Victor Stinner03dab782015-10-14 00:21:35 +0200331 WRITE_BYTES(buffer);
332 break;
333
334 case '%':
335 writer.min_size++;
336 *s++ = '%';
337 break;
338
339 default:
340 if (*f == 0) {
341 /* fix min_size if we reached the end of the format string */
342 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000344
Victor Stinner03dab782015-10-14 00:21:35 +0200345 /* invalid format string: copy unformatted string and exit */
346 WRITE_BYTES(p);
347 return _PyBytesWriter_Finish(&writer, s);
348 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000350
Victor Stinner03dab782015-10-14 00:21:35 +0200351#undef WRITE_BYTES
352
353 return _PyBytesWriter_Finish(&writer, s);
354
355 error:
356 _PyBytesWriter_Dealloc(&writer);
357 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358}
359
360PyObject *
361PyBytes_FromFormat(const char *format, ...)
362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject* ret;
364 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365
366#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 ret = PyBytes_FromFormatV(format, vargs);
372 va_end(vargs);
373 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000374}
375
Ethan Furmanb95b5612015-01-23 20:05:18 -0800376/* Helpers for formatstring */
377
378Py_LOCAL_INLINE(PyObject *)
379getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
380{
381 Py_ssize_t argidx = *p_argidx;
382 if (argidx < arglen) {
383 (*p_argidx)++;
384 if (arglen < 0)
385 return args;
386 else
387 return PyTuple_GetItem(args, argidx);
388 }
389 PyErr_SetString(PyExc_TypeError,
390 "not enough arguments for format string");
391 return NULL;
392}
393
/* Format flags.  Each one is a distinct bit so that several can be OR'ed
 * together into a single int.
 *
 *   flag       character
 *   F_LJUST    '-'
 *   F_SIGN     '+'
 *   F_BLANK    ' '
 *   F_ALT      '#'
 *   F_ZERO     '0'
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
406
407/* Returns a new reference to a PyBytes object, or NULL on failure. */
408
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200409static char*
410formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200411 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800412{
413 char *p;
414 PyObject *result;
415 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200416 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800417
418 x = PyFloat_AsDouble(v);
419 if (x == -1.0 && PyErr_Occurred()) {
420 PyErr_Format(PyExc_TypeError, "float argument required, "
421 "not %.200s", Py_TYPE(v)->tp_name);
422 return NULL;
423 }
424
425 if (prec < 0)
426 prec = 6;
427
428 p = PyOS_double_to_string(x, type, prec,
429 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
430
431 if (p == NULL)
432 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200433
434 len = strlen(p);
435 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200436 str = _PyBytesWriter_Prepare(writer, str, len);
437 if (str == NULL)
438 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200439 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200440 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200441 str += len;
442 return str;
443 }
444
445 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200447 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600448 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449}
450
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300451static PyObject *
452formatlong(PyObject *v, int flags, int prec, int type)
453{
454 PyObject *result, *iobj;
455 if (type == 'i')
456 type = 'd';
457 if (PyLong_Check(v))
458 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
459 if (PyNumber_Check(v)) {
460 /* make sure number is a type of integer for o, x, and X */
461 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300462 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300463 else
464 iobj = PyNumber_Long(v);
465 if (iobj == NULL) {
466 if (!PyErr_ExceptionMatches(PyExc_TypeError))
467 return NULL;
468 }
469 else if (!PyLong_Check(iobj))
470 Py_CLEAR(iobj);
471 if (iobj != NULL) {
472 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
473 Py_DECREF(iobj);
474 return result;
475 }
476 }
477 PyErr_Format(PyExc_TypeError,
478 "%%%c format: %s is required, not %.200s", type,
479 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : "a number",
481 Py_TYPE(v)->tp_name);
482 return NULL;
483}
484
485static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300488 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489 *p = PyBytes_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300492 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200493 *p = PyByteArray_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
496 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300497 PyObject *iobj;
498 long ival;
499 int overflow;
500 /* make sure number is a type of integer */
501 if (PyLong_Check(arg)) {
502 ival = PyLong_AsLongAndOverflow(arg, &overflow);
503 }
504 else {
505 iobj = PyNumber_Index(arg);
506 if (iobj == NULL) {
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return 0;
509 goto onError;
510 }
511 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
512 Py_DECREF(iobj);
513 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300514 if (!overflow && ival == -1 && PyErr_Occurred())
515 goto onError;
516 if (overflow || !(0 <= ival && ival <= 255)) {
517 PyErr_SetString(PyExc_OverflowError,
518 "%c arg not in range(256)");
519 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300521 *p = (char)ival;
522 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300524 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 PyErr_SetString(PyExc_TypeError,
526 "%c requires an integer in range(256) or a single byte");
527 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528}
529
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800530static PyObject *_PyBytes_FromBuffer(PyObject *x);
531
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 /* is it a bytes object? */
537 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *pbuf = PyBytes_AS_STRING(v);
539 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 return v;
542 }
543 if (PyByteArray_Check(v)) {
544 *pbuf = PyByteArray_AS_STRING(v);
545 *plen = PyByteArray_GET_SIZE(v);
546 Py_INCREF(v);
547 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 }
549 /* does it support __bytes__? */
550 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100552 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800553 Py_DECREF(func);
554 if (result == NULL)
555 return NULL;
556 if (!PyBytes_Check(result)) {
557 PyErr_Format(PyExc_TypeError,
558 "__bytes__ returned non-bytes (type %.200s)",
559 Py_TYPE(result)->tp_name);
560 Py_DECREF(result);
561 return NULL;
562 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200563 *pbuf = PyBytes_AS_STRING(result);
564 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 return result;
566 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800567 /* does it support buffer protocol? */
568 if (PyObject_CheckBuffer(v)) {
569 /* maybe we can avoid making a copy of the buffer object here? */
570 result = _PyBytes_FromBuffer(v);
571 if (result == NULL)
572 return NULL;
573 *pbuf = PyBytes_AS_STRING(result);
574 *plen = PyBytes_GET_SIZE(result);
575 return result;
576 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 "%%b requires a bytes-like object, "
579 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580 Py_TYPE(v)->tp_name);
581 return NULL;
582}
583
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585
586PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200587_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
588 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589{
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 const char *fmt;
591 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800594 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800595 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596 _PyBytesWriter writer;
597
Victor Stinner772b2b02015-10-14 09:56:53 +0200598 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 PyErr_BadInternalCall();
600 return NULL;
601 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 fmt = format;
603 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604
605 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200606 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607
608 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
609 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200611 if (!use_bytearray)
612 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200613
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 if (PyTuple_Check(args)) {
615 arglen = PyTuple_GET_SIZE(args);
616 argidx = 0;
617 }
618 else {
619 arglen = -1;
620 argidx = -2;
621 }
622 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
623 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
624 !PyByteArray_Check(args)) {
625 dict = args;
626 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628 while (--fmtcnt >= 0) {
629 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630 Py_ssize_t len;
631 char *pos;
632
Xiang Zhangb76ad512017-03-06 17:17:05 +0800633 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 if (pos != NULL)
635 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200636 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800637 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 assert(len != 0);
639
Christian Heimesf051e432016-09-13 20:22:02 +0200640 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 res += len;
642 fmt += len;
643 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800644 }
645 else {
646 /* Got a format specifier */
647 int flags = 0;
648 Py_ssize_t width = -1;
649 int prec = -1;
650 int c = '\0';
651 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 PyObject *v = NULL;
653 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200654 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200656 Py_ssize_t len = 0;
657 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200658 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800659
Ethan Furmanb95b5612015-01-23 20:05:18 -0800660 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200661 if (*fmt == '%') {
662 *res++ = '%';
663 fmt++;
664 fmtcnt--;
665 continue;
666 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200668 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800669 Py_ssize_t keylen;
670 PyObject *key;
671 int pcount = 1;
672
673 if (dict == NULL) {
674 PyErr_SetString(PyExc_TypeError,
675 "format requires a mapping");
676 goto error;
677 }
678 ++fmt;
679 --fmtcnt;
680 keystart = fmt;
681 /* Skip over balanced parentheses */
682 while (pcount > 0 && --fmtcnt >= 0) {
683 if (*fmt == ')')
684 --pcount;
685 else if (*fmt == '(')
686 ++pcount;
687 fmt++;
688 }
689 keylen = fmt - keystart - 1;
690 if (fmtcnt < 0 || pcount > 0) {
691 PyErr_SetString(PyExc_ValueError,
692 "incomplete format key");
693 goto error;
694 }
695 key = PyBytes_FromStringAndSize(keystart,
696 keylen);
697 if (key == NULL)
698 goto error;
699 if (args_owned) {
700 Py_DECREF(args);
701 args_owned = 0;
702 }
703 args = PyObject_GetItem(dict, key);
704 Py_DECREF(key);
705 if (args == NULL) {
706 goto error;
707 }
708 args_owned = 1;
709 arglen = -1;
710 argidx = -2;
711 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200712
713 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800714 while (--fmtcnt >= 0) {
715 switch (c = *fmt++) {
716 case '-': flags |= F_LJUST; continue;
717 case '+': flags |= F_SIGN; continue;
718 case ' ': flags |= F_BLANK; continue;
719 case '#': flags |= F_ALT; continue;
720 case '0': flags |= F_ZERO; continue;
721 }
722 break;
723 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200724
725 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800726 if (c == '*') {
727 v = getnextarg(args, arglen, &argidx);
728 if (v == NULL)
729 goto error;
730 if (!PyLong_Check(v)) {
731 PyErr_SetString(PyExc_TypeError,
732 "* wants int");
733 goto error;
734 }
735 width = PyLong_AsSsize_t(v);
736 if (width == -1 && PyErr_Occurred())
737 goto error;
738 if (width < 0) {
739 flags |= F_LJUST;
740 width = -width;
741 }
742 if (--fmtcnt >= 0)
743 c = *fmt++;
744 }
745 else if (c >= 0 && isdigit(c)) {
746 width = c - '0';
747 while (--fmtcnt >= 0) {
748 c = Py_CHARMASK(*fmt++);
749 if (!isdigit(c))
750 break;
751 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
752 PyErr_SetString(
753 PyExc_ValueError,
754 "width too big");
755 goto error;
756 }
757 width = width*10 + (c - '0');
758 }
759 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200760
761 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (c == '.') {
763 prec = 0;
764 if (--fmtcnt >= 0)
765 c = *fmt++;
766 if (c == '*') {
767 v = getnextarg(args, arglen, &argidx);
768 if (v == NULL)
769 goto error;
770 if (!PyLong_Check(v)) {
771 PyErr_SetString(
772 PyExc_TypeError,
773 "* wants int");
774 goto error;
775 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200776 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800777 if (prec == -1 && PyErr_Occurred())
778 goto error;
779 if (prec < 0)
780 prec = 0;
781 if (--fmtcnt >= 0)
782 c = *fmt++;
783 }
784 else if (c >= 0 && isdigit(c)) {
785 prec = c - '0';
786 while (--fmtcnt >= 0) {
787 c = Py_CHARMASK(*fmt++);
788 if (!isdigit(c))
789 break;
790 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
791 PyErr_SetString(
792 PyExc_ValueError,
793 "prec too big");
794 goto error;
795 }
796 prec = prec*10 + (c - '0');
797 }
798 }
799 } /* prec */
800 if (fmtcnt >= 0) {
801 if (c == 'h' || c == 'l' || c == 'L') {
802 if (--fmtcnt >= 0)
803 c = *fmt++;
804 }
805 }
806 if (fmtcnt < 0) {
807 PyErr_SetString(PyExc_ValueError,
808 "incomplete format");
809 goto error;
810 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200811 v = getnextarg(args, arglen, &argidx);
812 if (v == NULL)
813 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300815 if (fmtcnt == 0) {
816 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200817 writer.overallocate = 0;
818 }
819
Ethan Furmanb95b5612015-01-23 20:05:18 -0800820 sign = 0;
821 fill = ' ';
822 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700823 case 'r':
824 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800825 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 if (temp == NULL)
828 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200829 assert(PyUnicode_IS_ASCII(temp));
830 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
831 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (prec >= 0 && len > prec)
833 len = prec;
834 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200835
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 case 's':
837 // %s is only for 2/3 code; 3 only code should use %b
838 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200839 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800840 if (temp == NULL)
841 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800842 if (prec >= 0 && len > prec)
843 len = prec;
844 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200845
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 case 'i':
847 case 'd':
848 case 'u':
849 case 'o':
850 case 'x':
851 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200852 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200853 && width == -1 && prec == -1
854 && !(flags & (F_SIGN | F_BLANK))
855 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200856 {
857 /* Fast path */
858 int alternate = flags & F_ALT;
859 int base;
860
861 switch(c)
862 {
863 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700864 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200865 case 'd':
866 case 'i':
867 case 'u':
868 base = 10;
869 break;
870 case 'o':
871 base = 8;
872 break;
873 case 'x':
874 case 'X':
875 base = 16;
876 break;
877 }
878
879 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200880 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200881 res = _PyLong_FormatBytesWriter(&writer, res,
882 v, base, alternate);
883 if (res == NULL)
884 goto error;
885 continue;
886 }
887
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300888 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200889 if (!temp)
890 goto error;
891 assert(PyUnicode_IS_ASCII(temp));
892 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
893 len = PyUnicode_GET_LENGTH(temp);
894 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800895 if (flags & F_ZERO)
896 fill = '0';
897 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200898
Ethan Furmanb95b5612015-01-23 20:05:18 -0800899 case 'e':
900 case 'E':
901 case 'f':
902 case 'F':
903 case 'g':
904 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200905 if (width == -1 && prec == -1
906 && !(flags & (F_SIGN | F_BLANK)))
907 {
908 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200909 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200910 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200911 if (res == NULL)
912 goto error;
913 continue;
914 }
915
Victor Stinnerad771582015-10-09 12:38:53 +0200916 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 goto error;
918 pbuf = PyBytes_AS_STRING(temp);
919 len = PyBytes_GET_SIZE(temp);
920 sign = 1;
921 if (flags & F_ZERO)
922 fill = '0';
923 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200924
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200926 pbuf = &onechar;
927 len = byte_converter(v, &onechar);
928 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200930 if (width == -1) {
931 /* Fast path */
932 *res++ = onechar;
933 continue;
934 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 default:
938 PyErr_Format(PyExc_ValueError,
939 "unsupported format character '%c' (0x%x) "
940 "at index %zd",
941 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200942 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 goto error;
944 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200945
Ethan Furmanb95b5612015-01-23 20:05:18 -0800946 if (sign) {
947 if (*pbuf == '-' || *pbuf == '+') {
948 sign = *pbuf++;
949 len--;
950 }
951 else if (flags & F_SIGN)
952 sign = '+';
953 else if (flags & F_BLANK)
954 sign = ' ';
955 else
956 sign = 0;
957 }
958 if (width < len)
959 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200960
961 alloc = width;
962 if (sign != 0 && len == width)
963 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200964 /* 2: size preallocated for %s */
965 if (alloc > 2) {
966 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200967 if (res == NULL)
968 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200970#ifndef NDEBUG
971 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972#endif
973
974 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800975 if (sign) {
976 if (fill != ' ')
977 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800978 if (width > len)
979 width--;
980 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981
982 /* Write the numeric prefix for "x", "X" and "o" formats
983 if the alternate form is used.
984 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200985 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 assert(pbuf[0] == '0');
987 assert(pbuf[1] == c);
988 if (fill != ' ') {
989 *res++ = *pbuf++;
990 *res++ = *pbuf++;
991 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 width -= 2;
993 if (width < 0)
994 width = 0;
995 len -= 2;
996 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200997
998 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001000 memset(res, fill, width - len);
1001 res += (width - len);
1002 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* If padding with spaces: write sign if needed and/or numeric
1006 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 if (fill == ' ') {
1008 if (sign)
1009 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001010 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 assert(pbuf[0] == '0');
1012 assert(pbuf[1] == c);
1013 *res++ = *pbuf++;
1014 *res++ = *pbuf++;
1015 }
1016 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001017
1018 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001019 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001021
1022 /* Pad right with the fill character if needed */
1023 if (width > len) {
1024 memset(res, ' ', width - len);
1025 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001028 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001029 PyErr_SetString(PyExc_TypeError,
1030 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 Py_XDECREF(temp);
1032 goto error;
1033 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001036#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001037 /* check that we computed the exact size for this write */
1038 assert((res - before) == alloc);
1039#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001041
1042 /* If overallocation was disabled, ensure that it was the last
1043 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001044 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001046
Ethan Furmanb95b5612015-01-23 20:05:18 -08001047 if (argidx < arglen && !dict) {
1048 PyErr_SetString(PyExc_TypeError,
1049 "not all arguments converted during bytes formatting");
1050 goto error;
1051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001056 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057
1058 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001059 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001060 if (args_owned) {
1061 Py_DECREF(args);
1062 }
1063 return NULL;
1064}
1065
Greg Price3a4f6672019-09-12 11:12:22 -07001066/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001067PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 Py_ssize_t len,
1069 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001070 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001073 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001075 _PyBytesWriter writer;
1076
1077 _PyBytesWriter_Init(&writer);
1078
1079 p = _PyBytesWriter_Alloc(&writer, len);
1080 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001082 writer.overallocate = 1;
1083
Eric V. Smith42454af2016-10-31 09:22:08 -04001084 *first_invalid_escape = NULL;
1085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 end = s + len;
1087 while (s < end) {
1088 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001089 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 continue;
1091 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001094 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 PyErr_SetString(PyExc_ValueError,
1096 "Trailing \\ in string");
1097 goto failed;
1098 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 switch (*s++) {
1101 /* XXX This assumes ASCII! */
1102 case '\n': break;
1103 case '\\': *p++ = '\\'; break;
1104 case '\'': *p++ = '\''; break;
1105 case '\"': *p++ = '\"'; break;
1106 case 'b': *p++ = '\b'; break;
1107 case 'f': *p++ = '\014'; break; /* FF */
1108 case 't': *p++ = '\t'; break;
1109 case 'n': *p++ = '\n'; break;
1110 case 'r': *p++ = '\r'; break;
1111 case 'v': *p++ = '\013'; break; /* VT */
1112 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1113 case '0': case '1': case '2': case '3':
1114 case '4': case '5': case '6': case '7':
1115 c = s[-1] - '0';
1116 if (s < end && '0' <= *s && *s <= '7') {
1117 c = (c<<3) + *s++ - '0';
1118 if (s < end && '0' <= *s && *s <= '7')
1119 c = (c<<3) + *s++ - '0';
1120 }
1121 *p++ = c;
1122 break;
1123 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001124 if (s+1 < end) {
1125 int digit1, digit2;
1126 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1127 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1128 if (digit1 < 16 && digit2 < 16) {
1129 *p++ = (unsigned char)((digit1 << 4) + digit2);
1130 s += 2;
1131 break;
1132 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001134 /* invalid hexadecimal digits */
1135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001137 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001138 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001139 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 goto failed;
1141 }
1142 if (strcmp(errors, "replace") == 0) {
1143 *p++ = '?';
1144 } else if (strcmp(errors, "ignore") == 0)
1145 /* do nothing */;
1146 else {
1147 PyErr_Format(PyExc_ValueError,
1148 "decoding error; unknown "
1149 "error handling code: %.400s",
1150 errors);
1151 goto failed;
1152 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001153 /* skip \x */
1154 if (s < end && Py_ISXDIGIT(s[0]))
1155 s++; /* and a hexdigit */
1156 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001159 if (*first_invalid_escape == NULL) {
1160 *first_invalid_escape = s-1; /* Back up one char, since we've
1161 already incremented s. */
1162 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001164 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 }
1166 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001167
1168 return _PyBytesWriter_Finish(&writer, p);
1169
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001170 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001171 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173}
1174
Eric V. Smith42454af2016-10-31 09:22:08 -04001175PyObject *PyBytes_DecodeEscape(const char *s,
1176 Py_ssize_t len,
1177 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001178 Py_ssize_t Py_UNUSED(unicode),
1179 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001180{
1181 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001182 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001183 &first_invalid_escape);
1184 if (result == NULL)
1185 return NULL;
1186 if (first_invalid_escape != NULL) {
1187 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1188 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001189 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001190 Py_DECREF(result);
1191 return NULL;
1192 }
1193 }
1194 return result;
1195
1196}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001197/* -------------------------------------------------------------------- */
1198/* object api */
1199
1200Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001201PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 if (!PyBytes_Check(op)) {
1204 PyErr_Format(PyExc_TypeError,
1205 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1206 return -1;
1207 }
1208 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209}
1210
1211char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001212PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 if (!PyBytes_Check(op)) {
1215 PyErr_Format(PyExc_TypeError,
1216 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1217 return NULL;
1218 }
1219 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220}
1221
1222int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001223PyBytes_AsStringAndSize(PyObject *obj,
1224 char **s,
1225 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 if (s == NULL) {
1228 PyErr_BadInternalCall();
1229 return -1;
1230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 if (!PyBytes_Check(obj)) {
1233 PyErr_Format(PyExc_TypeError,
1234 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1235 return -1;
1236 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 *s = PyBytes_AS_STRING(obj);
1239 if (len != NULL)
1240 *len = PyBytes_GET_SIZE(obj);
1241 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001242 PyErr_SetString(PyExc_ValueError,
1243 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return -1;
1245 }
1246 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247}
Neal Norwitz6968b052007-02-27 19:02:19 +00001248
1249/* -------------------------------------------------------------------- */
1250/* Methods */
1251
Eric Smith0923d1d2009-04-16 20:16:10 +00001252#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001253
1254#include "stringlib/fastsearch.h"
1255#include "stringlib/count.h"
1256#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001257#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001258#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001259#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001260#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001261
Eric Smith0f78bff2009-11-30 01:01:42 +00001262#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264PyObject *
1265PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001266{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001267 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001268 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001269 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001271 unsigned char quote;
1272 const unsigned char *s;
1273 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001274
1275 /* Compute size of output string */
1276 squotes = dquotes = 0;
1277 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001278 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001280 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001282 case '\'': squotes++; break;
1283 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286 default:
1287 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001288 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001290 if (newsize > PY_SSIZE_T_MAX - incr)
1291 goto overflow;
1292 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293 }
1294 quote = '\'';
1295 if (smartquotes && squotes && !dquotes)
1296 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001297 if (squotes && quote == '\'') {
1298 if (newsize > PY_SSIZE_T_MAX - squotes)
1299 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302
1303 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 if (v == NULL) {
1305 return NULL;
1306 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 *p++ = 'b', *p++ = quote;
1310 for (i = 0; i < length; i++) {
1311 unsigned char c = op->ob_sval[i];
1312 if (c == quote || c == '\\')
1313 *p++ = '\\', *p++ = c;
1314 else if (c == '\t')
1315 *p++ = '\\', *p++ = 't';
1316 else if (c == '\n')
1317 *p++ = '\\', *p++ = 'n';
1318 else if (c == '\r')
1319 *p++ = '\\', *p++ = 'r';
1320 else if (c < ' ' || c >= 0x7f) {
1321 *p++ = '\\';
1322 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001323 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1324 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 else
1327 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001330 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001332
1333 overflow:
1334 PyErr_SetString(PyExc_OverflowError,
1335 "bytes object is too large to make repr");
1336 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001337}
1338
Neal Norwitz6968b052007-02-27 19:02:19 +00001339static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001340bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001343}
1344
Neal Norwitz6968b052007-02-27 19:02:19 +00001345static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001346bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001347{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001348 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001350 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001352 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 }
1354 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001355}
1356
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001358bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361}
Neal Norwitz6968b052007-02-27 19:02:19 +00001362
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363/* This is also used by PyBytes_Concat() */
1364static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 Py_buffer va, vb;
1368 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 va.len = -1;
1371 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001372 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1373 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001375 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 goto done;
1377 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 /* Optimize end cases */
1380 if (va.len == 0 && PyBytes_CheckExact(b)) {
1381 result = b;
1382 Py_INCREF(result);
1383 goto done;
1384 }
1385 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1386 result = a;
1387 Py_INCREF(result);
1388 goto done;
1389 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001391 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 PyErr_NoMemory();
1393 goto done;
1394 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001396 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 if (result != NULL) {
1398 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1399 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
1402 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 if (va.len != -1)
1404 PyBuffer_Release(&va);
1405 if (vb.len != -1)
1406 PyBuffer_Release(&vb);
1407 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408}
Neal Norwitz6968b052007-02-27 19:02:19 +00001409
1410static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001411bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001412{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001413 Py_ssize_t i;
1414 Py_ssize_t j;
1415 Py_ssize_t size;
1416 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 size_t nbytes;
1418 if (n < 0)
1419 n = 0;
1420 /* watch out for overflows: the size can overflow int,
1421 * and the # of bytes needed can overflow size_t
1422 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001423 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 PyErr_SetString(PyExc_OverflowError,
1425 "repeated bytes are too long");
1426 return NULL;
1427 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001428 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1430 Py_INCREF(a);
1431 return (PyObject *)a;
1432 }
1433 nbytes = (size_t)size;
1434 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1435 PyErr_SetString(PyExc_OverflowError,
1436 "repeated bytes are too long");
1437 return NULL;
1438 }
1439 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Victor Stinner04fc4f22020-06-16 01:28:07 +02001440 if (op == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 return PyErr_NoMemory();
Victor Stinner04fc4f22020-06-16 01:28:07 +02001442 }
1443 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 op->ob_shash = -1;
1445 op->ob_sval[size] = '\0';
1446 if (Py_SIZE(a) == 1 && n > 0) {
1447 memset(op->ob_sval, a->ob_sval[0] , n);
1448 return (PyObject *) op;
1449 }
1450 i = 0;
1451 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001452 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 i = Py_SIZE(a);
1454 }
1455 while (i < size) {
1456 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001457 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 i += j;
1459 }
1460 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001461}
1462
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001463static int
1464bytes_contains(PyObject *self, PyObject *arg)
1465{
1466 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1467}
1468
Neal Norwitz6968b052007-02-27 19:02:19 +00001469static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001470bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 if (i < 0 || i >= Py_SIZE(a)) {
1473 PyErr_SetString(PyExc_IndexError, "index out of range");
1474 return NULL;
1475 }
1476 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001477}
1478
Benjamin Peterson621b4302016-09-09 13:54:34 -07001479static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001480bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1481{
1482 int cmp;
1483 Py_ssize_t len;
1484
1485 len = Py_SIZE(a);
1486 if (Py_SIZE(b) != len)
1487 return 0;
1488
1489 if (a->ob_sval[0] != b->ob_sval[0])
1490 return 0;
1491
1492 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1493 return (cmp == 0);
1494}
1495
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001497bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 int c;
1500 Py_ssize_t len_a, len_b;
1501 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001502 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 /* Make sure both arguments are strings. */
1505 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001506 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001507 rc = PyObject_IsInstance((PyObject*)a,
1508 (PyObject*)&PyUnicode_Type);
1509 if (!rc)
1510 rc = PyObject_IsInstance((PyObject*)b,
1511 (PyObject*)&PyUnicode_Type);
1512 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001514 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001515 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001516 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001517 return NULL;
1518 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001519 else {
1520 rc = PyObject_IsInstance((PyObject*)a,
1521 (PyObject*)&PyLong_Type);
1522 if (!rc)
1523 rc = PyObject_IsInstance((PyObject*)b,
1524 (PyObject*)&PyLong_Type);
1525 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001526 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001527 if (rc) {
1528 if (PyErr_WarnEx(PyExc_BytesWarning,
1529 "Comparison between bytes and int", 1))
1530 return NULL;
1531 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001532 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 }
stratakise8b19652017-11-02 11:32:54 +01001534 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001536 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 case Py_EQ:
1539 case Py_LE:
1540 case Py_GE:
1541 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001542 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001543 case Py_NE:
1544 case Py_LT:
1545 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001546 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001547 default:
1548 PyErr_BadArgument();
1549 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 }
1551 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001552 else if (op == Py_EQ || op == Py_NE) {
1553 int eq = bytes_compare_eq(a, b);
1554 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001555 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001556 }
1557 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001558 len_a = Py_SIZE(a);
1559 len_b = Py_SIZE(b);
1560 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 if (min_len > 0) {
1562 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001563 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001564 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001566 else
1567 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001568 if (c != 0)
1569 Py_RETURN_RICHCOMPARE(c, 0, op);
1570 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001572}
1573
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001574static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001575bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001576{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001577 if (a->ob_shash == -1) {
1578 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001579 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001580 }
1581 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001582}
1583
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001585bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586{
Victor Stinnera15e2602020-04-08 02:01:56 +02001587 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1589 if (i == -1 && PyErr_Occurred())
1590 return NULL;
1591 if (i < 0)
1592 i += PyBytes_GET_SIZE(self);
1593 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1594 PyErr_SetString(PyExc_IndexError,
1595 "index out of range");
1596 return NULL;
1597 }
1598 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1599 }
1600 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001601 Py_ssize_t start, stop, step, slicelength, i;
1602 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001603 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 char* result_buf;
1605 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001606
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001607 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 return NULL;
1609 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001610 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1611 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 if (slicelength <= 0) {
1614 return PyBytes_FromStringAndSize("", 0);
1615 }
1616 else if (start == 0 && step == 1 &&
1617 slicelength == PyBytes_GET_SIZE(self) &&
1618 PyBytes_CheckExact(self)) {
1619 Py_INCREF(self);
1620 return (PyObject *)self;
1621 }
1622 else if (step == 1) {
1623 return PyBytes_FromStringAndSize(
1624 PyBytes_AS_STRING(self) + start,
1625 slicelength);
1626 }
1627 else {
1628 source_buf = PyBytes_AS_STRING(self);
1629 result = PyBytes_FromStringAndSize(NULL, slicelength);
1630 if (result == NULL)
1631 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 result_buf = PyBytes_AS_STRING(result);
1634 for (cur = start, i = 0; i < slicelength;
1635 cur += step, i++) {
1636 result_buf[i] = source_buf[cur];
1637 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 return result;
1640 }
1641 }
1642 else {
1643 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001644 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 Py_TYPE(item)->tp_name);
1646 return NULL;
1647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648}
1649
1650static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001651bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1654 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655}
1656
/* Sequence slot table for bytes.  Entries are positional; the trailing
   comment on each line names the slot it fills.  Slots set to 0 are not
   implemented here. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1667
/* Mapping slot table for bytes.  Entries are positional: the length
   function, the subscript function (bytes_subscript handles both integer
   indices and slices), and a third slot left as 0. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1673
/* Buffer slot table for bytes.  Entries are positional: the get-buffer
   function, followed by a second slot left as NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1678
1679
/* Strip-mode selectors passed as `striptype` to do_xstrip()/do_strip():
   the leading end is stripped unless the mode is RIGHTSTRIP, and the
   trailing end is stripped unless the mode is LEFTSTRIP. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1683
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001684/*[clinic input]
1685bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001687 sep: object = None
        The delimiter according to which to split the bytes.
1689 None (the default value) means split on ASCII whitespace characters
1690 (space, tab, return, newline, formfeed, vertical tab).
1691 maxsplit: Py_ssize_t = -1
1692 Maximum number of splits to do.
1693 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001695Return a list of the sections in the bytes, using sep as the delimiter.
1696[clinic start generated code]*/
1697
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001699bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1700/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701{
1702 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 const char *s = PyBytes_AS_STRING(self), *sub;
1704 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 if (maxsplit < 0)
1708 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001709 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001711 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 return NULL;
1713 sub = vsub.buf;
1714 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1717 PyBuffer_Release(&vsub);
1718 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001719}
1720
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721/*[clinic input]
1722bytes.partition
1723
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001724 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725 /
1726
1727Partition the bytes into three parts using the given separator.
1728
1729This will search for the separator sep in the bytes. If the separator is found,
1730returns a 3-tuple containing the part before the separator, the separator
1731itself, and the part after it.
1732
1733If the separator is not found, returns a 3-tuple containing the original bytes
1734object and two empty bytes objects.
1735[clinic start generated code]*/
1736
Neal Norwitz6968b052007-02-27 19:02:19 +00001737static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001738bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001739/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001740{
Neal Norwitz6968b052007-02-27 19:02:19 +00001741 return stringlib_partition(
1742 (PyObject*) self,
1743 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001744 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001745 );
1746}
1747
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001748/*[clinic input]
1749bytes.rpartition
1750
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001751 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752 /
1753
1754Partition the bytes into three parts using the given separator.
1755
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001756This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001757the separator is found, returns a 3-tuple containing the part before the
1758separator, the separator itself, and the part after it.
1759
1760If the separator is not found, returns a 3-tuple containing two empty bytes
1761objects and the original bytes object.
1762[clinic start generated code]*/
1763
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001764static PyObject *
1765bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001766/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001767{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 return stringlib_rpartition(
1769 (PyObject*) self,
1770 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001771 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001773}
1774
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775/*[clinic input]
1776bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001777
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778Return a list of the sections in the bytes, using sep as the delimiter.
1779
1780Splitting is done starting at the end of the bytes and working to the front.
1781[clinic start generated code]*/
1782
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001783static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001784bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1785/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001786{
1787 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 const char *s = PyBytes_AS_STRING(self), *sub;
1789 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 if (maxsplit < 0)
1793 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001796 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 return NULL;
1798 sub = vsub.buf;
1799 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1802 PyBuffer_Release(&vsub);
1803 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001804}
1805
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001807/*[clinic input]
1808bytes.join
1809
1810 iterable_of_bytes: object
1811 /
1812
1813Concatenate any number of bytes objects.
1814
1815The bytes whose method is called is inserted in between each pair.
1816
1817The result is returned as a new bytes object.
1818
1819Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1820[clinic start generated code]*/
1821
Neal Norwitz6968b052007-02-27 19:02:19 +00001822static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001823bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1824/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001825{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001826 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001827}
1828
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829PyObject *
1830_PyBytes_Join(PyObject *sep, PyObject *x)
1831{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 assert(sep != NULL && PyBytes_Check(sep));
1833 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001834 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835}
1836
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001837static PyObject *
1838bytes_find(PyBytesObject *self, PyObject *args)
1839{
1840 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1841}
1842
1843static PyObject *
1844bytes_index(PyBytesObject *self, PyObject *args)
1845{
1846 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1847}
1848
1849
1850static PyObject *
1851bytes_rfind(PyBytesObject *self, PyObject *args)
1852{
1853 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1854}
1855
1856
1857static PyObject *
1858bytes_rindex(PyBytesObject *self, PyObject *args)
1859{
1860 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
1864Py_LOCAL_INLINE(PyObject *)
1865do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001866{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001868 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 Py_ssize_t len = PyBytes_GET_SIZE(self);
1870 char *sep;
1871 Py_ssize_t seplen;
1872 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001874 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 return NULL;
1876 sep = vsep.buf;
1877 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 i = 0;
1880 if (striptype != RIGHTSTRIP) {
1881 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1882 i++;
1883 }
1884 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 j = len;
1887 if (striptype != LEFTSTRIP) {
1888 do {
1889 j--;
1890 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1891 j++;
1892 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1897 Py_INCREF(self);
1898 return (PyObject*)self;
1899 }
1900 else
1901 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001902}
1903
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
1905Py_LOCAL_INLINE(PyObject *)
1906do_strip(PyBytesObject *self, int striptype)
1907{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001908 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 i = 0;
1912 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001913 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 i++;
1915 }
1916 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 j = len;
1919 if (striptype != LEFTSTRIP) {
1920 do {
1921 j--;
David Malcolm96960882010-11-05 17:23:41 +00001922 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 j++;
1924 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1927 Py_INCREF(self);
1928 return (PyObject*)self;
1929 }
1930 else
1931 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932}
1933
1934
1935Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001936do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001938 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001939 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 }
1941 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942}
1943
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001944/*[clinic input]
1945bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001947 bytes: object = None
1948 /
1949
1950Strip leading and trailing bytes contained in the argument.
1951
1952If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1953[clinic start generated code]*/
1954
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001955static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001956bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001957/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001958{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001959 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001960}
1961
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001962/*[clinic input]
1963bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001965 bytes: object = None
1966 /
1967
1968Strip leading bytes contained in the argument.
1969
1970If the argument is omitted or None, strip leading ASCII whitespace.
1971[clinic start generated code]*/
1972
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001973static PyObject *
1974bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001975/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001976{
1977 return do_argstrip(self, LEFTSTRIP, bytes);
1978}
1979
1980/*[clinic input]
1981bytes.rstrip
1982
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983 bytes: object = None
1984 /
1985
1986Strip trailing bytes contained in the argument.
1987
1988If the argument is omitted or None, strip trailing ASCII whitespace.
1989[clinic start generated code]*/
1990
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001991static PyObject *
1992bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001993/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001994{
1995 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001996}
Neal Norwitz6968b052007-02-27 19:02:19 +00001997
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001999static PyObject *
2000bytes_count(PyBytesObject *self, PyObject *args)
2001{
2002 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2003}
2004
2005
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006/*[clinic input]
2007bytes.translate
2008
Victor Stinner049e5092014-08-17 22:20:00 +02002009 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002012 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013
2014Return a copy with each character mapped by the given translation table.
2015
Martin Panter1b6c6da2016-08-27 08:35:02 +00002016All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017The remaining characters are mapped through the given translation table.
2018[clinic start generated code]*/
2019
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Build a copy of self in which every byte listed in deletechars is
       dropped and every remaining byte is mapped through the 256-entry
       table.  table may be a bytes object, None (no mapping), or any object
       exporting a buffer; deletechars may be NULL, a bytes object, or any
       object exporting a buffer.  Returns self itself when nothing changed
       and self is an exact bytes object.  Returns NULL with an exception
       set on error. */
    const char *input;
    char *output;
    /* Both views start out with NULL members; the PyBuffer_Release() calls
       below are issued even on paths where no buffer was acquired. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    int trans_table[256];

    /* Locate the translation table's bytes and length. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        /* No table: table_chars stays NULL and the length check below is
           satisfied by construction. */
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
                        "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Locate the bytes to delete, if any. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        /* Same values the declarations above already assigned. */
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The result is allocated at the input's length; it can only shrink. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            /* Write the mapped byte and note whether it differs. */
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        if (!changed && PyBytes_CheckExact(input_obj)) {
            /* Nothing changed: hand back the input instead of the copy. */
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: build an int table so that -1 can mark deleted bytes. */
    if (table_chars == NULL) {
        /* No table given: every byte maps to itself. */
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* table_chars is not read after this point. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    /* del_table_chars is not read after this point. */
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        /* A deleted byte writes nothing; a kept byte is written mapped.
           `changed` is set unless the byte was kept and mapped to itself. */
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting string */
    /* NOTE(review): the return value of _PyBytes_Resize() is not checked
       here; presumably it leaves `result` NULL on failure so that NULL is
       returned -- confirm against its definition. */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2135
2136
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137/*[clinic input]
2138
2139@staticmethod
2140bytes.maketrans
2141
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002142 frm: Py_buffer
2143 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 /
2145
2146Return a translation table useable for the bytes or bytearray translate method.
2147
2148The returned table will be one where each byte in frm is mapped to the byte at
2149the same position in to.
2150
2151The bytes objects frm and to must be of the same length.
2152[clinic start generated code]*/
2153
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002155bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002156/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157{
2158 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002159}
2160
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002161
2162/*[clinic input]
2163bytes.replace
2164
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002165 old: Py_buffer
2166 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002167 count: Py_ssize_t = -1
2168 Maximum number of occurrences to replace.
2169 -1 (the default value) means replace all occurrences.
2170 /
2171
2172Return a copy with all occurrences of substring old replaced by new.
2173
2174If the optional argument count is given, only the first count occurrences are
2175replaced.
2176[clinic start generated code]*/
2177
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002179bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002180 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002181/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002183 return stringlib_replace((PyObject *)self,
2184 (const char *)old->buf, old->len,
2185 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186}
2187
2188/** End DALKE **/
2189
sweeneydea81849b2020-04-22 17:05:48 -04002190/*[clinic input]
2191bytes.removeprefix as bytes_removeprefix
2192
2193 prefix: Py_buffer
2194 /
2195
2196Return a bytes object with the given prefix string removed if present.
2197
2198If the bytes starts with the prefix string, return bytes[len(prefix):].
2199Otherwise, return a copy of the original bytes.
2200[clinic start generated code]*/
2201
2202static PyObject *
2203bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2204/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2205{
2206 const char *self_start = PyBytes_AS_STRING(self);
2207 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2208 const char *prefix_start = prefix->buf;
2209 Py_ssize_t prefix_len = prefix->len;
2210
2211 if (self_len >= prefix_len
2212 && prefix_len > 0
2213 && memcmp(self_start, prefix_start, prefix_len) == 0)
2214 {
2215 return PyBytes_FromStringAndSize(self_start + prefix_len,
2216 self_len - prefix_len);
2217 }
2218
2219 if (PyBytes_CheckExact(self)) {
2220 Py_INCREF(self);
2221 return (PyObject *)self;
2222 }
2223
2224 return PyBytes_FromStringAndSize(self_start, self_len);
2225}
2226
2227/*[clinic input]
2228bytes.removesuffix as bytes_removesuffix
2229
2230 suffix: Py_buffer
2231 /
2232
2233Return a bytes object with the given suffix string removed if present.
2234
2235If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)]. Otherwise, return a copy of the original
2237bytes.
2238[clinic start generated code]*/
2239
2240static PyObject *
2241bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2242/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2243{
2244 const char *self_start = PyBytes_AS_STRING(self);
2245 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2246 const char *suffix_start = suffix->buf;
2247 Py_ssize_t suffix_len = suffix->len;
2248
2249 if (self_len >= suffix_len
2250 && suffix_len > 0
2251 && memcmp(self_start + self_len - suffix_len,
2252 suffix_start, suffix_len) == 0)
2253 {
2254 return PyBytes_FromStringAndSize(self_start,
2255 self_len - suffix_len);
2256 }
2257
2258 if (PyBytes_CheckExact(self)) {
2259 Py_INCREF(self);
2260 return (PyObject *)self;
2261 }
2262
2263 return PyBytes_FromStringAndSize(self_start, self_len);
2264}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002266static PyObject *
2267bytes_startswith(PyBytesObject *self, PyObject *args)
2268{
2269 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2270}
2271
2272static PyObject *
2273bytes_endswith(PyBytesObject *self, PyObject *args)
2274{
2275 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2276}
2277
2278
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002279/*[clinic input]
2280bytes.decode
2281
2282 encoding: str(c_default="NULL") = 'utf-8'
2283 The encoding with which to decode the bytes.
2284 errors: str(c_default="NULL") = 'strict'
2285 The error handling scheme to use for the handling of decoding errors.
2286 The default is 'strict' meaning that decoding errors raise a
2287 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2288 as well as any other name registered with codecs.register_error that
2289 can handle UnicodeDecodeErrors.
2290
2291Decode the bytes using the codec registered for encoding.
2292[clinic start generated code]*/
2293
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002294static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002295bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002296 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002297/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002298{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002299 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002300}
2301
Guido van Rossum20188312006-05-05 15:15:40 +00002302
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002303/*[clinic input]
2304bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002305
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002306 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002307
2308Return a list of the lines in the bytes, breaking at line boundaries.
2309
2310Line breaks are not included in the resulting list unless keepends is given and
2311true.
2312[clinic start generated code]*/
2313
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002314static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002315bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002316/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002318 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002319 (PyObject*) self, PyBytes_AS_STRING(self),
2320 PyBytes_GET_SIZE(self), keepends
2321 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002322}
2323
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002324/*[clinic input]
2325@classmethod
2326bytes.fromhex
2327
2328 string: unicode
2329 /
2330
2331Create a bytes object from a string of hexadecimal numbers.
2332
2333Spaces between two numbers are accepted.
2334Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2335[clinic start generated code]*/
2336
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002337static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002338bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002339/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002341 PyObject *result = _PyBytes_FromHex(string, 0);
2342 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002343 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002344 }
2345 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002346}
2347
2348PyObject*
2349_PyBytes_FromHex(PyObject *string, int use_bytearray)
2350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002351 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002352 Py_ssize_t hexlen, invalid_char;
2353 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002354 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002355 _PyBytesWriter writer;
2356
2357 _PyBytesWriter_Init(&writer);
2358 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002359
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002360 assert(PyUnicode_Check(string));
2361 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002363 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002364
Victor Stinner2bf89932015-10-14 11:25:33 +02002365 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002366 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002367 unsigned int kind = PyUnicode_KIND(string);
2368 Py_ssize_t i;
2369
2370 /* search for the first non-ASCII character */
2371 for (i = 0; i < hexlen; i++) {
2372 if (PyUnicode_READ(kind, data, i) >= 128)
2373 break;
2374 }
2375 invalid_char = i;
2376 goto error;
2377 }
2378
2379 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2380 str = PyUnicode_1BYTE_DATA(string);
2381
2382 /* This overestimates if there are spaces */
2383 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2384 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002386
2387 end = str + hexlen;
2388 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002390 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002391 do {
2392 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002393 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002394 if (str >= end)
2395 break;
2396 }
2397
2398 top = _PyLong_DigitValue[*str];
2399 if (top >= 16) {
2400 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 goto error;
2402 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002403 str++;
2404
2405 bot = _PyLong_DigitValue[*str];
2406 if (bot >= 16) {
2407 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2408 goto error;
2409 }
2410 str++;
2411
2412 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002414
2415 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416
2417 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002418 PyErr_Format(PyExc_ValueError,
2419 "non-hexadecimal number found in "
2420 "fromhex() arg at position %zd", invalid_char);
2421 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002423}
2424
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002425/*[clinic input]
2426bytes.hex
2427
2428 sep: object = NULL
2429 An optional single character or byte to separate hex bytes.
2430 bytes_per_sep: int = 1
2431 How many bytes between separators. Positive values count from the
2432 right, negative values count from the left.
2433
2434Create a str of hexadecimal numbers from a bytes object.
2435
2436Example:
2437>>> value = b'\xb9\x01\xef'
2438>>> value.hex()
2439'b901ef'
2440>>> value.hex(':')
2441'b9:01:ef'
2442>>> value.hex(':', 2)
2443'b9:01ef'
2444>>> value.hex(':', -2)
2445'b901:ef'
2446[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002447
2448static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002449bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2450/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002451{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002452 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002453 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002454 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002455}
2456
2457static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302458bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002461}
2462
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002463
2464static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002465bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302467 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002469 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002470 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002471 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002472 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002473 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002474 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002475 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002476 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002477 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002478 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002479 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002480 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302481 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302483 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302485 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002486 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302487 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302489 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302491 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302493 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302495 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002497 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002498 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302499 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002500 BYTES_LSTRIP_METHODDEF
2501 BYTES_MAKETRANS_METHODDEF
2502 BYTES_PARTITION_METHODDEF
2503 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002504 BYTES_REMOVEPREFIX_METHODDEF
2505 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002506 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2507 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002508 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002509 BYTES_RPARTITION_METHODDEF
2510 BYTES_RSPLIT_METHODDEF
2511 BYTES_RSTRIP_METHODDEF
2512 BYTES_SPLIT_METHODDEF
2513 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002514 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002515 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002516 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302517 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302519 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002520 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302521 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002522 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002524};
2525
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002527bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002528{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002529 if (!PyBytes_Check(self)) {
2530 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002531 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002532 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002533 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002534}
2535
2536static PyNumberMethods bytes_as_number = {
2537 0, /*nb_add*/
2538 0, /*nb_subtract*/
2539 0, /*nb_multiply*/
2540 bytes_mod, /*nb_remainder*/
2541};
2542
2543static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002544bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
2546static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002547bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 PyObject *x = NULL;
2550 const char *encoding = NULL;
2551 const char *errors = NULL;
2552 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002553 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 Py_ssize_t size;
2555 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002558 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2560 &encoding, &errors))
2561 return NULL;
2562 if (x == NULL) {
2563 if (encoding != NULL || errors != NULL) {
2564 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002565 encoding != NULL ?
2566 "encoding without a string argument" :
2567 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 return NULL;
2569 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002570 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002573 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002575 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002577 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 return NULL;
2579 }
2580 new = PyUnicode_AsEncodedString(x, encoding, errors);
2581 if (new == NULL)
2582 return NULL;
2583 assert(PyBytes_Check(new));
2584 return new;
2585 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002586
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002587 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002588 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002589 PyUnicode_Check(x) ?
2590 "string argument without an encoding" :
2591 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002592 return NULL;
2593 }
2594
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002595 /* We'd like to call PyObject_Bytes here, but we need to check for an
2596 integer argument before deferring to PyBytes_FromObject, something
2597 PyObject_Bytes doesn't do. */
2598 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2599 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002600 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002601 Py_DECREF(func);
2602 if (new == NULL)
2603 return NULL;
2604 if (!PyBytes_Check(new)) {
2605 PyErr_Format(PyExc_TypeError,
2606 "__bytes__ returned non-bytes (type %.200s)",
2607 Py_TYPE(new)->tp_name);
2608 Py_DECREF(new);
2609 return NULL;
2610 }
2611 return new;
2612 }
2613 else if (PyErr_Occurred())
2614 return NULL;
2615
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002616 if (PyUnicode_Check(x)) {
2617 PyErr_SetString(PyExc_TypeError,
2618 "string argument without an encoding");
2619 return NULL;
2620 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002622 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002623 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2624 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002625 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002626 return NULL;
2627 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002628 }
INADA Naokia634e232017-01-06 17:32:01 +09002629 else {
2630 if (size < 0) {
2631 PyErr_SetString(PyExc_ValueError, "negative count");
2632 return NULL;
2633 }
2634 new = _PyBytes_FromSize(size, 1);
2635 if (new == NULL)
2636 return NULL;
2637 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002638 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002641 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002642}
2643
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002644static PyObject*
2645_PyBytes_FromBuffer(PyObject *x)
2646{
2647 PyObject *new;
2648 Py_buffer view;
2649
2650 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2651 return NULL;
2652
2653 new = PyBytes_FromStringAndSize(NULL, view.len);
2654 if (!new)
2655 goto fail;
2656 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2657 &view, view.len, 'C') < 0)
2658 goto fail;
2659 PyBuffer_Release(&view);
2660 return new;
2661
2662fail:
2663 Py_XDECREF(new);
2664 PyBuffer_Release(&view);
2665 return NULL;
2666}
2667
2668static PyObject*
2669_PyBytes_FromList(PyObject *x)
2670{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002671 Py_ssize_t i, size = PyList_GET_SIZE(x);
2672 Py_ssize_t value;
2673 char *str;
2674 PyObject *item;
2675 _PyBytesWriter writer;
2676
2677 _PyBytesWriter_Init(&writer);
2678 str = _PyBytesWriter_Alloc(&writer, size);
2679 if (str == NULL)
2680 return NULL;
2681 writer.overallocate = 1;
2682 size = writer.allocated;
2683
2684 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2685 item = PyList_GET_ITEM(x, i);
2686 Py_INCREF(item);
2687 value = PyNumber_AsSsize_t(item, NULL);
2688 Py_DECREF(item);
2689 if (value == -1 && PyErr_Occurred())
2690 goto error;
2691
2692 if (value < 0 || value >= 256) {
2693 PyErr_SetString(PyExc_ValueError,
2694 "bytes must be in range(0, 256)");
2695 goto error;
2696 }
2697
2698 if (i >= size) {
2699 str = _PyBytesWriter_Resize(&writer, str, size+1);
2700 if (str == NULL)
2701 return NULL;
2702 size = writer.allocated;
2703 }
2704 *str++ = (char) value;
2705 }
2706 return _PyBytesWriter_Finish(&writer, str);
2707
2708 error:
2709 _PyBytesWriter_Dealloc(&writer);
2710 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002711}
2712
2713static PyObject*
2714_PyBytes_FromTuple(PyObject *x)
2715{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002716 PyObject *bytes;
2717 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2718 Py_ssize_t value;
2719 char *str;
2720 PyObject *item;
2721
2722 bytes = PyBytes_FromStringAndSize(NULL, size);
2723 if (bytes == NULL)
2724 return NULL;
2725 str = ((PyBytesObject *)bytes)->ob_sval;
2726
2727 for (i = 0; i < size; i++) {
2728 item = PyTuple_GET_ITEM(x, i);
2729 value = PyNumber_AsSsize_t(item, NULL);
2730 if (value == -1 && PyErr_Occurred())
2731 goto error;
2732
2733 if (value < 0 || value >= 256) {
2734 PyErr_SetString(PyExc_ValueError,
2735 "bytes must be in range(0, 256)");
2736 goto error;
2737 }
2738 *str++ = (char) value;
2739 }
2740 return bytes;
2741
2742 error:
2743 Py_DECREF(bytes);
2744 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002745}
2746
2747static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002748_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002749{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002750 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002752 _PyBytesWriter writer;
2753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002755 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 if (size == -1 && PyErr_Occurred())
2757 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002758
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002759 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002760 str = _PyBytesWriter_Alloc(&writer, size);
2761 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002763 writer.overallocate = 1;
2764 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 /* Run the iterator to exhaustion */
2767 for (i = 0; ; i++) {
2768 PyObject *item;
2769 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002771 /* Get the next item */
2772 item = PyIter_Next(it);
2773 if (item == NULL) {
2774 if (PyErr_Occurred())
2775 goto error;
2776 break;
2777 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002780 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 Py_DECREF(item);
2782 if (value == -1 && PyErr_Occurred())
2783 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 /* Range check */
2786 if (value < 0 || value >= 256) {
2787 PyErr_SetString(PyExc_ValueError,
2788 "bytes must be in range(0, 256)");
2789 goto error;
2790 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002792 /* Append the byte */
2793 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002794 str = _PyBytesWriter_Resize(&writer, str, size+1);
2795 if (str == NULL)
2796 return NULL;
2797 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002799 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002801
2802 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
2804 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002805 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807}
2808
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002809PyObject *
2810PyBytes_FromObject(PyObject *x)
2811{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002812 PyObject *it, *result;
2813
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002814 if (x == NULL) {
2815 PyErr_BadInternalCall();
2816 return NULL;
2817 }
2818
2819 if (PyBytes_CheckExact(x)) {
2820 Py_INCREF(x);
2821 return x;
2822 }
2823
2824 /* Use the modern buffer interface */
2825 if (PyObject_CheckBuffer(x))
2826 return _PyBytes_FromBuffer(x);
2827
2828 if (PyList_CheckExact(x))
2829 return _PyBytes_FromList(x);
2830
2831 if (PyTuple_CheckExact(x))
2832 return _PyBytes_FromTuple(x);
2833
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002834 if (!PyUnicode_Check(x)) {
2835 it = PyObject_GetIter(x);
2836 if (it != NULL) {
2837 result = _PyBytes_FromIterator(it, x);
2838 Py_DECREF(it);
2839 return result;
2840 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002841 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2842 return NULL;
2843 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002844 }
2845
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002846 PyErr_Format(PyExc_TypeError,
2847 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002848 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002849 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002850}
2851
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002853bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 PyObject *tmp, *pnew;
2856 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 assert(PyType_IsSubtype(type, &PyBytes_Type));
2859 tmp = bytes_new(&PyBytes_Type, args, kwds);
2860 if (tmp == NULL)
2861 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002862 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 n = PyBytes_GET_SIZE(tmp);
2864 pnew = type->tp_alloc(type, n);
2865 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002866 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 PyBytes_AS_STRING(tmp), n+1);
2868 ((PyBytesObject *)pnew)->ob_shash =
2869 ((PyBytesObject *)tmp)->ob_shash;
2870 }
2871 Py_DECREF(tmp);
2872 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002873}
2874
/* Docstring of the bytes type (used as tp_doc of PyBytes_Type): lists the
   accepted constructor call forms, then the kinds of source object a bytes
   object can be constructed from. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002875PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002876"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002878bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002879bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2880bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002881\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002883 - an iterable yielding integers in range(256)\n\
2884 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002885 - any object implementing the buffer API.\n\
2886 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002887
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002888static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002889
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2892 "bytes",
2893 PyBytesObject_SIZE,
2894 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002895 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002896 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 0, /* tp_getattr */
2898 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002899 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002901 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 &bytes_as_sequence, /* tp_as_sequence */
2903 &bytes_as_mapping, /* tp_as_mapping */
2904 (hashfunc)bytes_hash, /* tp_hash */
2905 0, /* tp_call */
2906 bytes_str, /* tp_str */
2907 PyObject_GenericGetAttr, /* tp_getattro */
2908 0, /* tp_setattro */
2909 &bytes_as_buffer, /* tp_as_buffer */
2910 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2911 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2912 bytes_doc, /* tp_doc */
2913 0, /* tp_traverse */
2914 0, /* tp_clear */
2915 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2916 0, /* tp_weaklistoffset */
2917 bytes_iter, /* tp_iter */
2918 0, /* tp_iternext */
2919 bytes_methods, /* tp_methods */
2920 0, /* tp_members */
2921 0, /* tp_getset */
2922 &PyBaseObject_Type, /* tp_base */
2923 0, /* tp_dict */
2924 0, /* tp_descr_get */
2925 0, /* tp_descr_set */
2926 0, /* tp_dictoffset */
2927 0, /* tp_init */
2928 0, /* tp_alloc */
2929 bytes_new, /* tp_new */
2930 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002931};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002932
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002933void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002934PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 assert(pv != NULL);
2937 if (*pv == NULL)
2938 return;
2939 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002940 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002941 return;
2942 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002943
2944 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2945 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002946 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002947 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002948
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002949 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002950 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2951 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2952 Py_CLEAR(*pv);
2953 return;
2954 }
2955
2956 oldsize = PyBytes_GET_SIZE(*pv);
2957 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2958 PyErr_NoMemory();
2959 goto error;
2960 }
2961 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2962 goto error;
2963
2964 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2965 PyBuffer_Release(&wb);
2966 return;
2967
2968 error:
2969 PyBuffer_Release(&wb);
2970 Py_CLEAR(*pv);
2971 return;
2972 }
2973
2974 else {
2975 /* Multiple references, need to create new object */
2976 PyObject *v;
2977 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002978 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002979 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980}
2981
2982void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002983PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 PyBytes_Concat(pv, w);
2986 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987}
2988
2989
Ethan Furmanb95b5612015-01-23 20:05:18 -08002990/* The following function breaks the notion that bytes are immutable:
2991 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002993 as creating a new bytes object and destroying the old one, only
2994 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002996 Note that if there's not enough memory to resize the bytes object, the
2997 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998 memory" exception is set, and -1 is returned. Else (on success) 0 is
2999 returned, and the value in *pv may or may not be the same as on input.
3000 As always, an extra byte is allocated for a trailing \0 byte (newsize
3001 does *not* include that), and a trailing \0 byte is stored.
3002*/
3003
3004int
3005_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3006{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003007 PyObject *v;
3008 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003010 if (!PyBytes_Check(v) || newsize < 0) {
3011 goto error;
3012 }
3013 if (Py_SIZE(v) == newsize) {
3014 /* return early if newsize equals to v->ob_size */
3015 return 0;
3016 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003017 if (Py_SIZE(v) == 0) {
3018 if (newsize == 0) {
3019 return 0;
3020 }
3021 *pv = _PyBytes_FromSize(newsize, 0);
3022 Py_DECREF(v);
3023 return (*pv == NULL) ? -1 : 0;
3024 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003025 if (Py_REFCNT(v) != 1) {
3026 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003028 if (newsize == 0) {
3029 *pv = _PyBytes_FromSize(0, 0);
3030 Py_DECREF(v);
3031 return (*pv == NULL) ? -1 : 0;
3032 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003034#ifdef Py_REF_DEBUG
3035 _Py_RefTotal--;
3036#endif
3037#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003039#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003041 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 if (*pv == NULL) {
3043 PyObject_Del(v);
3044 PyErr_NoMemory();
3045 return -1;
3046 }
3047 _Py_NewReference(*pv);
3048 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003049 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 sv->ob_sval[newsize] = '\0';
3051 sv->ob_shash = -1; /* invalidate cached hash value */
3052 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003053error:
3054 *pv = 0;
3055 Py_DECREF(v);
3056 PyErr_BadInternalCall();
3057 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003058}
3059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003060void
Victor Stinnerbed48172019-08-27 00:12:32 +02003061_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003064 for (i = 0; i < UCHAR_MAX + 1; i++)
3065 Py_CLEAR(characters[i]);
3066 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003067}
3068
Benjamin Peterson4116f362008-05-27 00:36:20 +00003069/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003070
3071typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003072 PyObject_HEAD
3073 Py_ssize_t it_index;
3074 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076
3077static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 _PyObject_GC_UNTRACK(it);
3081 Py_XDECREF(it->it_seq);
3082 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003083}
3084
3085static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003086striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003088 Py_VISIT(it->it_seq);
3089 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003090}
3091
3092static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003094{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003095 PyBytesObject *seq;
3096 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 assert(it != NULL);
3099 seq = it->it_seq;
3100 if (seq == NULL)
3101 return NULL;
3102 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003104 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3105 item = PyLong_FromLong(
3106 (unsigned char)seq->ob_sval[it->it_index]);
3107 if (item != NULL)
3108 ++it->it_index;
3109 return item;
3110 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003112 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003113 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115}
3116
3117static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303118striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 Py_ssize_t len = 0;
3121 if (it->it_seq)
3122 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3123 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124}
3125
3126PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003129static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303130striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003131{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003132 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003133 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003134 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003135 it->it_seq, it->it_index);
3136 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003137 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003138 }
3139}
3140
3141PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3142
3143static PyObject *
3144striter_setstate(striterobject *it, PyObject *state)
3145{
3146 Py_ssize_t index = PyLong_AsSsize_t(state);
3147 if (index == -1 && PyErr_Occurred())
3148 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003149 if (it->it_seq != NULL) {
3150 if (index < 0)
3151 index = 0;
3152 else if (index > PyBytes_GET_SIZE(it->it_seq))
3153 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3154 it->it_index = index;
3155 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003156 Py_RETURN_NONE;
3157}
3158
3159PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3160
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003161static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003162 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3163 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003164 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3165 reduce_doc},
3166 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3167 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003168 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003169};
3170
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003171PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3173 "bytes_iterator", /* tp_name */
3174 sizeof(striterobject), /* tp_basicsize */
3175 0, /* tp_itemsize */
3176 /* methods */
3177 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003178 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003179 0, /* tp_getattr */
3180 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003181 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 0, /* tp_repr */
3183 0, /* tp_as_number */
3184 0, /* tp_as_sequence */
3185 0, /* tp_as_mapping */
3186 0, /* tp_hash */
3187 0, /* tp_call */
3188 0, /* tp_str */
3189 PyObject_GenericGetAttr, /* tp_getattro */
3190 0, /* tp_setattro */
3191 0, /* tp_as_buffer */
3192 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3193 0, /* tp_doc */
3194 (traverseproc)striter_traverse, /* tp_traverse */
3195 0, /* tp_clear */
3196 0, /* tp_richcompare */
3197 0, /* tp_weaklistoffset */
3198 PyObject_SelfIter, /* tp_iter */
3199 (iternextfunc)striter_next, /* tp_iternext */
3200 striter_methods, /* tp_methods */
3201 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003202};
3203
3204static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003205bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003209 if (!PyBytes_Check(seq)) {
3210 PyErr_BadInternalCall();
3211 return NULL;
3212 }
3213 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3214 if (it == NULL)
3215 return NULL;
3216 it->it_index = 0;
3217 Py_INCREF(seq);
3218 it->it_seq = (PyBytesObject *)seq;
3219 _PyObject_GC_TRACK(it);
3220 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003221}
Victor Stinner00165072015-10-09 01:53:21 +02003222
3223
3224/* _PyBytesWriter API */
3225
/* Divisor used when over-allocating a writer buffer: a resize requests
   size / OVERALLOCATE_FACTOR extra bytes on top of the needed size. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3233
3234void
3235_PyBytesWriter_Init(_PyBytesWriter *writer)
3236{
Victor Stinner661aacc2015-10-14 09:41:48 +02003237 /* Set all attributes before small_buffer to 0 */
3238 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003239#ifndef NDEBUG
3240 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3241 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003242#endif
3243}
3244
3245void
3246_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3247{
3248 Py_CLEAR(writer->buffer);
3249}
3250
3251Py_LOCAL_INLINE(char*)
3252_PyBytesWriter_AsString(_PyBytesWriter *writer)
3253{
Victor Stinner661aacc2015-10-14 09:41:48 +02003254 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003255 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003256 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003257 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003258 else if (writer->use_bytearray) {
3259 assert(writer->buffer != NULL);
3260 return PyByteArray_AS_STRING(writer->buffer);
3261 }
3262 else {
3263 assert(writer->buffer != NULL);
3264 return PyBytes_AS_STRING(writer->buffer);
3265 }
Victor Stinner00165072015-10-09 01:53:21 +02003266}
3267
3268Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003269_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003270{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003271 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003272 assert(str != NULL);
3273 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003274 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003275 return str - start;
3276}
3277
Victor Stinner68762572019-10-07 18:42:01 +02003278#ifndef NDEBUG
3279Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003280_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3281{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003282 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003283
Victor Stinner661aacc2015-10-14 09:41:48 +02003284 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003285 assert(writer->buffer == NULL);
3286 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003287 else {
3288 assert(writer->buffer != NULL);
3289 if (writer->use_bytearray)
3290 assert(PyByteArray_CheckExact(writer->buffer));
3291 else
3292 assert(PyBytes_CheckExact(writer->buffer));
3293 assert(Py_REFCNT(writer->buffer) == 1);
3294 }
Victor Stinner00165072015-10-09 01:53:21 +02003295
Victor Stinner661aacc2015-10-14 09:41:48 +02003296 if (writer->use_bytearray) {
3297 /* bytearray has its own overallocation algorithm,
3298 writer overallocation must be disabled */
3299 assert(!writer->overallocate);
3300 }
3301
3302 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003303 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003304 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003305 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003306 assert(start[writer->allocated] == 0);
3307
3308 end = start + writer->allocated;
3309 assert(str != NULL);
3310 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003311 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003312}
Victor Stinner68762572019-10-07 18:42:01 +02003313#endif
Victor Stinner00165072015-10-09 01:53:21 +02003314
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003315void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003316_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003317{
3318 Py_ssize_t allocated, pos;
3319
Victor Stinner68762572019-10-07 18:42:01 +02003320 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003321 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003322
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003323 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003324 if (writer->overallocate
3325 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3326 /* overallocate to limit the number of realloc() */
3327 allocated += allocated / OVERALLOCATE_FACTOR;
3328 }
3329
Victor Stinner2bf89932015-10-14 11:25:33 +02003330 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003331 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003332 if (writer->use_bytearray) {
3333 if (PyByteArray_Resize(writer->buffer, allocated))
3334 goto error;
3335 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3336 but we cannot use ob_alloc because bytes may need to be moved
3337 to use the whole buffer. bytearray uses an internal optimization
3338 to avoid moving or copying bytes when bytes are removed at the
3339 beginning (ex: del bytearray[:1]). */
3340 }
3341 else {
3342 if (_PyBytes_Resize(&writer->buffer, allocated))
3343 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003344 }
3345 }
3346 else {
3347 /* convert from stack buffer to bytes object buffer */
3348 assert(writer->buffer == NULL);
3349
Victor Stinner661aacc2015-10-14 09:41:48 +02003350 if (writer->use_bytearray)
3351 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3352 else
3353 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003354 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003355 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003356
3357 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003358 char *dest;
3359 if (writer->use_bytearray)
3360 dest = PyByteArray_AS_STRING(writer->buffer);
3361 else
3362 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003363 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003364 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003365 pos);
3366 }
3367
Victor Stinnerb3653a32015-10-09 03:38:24 +02003368 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003369#ifndef NDEBUG
3370 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3371 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003372#endif
Victor Stinner00165072015-10-09 01:53:21 +02003373 }
3374 writer->allocated = allocated;
3375
3376 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003377 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003378 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003379
3380error:
3381 _PyBytesWriter_Dealloc(writer);
3382 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003383}
3384
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003385void*
3386_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3387{
3388 Py_ssize_t new_min_size;
3389
Victor Stinner68762572019-10-07 18:42:01 +02003390 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003391 assert(size >= 0);
3392
3393 if (size == 0) {
3394 /* nothing to do */
3395 return str;
3396 }
3397
3398 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3399 PyErr_NoMemory();
3400 _PyBytesWriter_Dealloc(writer);
3401 return NULL;
3402 }
3403 new_min_size = writer->min_size + size;
3404
3405 if (new_min_size > writer->allocated)
3406 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3407
3408 writer->min_size = new_min_size;
3409 return str;
3410}
3411
Victor Stinner00165072015-10-09 01:53:21 +02003412/* Allocate the buffer to write size bytes.
3413 Return the pointer to the beginning of buffer data.
3414 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003415void*
Victor Stinner00165072015-10-09 01:53:21 +02003416_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3417{
3418 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003419 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003420 assert(size >= 0);
3421
Victor Stinnerb3653a32015-10-09 03:38:24 +02003422 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003423#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003424 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003425 /* In debug mode, don't use the full small buffer because it is less
3426 efficient than bytes and bytearray objects to detect buffer underflow
3427 and buffer overflow. Use 10 bytes of the small buffer to test also
3428 code using the smaller buffer in debug mode.
3429
3430 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3431 in debug mode to also be able to detect stack overflow when running
3432 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3433 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3434 stack overflow. */
3435 writer->allocated = Py_MIN(writer->allocated, 10);
3436 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3437 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003438 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003439#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003440 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003441#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003442 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003443}
3444
3445PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003446_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003447{
Victor Stinner2bf89932015-10-14 11:25:33 +02003448 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003449 PyObject *result;
3450
Victor Stinner68762572019-10-07 18:42:01 +02003451 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003452
Victor Stinner2bf89932015-10-14 11:25:33 +02003453 size = _PyBytesWriter_GetSize(writer, str);
3454 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003455 Py_CLEAR(writer->buffer);
3456 /* Get the empty byte string singleton */
3457 result = PyBytes_FromStringAndSize(NULL, 0);
3458 }
3459 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003460 if (writer->use_bytearray) {
3461 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3462 }
3463 else {
3464 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3465 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003466 }
3467 else {
3468 result = writer->buffer;
3469 writer->buffer = NULL;
3470
Victor Stinner2bf89932015-10-14 11:25:33 +02003471 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003472 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003473 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003474 Py_DECREF(result);
3475 return NULL;
3476 }
3477 }
3478 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003479 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003480 assert(result == NULL);
3481 return NULL;
3482 }
Victor Stinner00165072015-10-09 01:53:21 +02003483 }
3484 }
Victor Stinner00165072015-10-09 01:53:21 +02003485 }
Victor Stinner00165072015-10-09 01:53:21 +02003486 return result;
3487}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003488
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003489void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003490_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003491 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003492{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003493 char *str = (char *)ptr;
3494
Victor Stinnerce179bf2015-10-09 12:57:22 +02003495 str = _PyBytesWriter_Prepare(writer, str, size);
3496 if (str == NULL)
3497 return NULL;
3498
Christian Heimesf051e432016-09-13 20:22:02 +02003499 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003500 str += size;
3501
3502 return str;
3503}