blob: 7be075b72e32a86c7fa32ba4f00e183196e7b8a6 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnera15e2602020-04-08 02:01:56 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01009#include "pycore_pymem.h"
10#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +000011
Gregory P. Smith8cb65692015-04-25 23:22:26 +000012#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000013#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030018/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020019
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030020#include "clinic/bytesobject.c.h"
21
/* Cache of one-byte bytes objects, indexed by byte value.  Entries start
   out NULL and are filled in the first time PyBytes_FromStringAndSize() or
   PyBytes_FromString() creates the corresponding one-byte object. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Shared empty bytes object; NULL until the first empty bytes object has
   been created. */
static PyBytesObject *nullstring;

_Py_IDENTIFIER(__bytes__);

/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)

/* Forward declaration */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
38
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes, where `size' is strlen(str).

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 Py_INCREF(op);
69 return (PyObject *)op;
70 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071
Victor Stinner049e5092014-08-17 22:20:00 +020072 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 PyErr_SetString(PyExc_OverflowError,
74 "byte string is too large");
75 return NULL;
76 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020079 if (use_calloc)
80 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
81 else
82 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 if (op == NULL)
84 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010085 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 if (!use_calloc)
88 op->ob_sval[size] = '\0';
89 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 if (size == 0) {
91 nullstring = op;
92 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020093 }
94 return (PyObject *) op;
95}
96
97PyObject *
98PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
99{
100 PyBytesObject *op;
101 if (size < 0) {
102 PyErr_SetString(PyExc_SystemError,
103 "Negative size passed to PyBytes_FromStringAndSize");
104 return NULL;
105 }
106 if (size == 1 && str != NULL &&
107 (op = characters[*str & UCHAR_MAX]) != NULL)
108 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
Christian Heimesf051e432016-09-13 20:22:02 +0200119 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 Py_INCREF(op);
147 return (PyObject *)op;
148 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 /* Inline PyObject_NewVar */
151 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
152 if (op == NULL)
153 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100154 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200156 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 /* share short strings */
158 if (size == 0) {
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 characters[*str & UCHAR_MAX] = op;
163 Py_INCREF(op);
164 }
165 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000167
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168PyObject *
169PyBytes_FromFormatV(const char *format, va_list vargs)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200172 const char *f;
173 const char *p;
174 Py_ssize_t prec;
175 int longflag;
176 int size_tflag;
177 /* Longest 64-bit formatted numbers:
178 - "18446744073709551615\0" (21 bytes)
179 - "-9223372036854775808\0" (21 bytes)
180 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000181
Victor Stinner03dab782015-10-14 00:21:35 +0200182 Longest 64-bit pointer representation:
183 "0xffffffffffffffff\0" (19 bytes). */
184 char buffer[21];
185 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000186
Victor Stinner03dab782015-10-14 00:21:35 +0200187 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000188
Victor Stinner03dab782015-10-14 00:21:35 +0200189 s = _PyBytesWriter_Alloc(&writer, strlen(format));
190 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200192 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194#define WRITE_BYTES(str) \
195 do { \
196 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
197 if (s == NULL) \
198 goto error; \
199 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200202 if (*f != '%') {
203 *s++ = *f;
204 continue;
205 }
206
207 p = f++;
208
209 /* ignore the width (ex: 10 in "%10s") */
210 while (Py_ISDIGIT(*f))
211 f++;
212
213 /* parse the precision (ex: 10 in "%.10s") */
214 prec = 0;
215 if (*f == '.') {
216 f++;
217 for (; Py_ISDIGIT(*f); f++) {
218 prec = (prec * 10) + (*f - '0');
219 }
220 }
221
222 while (*f && *f != '%' && !Py_ISALPHA(*f))
223 f++;
224
225 /* handle the long flag ('l'), but only for %ld and %lu.
226 others can be added when necessary. */
227 longflag = 0;
228 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
229 longflag = 1;
230 ++f;
231 }
232
233 /* handle the size_t flag ('z'). */
234 size_tflag = 0;
235 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
236 size_tflag = 1;
237 ++f;
238 }
239
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700240 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200241 (ex: 2 for "%s") */
242 writer.min_size -= (f - p + 1);
243
244 switch (*f) {
245 case 'c':
246 {
247 int c = va_arg(vargs, int);
248 if (c < 0 || c > 255) {
249 PyErr_SetString(PyExc_OverflowError,
250 "PyBytes_FromFormatV(): %c format "
251 "expects an integer in range [0; 255]");
252 goto error;
253 }
254 writer.min_size++;
255 *s++ = (unsigned char)c;
256 break;
257 }
258
259 case 'd':
260 if (longflag)
261 sprintf(buffer, "%ld", va_arg(vargs, long));
262 else if (size_tflag)
263 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
264 va_arg(vargs, Py_ssize_t));
265 else
266 sprintf(buffer, "%d", va_arg(vargs, int));
267 assert(strlen(buffer) < sizeof(buffer));
268 WRITE_BYTES(buffer);
269 break;
270
271 case 'u':
272 if (longflag)
273 sprintf(buffer, "%lu",
274 va_arg(vargs, unsigned long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
277 va_arg(vargs, size_t));
278 else
279 sprintf(buffer, "%u",
280 va_arg(vargs, unsigned int));
281 assert(strlen(buffer) < sizeof(buffer));
282 WRITE_BYTES(buffer);
283 break;
284
285 case 'i':
286 sprintf(buffer, "%i", va_arg(vargs, int));
287 assert(strlen(buffer) < sizeof(buffer));
288 WRITE_BYTES(buffer);
289 break;
290
291 case 'x':
292 sprintf(buffer, "%x", va_arg(vargs, int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 's':
298 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200300
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200301 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200302 if (prec <= 0) {
303 i = strlen(p);
304 }
305 else {
306 i = 0;
307 while (i < prec && p[i]) {
308 i++;
309 }
310 }
Victor Stinner03dab782015-10-14 00:21:35 +0200311 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
312 if (s == NULL)
313 goto error;
314 break;
315 }
316
317 case 'p':
318 sprintf(buffer, "%p", va_arg(vargs, void*));
319 assert(strlen(buffer) < sizeof(buffer));
320 /* %p is ill-defined: ensure leading 0x. */
321 if (buffer[1] == 'X')
322 buffer[1] = 'x';
323 else if (buffer[1] != 'x') {
324 memmove(buffer+2, buffer, strlen(buffer)+1);
325 buffer[0] = '0';
326 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 }
Victor Stinner03dab782015-10-14 00:21:35 +0200328 WRITE_BYTES(buffer);
329 break;
330
331 case '%':
332 writer.min_size++;
333 *s++ = '%';
334 break;
335
336 default:
337 if (*f == 0) {
338 /* fix min_size if we reached the end of the format string */
339 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000341
Victor Stinner03dab782015-10-14 00:21:35 +0200342 /* invalid format string: copy unformatted string and exit */
343 WRITE_BYTES(p);
344 return _PyBytesWriter_Finish(&writer, s);
345 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348#undef WRITE_BYTES
349
350 return _PyBytesWriter_Finish(&writer, s);
351
352 error:
353 _PyBytesWriter_Dealloc(&writer);
354 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355}
356
357PyObject *
358PyBytes_FromFormat(const char *format, ...)
359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 PyObject* ret;
361 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362
363#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 ret = PyBytes_FromFormatV(format, vargs);
369 va_end(vargs);
370 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Ethan Furmanb95b5612015-01-23 20:05:18 -0800373/* Helpers for formatstring */
374
375Py_LOCAL_INLINE(PyObject *)
376getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
377{
378 Py_ssize_t argidx = *p_argidx;
379 if (argidx < arglen) {
380 (*p_argidx)++;
381 if (arglen < 0)
382 return args;
383 else
384 return PyTuple_GetItem(args, argidx);
385 }
386 PyErr_SetString(PyExc_TypeError,
387 "not enough arguments for format string");
388 return NULL;
389}
390
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Each code is a distinct bit; the format parser ORs them into a single
 * `flags` value as it reads the flag characters of a conversion specifier.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
403
404/* Returns a new reference to a PyBytes object, or NULL on failure. */
405
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200406static char*
407formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200408 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800409{
410 char *p;
411 PyObject *result;
412 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200413 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800414
415 x = PyFloat_AsDouble(v);
416 if (x == -1.0 && PyErr_Occurred()) {
417 PyErr_Format(PyExc_TypeError, "float argument required, "
418 "not %.200s", Py_TYPE(v)->tp_name);
419 return NULL;
420 }
421
422 if (prec < 0)
423 prec = 6;
424
425 p = PyOS_double_to_string(x, type, prec,
426 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
427
428 if (p == NULL)
429 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200430
431 len = strlen(p);
432 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200433 str = _PyBytesWriter_Prepare(writer, str, len);
434 if (str == NULL)
435 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200436 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200437 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200438 str += len;
439 return str;
440 }
441
442 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600445 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446}
447
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300448static PyObject *
449formatlong(PyObject *v, int flags, int prec, int type)
450{
451 PyObject *result, *iobj;
452 if (type == 'i')
453 type = 'd';
454 if (PyLong_Check(v))
455 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
456 if (PyNumber_Check(v)) {
457 /* make sure number is a type of integer for o, x, and X */
458 if (type == 'o' || type == 'x' || type == 'X')
459 iobj = PyNumber_Index(v);
460 else
461 iobj = PyNumber_Long(v);
462 if (iobj == NULL) {
463 if (!PyErr_ExceptionMatches(PyExc_TypeError))
464 return NULL;
465 }
466 else if (!PyLong_Check(iobj))
467 Py_CLEAR(iobj);
468 if (iobj != NULL) {
469 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
470 Py_DECREF(iobj);
471 return result;
472 }
473 }
474 PyErr_Format(PyExc_TypeError,
475 "%%%c format: %s is required, not %.200s", type,
476 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
477 : "a number",
478 Py_TYPE(v)->tp_name);
479 return NULL;
480}
481
482static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200483byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800484{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300485 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486 *p = PyBytes_AS_STRING(arg)[0];
487 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyByteArray_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
493 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300494 PyObject *iobj;
495 long ival;
496 int overflow;
497 /* make sure number is a type of integer */
498 if (PyLong_Check(arg)) {
499 ival = PyLong_AsLongAndOverflow(arg, &overflow);
500 }
501 else {
502 iobj = PyNumber_Index(arg);
503 if (iobj == NULL) {
504 if (!PyErr_ExceptionMatches(PyExc_TypeError))
505 return 0;
506 goto onError;
507 }
508 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
509 Py_DECREF(iobj);
510 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300511 if (!overflow && ival == -1 && PyErr_Occurred())
512 goto onError;
513 if (overflow || !(0 <= ival && ival <= 255)) {
514 PyErr_SetString(PyExc_OverflowError,
515 "%c arg not in range(256)");
516 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800517 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300518 *p = (char)ival;
519 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300521 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200522 PyErr_SetString(PyExc_TypeError,
523 "%c requires an integer in range(256) or a single byte");
524 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525}
526
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800527static PyObject *_PyBytes_FromBuffer(PyObject *x);
528
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200530format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 /* is it a bytes object? */
534 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 *pbuf = PyBytes_AS_STRING(v);
536 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 return v;
539 }
540 if (PyByteArray_Check(v)) {
541 *pbuf = PyByteArray_AS_STRING(v);
542 *plen = PyByteArray_GET_SIZE(v);
543 Py_INCREF(v);
544 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800545 }
546 /* does it support __bytes__? */
547 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
548 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100549 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800550 Py_DECREF(func);
551 if (result == NULL)
552 return NULL;
553 if (!PyBytes_Check(result)) {
554 PyErr_Format(PyExc_TypeError,
555 "__bytes__ returned non-bytes (type %.200s)",
556 Py_TYPE(result)->tp_name);
557 Py_DECREF(result);
558 return NULL;
559 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200560 *pbuf = PyBytes_AS_STRING(result);
561 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800562 return result;
563 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800564 /* does it support buffer protocol? */
565 if (PyObject_CheckBuffer(v)) {
566 /* maybe we can avoid making a copy of the buffer object here? */
567 result = _PyBytes_FromBuffer(v);
568 if (result == NULL)
569 return NULL;
570 *pbuf = PyBytes_AS_STRING(result);
571 *plen = PyBytes_GET_SIZE(result);
572 return result;
573 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800575 "%%b requires a bytes-like object, "
576 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 Py_TYPE(v)->tp_name);
578 return NULL;
579}
580
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582
583PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200584_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
585 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586{
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 const char *fmt;
588 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200590 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593 _PyBytesWriter writer;
594
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 PyErr_BadInternalCall();
597 return NULL;
598 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 fmt = format;
600 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
602 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200603 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604
605 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
606 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800607 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200608 if (!use_bytearray)
609 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200610
Ethan Furmanb95b5612015-01-23 20:05:18 -0800611 if (PyTuple_Check(args)) {
612 arglen = PyTuple_GET_SIZE(args);
613 argidx = 0;
614 }
615 else {
616 arglen = -1;
617 argidx = -2;
618 }
619 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
620 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
621 !PyByteArray_Check(args)) {
622 dict = args;
623 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 while (--fmtcnt >= 0) {
626 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 Py_ssize_t len;
628 char *pos;
629
Xiang Zhangb76ad512017-03-06 17:17:05 +0800630 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631 if (pos != NULL)
632 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200633 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800634 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200635 assert(len != 0);
636
Christian Heimesf051e432016-09-13 20:22:02 +0200637 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 res += len;
639 fmt += len;
640 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 }
642 else {
643 /* Got a format specifier */
644 int flags = 0;
645 Py_ssize_t width = -1;
646 int prec = -1;
647 int c = '\0';
648 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800649 PyObject *v = NULL;
650 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200651 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200653 Py_ssize_t len = 0;
654 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200655 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200658 if (*fmt == '%') {
659 *res++ = '%';
660 fmt++;
661 fmtcnt--;
662 continue;
663 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800664 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200665 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 Py_ssize_t keylen;
667 PyObject *key;
668 int pcount = 1;
669
670 if (dict == NULL) {
671 PyErr_SetString(PyExc_TypeError,
672 "format requires a mapping");
673 goto error;
674 }
675 ++fmt;
676 --fmtcnt;
677 keystart = fmt;
678 /* Skip over balanced parentheses */
679 while (pcount > 0 && --fmtcnt >= 0) {
680 if (*fmt == ')')
681 --pcount;
682 else if (*fmt == '(')
683 ++pcount;
684 fmt++;
685 }
686 keylen = fmt - keystart - 1;
687 if (fmtcnt < 0 || pcount > 0) {
688 PyErr_SetString(PyExc_ValueError,
689 "incomplete format key");
690 goto error;
691 }
692 key = PyBytes_FromStringAndSize(keystart,
693 keylen);
694 if (key == NULL)
695 goto error;
696 if (args_owned) {
697 Py_DECREF(args);
698 args_owned = 0;
699 }
700 args = PyObject_GetItem(dict, key);
701 Py_DECREF(key);
702 if (args == NULL) {
703 goto error;
704 }
705 args_owned = 1;
706 arglen = -1;
707 argidx = -2;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 while (--fmtcnt >= 0) {
712 switch (c = *fmt++) {
713 case '-': flags |= F_LJUST; continue;
714 case '+': flags |= F_SIGN; continue;
715 case ' ': flags |= F_BLANK; continue;
716 case '#': flags |= F_ALT; continue;
717 case '0': flags |= F_ZERO; continue;
718 }
719 break;
720 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200721
722 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800723 if (c == '*') {
724 v = getnextarg(args, arglen, &argidx);
725 if (v == NULL)
726 goto error;
727 if (!PyLong_Check(v)) {
728 PyErr_SetString(PyExc_TypeError,
729 "* wants int");
730 goto error;
731 }
732 width = PyLong_AsSsize_t(v);
733 if (width == -1 && PyErr_Occurred())
734 goto error;
735 if (width < 0) {
736 flags |= F_LJUST;
737 width = -width;
738 }
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 width = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "width too big");
752 goto error;
753 }
754 width = width*10 + (c - '0');
755 }
756 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200757
758 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800759 if (c == '.') {
760 prec = 0;
761 if (--fmtcnt >= 0)
762 c = *fmt++;
763 if (c == '*') {
764 v = getnextarg(args, arglen, &argidx);
765 if (v == NULL)
766 goto error;
767 if (!PyLong_Check(v)) {
768 PyErr_SetString(
769 PyExc_TypeError,
770 "* wants int");
771 goto error;
772 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200773 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800774 if (prec == -1 && PyErr_Occurred())
775 goto error;
776 if (prec < 0)
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 }
781 else if (c >= 0 && isdigit(c)) {
782 prec = c - '0';
783 while (--fmtcnt >= 0) {
784 c = Py_CHARMASK(*fmt++);
785 if (!isdigit(c))
786 break;
787 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
788 PyErr_SetString(
789 PyExc_ValueError,
790 "prec too big");
791 goto error;
792 }
793 prec = prec*10 + (c - '0');
794 }
795 }
796 } /* prec */
797 if (fmtcnt >= 0) {
798 if (c == 'h' || c == 'l' || c == 'L') {
799 if (--fmtcnt >= 0)
800 c = *fmt++;
801 }
802 }
803 if (fmtcnt < 0) {
804 PyErr_SetString(PyExc_ValueError,
805 "incomplete format");
806 goto error;
807 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200808 v = getnextarg(args, arglen, &argidx);
809 if (v == NULL)
810 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200811
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300812 if (fmtcnt == 0) {
813 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814 writer.overallocate = 0;
815 }
816
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 sign = 0;
818 fill = ' ';
819 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700820 case 'r':
821 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800822 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200823 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (temp == NULL)
825 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 assert(PyUnicode_IS_ASCII(temp));
827 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
828 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 if (prec >= 0 && len > prec)
830 len = prec;
831 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200832
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 case 's':
834 // %s is only for 2/3 code; 3 only code should use %b
835 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 if (temp == NULL)
838 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 'i':
844 case 'd':
845 case 'u':
846 case 'o':
847 case 'x':
848 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200849 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200850 && width == -1 && prec == -1
851 && !(flags & (F_SIGN | F_BLANK))
852 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200853 {
854 /* Fast path */
855 int alternate = flags & F_ALT;
856 int base;
857
858 switch(c)
859 {
860 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700861 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200862 case 'd':
863 case 'i':
864 case 'u':
865 base = 10;
866 break;
867 case 'o':
868 base = 8;
869 break;
870 case 'x':
871 case 'X':
872 base = 16;
873 break;
874 }
875
876 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200877 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200878 res = _PyLong_FormatBytesWriter(&writer, res,
879 v, base, alternate);
880 if (res == NULL)
881 goto error;
882 continue;
883 }
884
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300885 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200886 if (!temp)
887 goto error;
888 assert(PyUnicode_IS_ASCII(temp));
889 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
890 len = PyUnicode_GET_LENGTH(temp);
891 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800892 if (flags & F_ZERO)
893 fill = '0';
894 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200895
Ethan Furmanb95b5612015-01-23 20:05:18 -0800896 case 'e':
897 case 'E':
898 case 'f':
899 case 'F':
900 case 'g':
901 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (width == -1 && prec == -1
903 && !(flags & (F_SIGN | F_BLANK)))
904 {
905 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200906 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200907 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200908 if (res == NULL)
909 goto error;
910 continue;
911 }
912
Victor Stinnerad771582015-10-09 12:38:53 +0200913 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800914 goto error;
915 pbuf = PyBytes_AS_STRING(temp);
916 len = PyBytes_GET_SIZE(temp);
917 sign = 1;
918 if (flags & F_ZERO)
919 fill = '0';
920 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200923 pbuf = &onechar;
924 len = byte_converter(v, &onechar);
925 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200927 if (width == -1) {
928 /* Fast path */
929 *res++ = onechar;
930 continue;
931 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200933
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 default:
935 PyErr_Format(PyExc_ValueError,
936 "unsupported format character '%c' (0x%x) "
937 "at index %zd",
938 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200939 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
941 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200942
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 if (sign) {
944 if (*pbuf == '-' || *pbuf == '+') {
945 sign = *pbuf++;
946 len--;
947 }
948 else if (flags & F_SIGN)
949 sign = '+';
950 else if (flags & F_BLANK)
951 sign = ' ';
952 else
953 sign = 0;
954 }
955 if (width < len)
956 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957
958 alloc = width;
959 if (sign != 0 && len == width)
960 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200961 /* 2: size preallocated for %s */
962 if (alloc > 2) {
963 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200964 if (res == NULL)
965 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200967#ifndef NDEBUG
968 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200969#endif
970
971 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 if (sign) {
973 if (fill != ' ')
974 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800975 if (width > len)
976 width--;
977 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978
979 /* Write the numeric prefix for "x", "X" and "o" formats
980 if the alternate form is used.
981 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200982 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 assert(pbuf[0] == '0');
984 assert(pbuf[1] == c);
985 if (fill != ' ') {
986 *res++ = *pbuf++;
987 *res++ = *pbuf++;
988 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 width -= 2;
990 if (width < 0)
991 width = 0;
992 len -= 2;
993 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200994
995 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800996 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200997 memset(res, fill, width - len);
998 res += (width - len);
999 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001001
1002 /* If padding with spaces: write sign if needed and/or numeric
1003 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 if (fill == ' ') {
1005 if (sign)
1006 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001007 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001008 assert(pbuf[0] == '0');
1009 assert(pbuf[1] == c);
1010 *res++ = *pbuf++;
1011 *res++ = *pbuf++;
1012 }
1013 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001016 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* Pad right with the fill character if needed */
1020 if (width > len) {
1021 memset(res, ' ', width - len);
1022 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001025 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 PyErr_SetString(PyExc_TypeError,
1027 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 Py_XDECREF(temp);
1029 goto error;
1030 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001033#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034 /* check that we computed the exact size for this write */
1035 assert((res - before) == alloc);
1036#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
1039 /* If overallocation was disabled, ensure that it was the last
1040 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001041 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001043
Ethan Furmanb95b5612015-01-23 20:05:18 -08001044 if (argidx < arglen && !dict) {
1045 PyErr_SetString(PyExc_TypeError,
1046 "not all arguments converted during bytes formatting");
1047 goto error;
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050 if (args_owned) {
1051 Py_DECREF(args);
1052 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054
1055 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001056 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 if (args_owned) {
1058 Py_DECREF(args);
1059 }
1060 return NULL;
1061}
1062
Greg Price3a4f6672019-09-12 11:12:22 -07001063/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001064PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_ssize_t len,
1066 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001067 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001070 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001072 _PyBytesWriter writer;
1073
1074 _PyBytesWriter_Init(&writer);
1075
1076 p = _PyBytesWriter_Alloc(&writer, len);
1077 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001079 writer.overallocate = 1;
1080
Eric V. Smith42454af2016-10-31 09:22:08 -04001081 *first_invalid_escape = NULL;
1082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 end = s + len;
1084 while (s < end) {
1085 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001086 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 continue;
1088 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001091 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 PyErr_SetString(PyExc_ValueError,
1093 "Trailing \\ in string");
1094 goto failed;
1095 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 switch (*s++) {
1098 /* XXX This assumes ASCII! */
1099 case '\n': break;
1100 case '\\': *p++ = '\\'; break;
1101 case '\'': *p++ = '\''; break;
1102 case '\"': *p++ = '\"'; break;
1103 case 'b': *p++ = '\b'; break;
1104 case 'f': *p++ = '\014'; break; /* FF */
1105 case 't': *p++ = '\t'; break;
1106 case 'n': *p++ = '\n'; break;
1107 case 'r': *p++ = '\r'; break;
1108 case 'v': *p++ = '\013'; break; /* VT */
1109 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1110 case '0': case '1': case '2': case '3':
1111 case '4': case '5': case '6': case '7':
1112 c = s[-1] - '0';
1113 if (s < end && '0' <= *s && *s <= '7') {
1114 c = (c<<3) + *s++ - '0';
1115 if (s < end && '0' <= *s && *s <= '7')
1116 c = (c<<3) + *s++ - '0';
1117 }
1118 *p++ = c;
1119 break;
1120 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001121 if (s+1 < end) {
1122 int digit1, digit2;
1123 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1124 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1125 if (digit1 < 16 && digit2 < 16) {
1126 *p++ = (unsigned char)((digit1 << 4) + digit2);
1127 s += 2;
1128 break;
1129 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 /* invalid hexadecimal digits */
1132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001134 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001135 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001136 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 goto failed;
1138 }
1139 if (strcmp(errors, "replace") == 0) {
1140 *p++ = '?';
1141 } else if (strcmp(errors, "ignore") == 0)
1142 /* do nothing */;
1143 else {
1144 PyErr_Format(PyExc_ValueError,
1145 "decoding error; unknown "
1146 "error handling code: %.400s",
1147 errors);
1148 goto failed;
1149 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001150 /* skip \x */
1151 if (s < end && Py_ISXDIGIT(s[0]))
1152 s++; /* and a hexdigit */
1153 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001156 if (*first_invalid_escape == NULL) {
1157 *first_invalid_escape = s-1; /* Back up one char, since we've
1158 already incremented s. */
1159 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001161 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 }
1163 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001164
1165 return _PyBytesWriter_Finish(&writer, p);
1166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001168 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001170}
1171
Eric V. Smith42454af2016-10-31 09:22:08 -04001172PyObject *PyBytes_DecodeEscape(const char *s,
1173 Py_ssize_t len,
1174 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001175 Py_ssize_t Py_UNUSED(unicode),
1176 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001177{
1178 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001179 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001180 &first_invalid_escape);
1181 if (result == NULL)
1182 return NULL;
1183 if (first_invalid_escape != NULL) {
1184 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1185 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001186 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001187 Py_DECREF(result);
1188 return NULL;
1189 }
1190 }
1191 return result;
1192
1193}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001194/* -------------------------------------------------------------------- */
1195/* object api */
1196
1197Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001198PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (!PyBytes_Check(op)) {
1201 PyErr_Format(PyExc_TypeError,
1202 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1203 return -1;
1204 }
1205 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206}
1207
1208char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001209PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 if (!PyBytes_Check(op)) {
1212 PyErr_Format(PyExc_TypeError,
1213 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1214 return NULL;
1215 }
1216 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217}
1218
1219int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001220PyBytes_AsStringAndSize(PyObject *obj,
1221 char **s,
1222 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 if (s == NULL) {
1225 PyErr_BadInternalCall();
1226 return -1;
1227 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(obj)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1232 return -1;
1233 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 *s = PyBytes_AS_STRING(obj);
1236 if (len != NULL)
1237 *len = PyBytes_GET_SIZE(obj);
1238 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001239 PyErr_SetString(PyExc_ValueError,
1240 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return -1;
1242 }
1243 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244}
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246/* -------------------------------------------------------------------- */
1247/* Methods */
1248
Eric Smith0923d1d2009-04-16 20:16:10 +00001249#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001250
1251#include "stringlib/fastsearch.h"
1252#include "stringlib/count.h"
1253#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001254#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001255#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001256#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001257#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Eric Smith0f78bff2009-11-30 01:01:42 +00001259#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261PyObject *
1262PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001263{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001264 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001266 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001268 unsigned char quote;
1269 const unsigned char *s;
1270 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001271
1272 /* Compute size of output string */
1273 squotes = dquotes = 0;
1274 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001275 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001277 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001279 case '\'': squotes++; break;
1280 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001282 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 default:
1284 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001287 if (newsize > PY_SSIZE_T_MAX - incr)
1288 goto overflow;
1289 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001290 }
1291 quote = '\'';
1292 if (smartquotes && squotes && !dquotes)
1293 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001294 if (squotes && quote == '\'') {
1295 if (newsize > PY_SSIZE_T_MAX - squotes)
1296 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299
1300 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (v == NULL) {
1302 return NULL;
1303 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 *p++ = 'b', *p++ = quote;
1307 for (i = 0; i < length; i++) {
1308 unsigned char c = op->ob_sval[i];
1309 if (c == quote || c == '\\')
1310 *p++ = '\\', *p++ = c;
1311 else if (c == '\t')
1312 *p++ = '\\', *p++ = 't';
1313 else if (c == '\n')
1314 *p++ = '\\', *p++ = 'n';
1315 else if (c == '\r')
1316 *p++ = '\\', *p++ = 'r';
1317 else if (c < ' ' || c >= 0x7f) {
1318 *p++ = '\\';
1319 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001320 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1321 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001323 else
1324 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001327 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001329
1330 overflow:
1331 PyErr_SetString(PyExc_OverflowError,
1332 "bytes object is too large to make repr");
1333 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001334}
1335
Neal Norwitz6968b052007-02-27 19:02:19 +00001336static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001337bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001340}
1341
Neal Norwitz6968b052007-02-27 19:02:19 +00001342static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001343bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001344{
Victor Stinner331a6a52019-05-27 16:39:22 +02001345 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001346 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001350 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 }
1352 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001353}
1354
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001356bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359}
Neal Norwitz6968b052007-02-27 19:02:19 +00001360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361/* This is also used by PyBytes_Concat() */
1362static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001363bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 Py_buffer va, vb;
1366 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 va.len = -1;
1369 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001370 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1371 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001373 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 goto done;
1375 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 /* Optimize end cases */
1378 if (va.len == 0 && PyBytes_CheckExact(b)) {
1379 result = b;
1380 Py_INCREF(result);
1381 goto done;
1382 }
1383 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1384 result = a;
1385 Py_INCREF(result);
1386 goto done;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001389 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyErr_NoMemory();
1391 goto done;
1392 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001394 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (result != NULL) {
1396 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1397 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
1400 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (va.len != -1)
1402 PyBuffer_Release(&va);
1403 if (vb.len != -1)
1404 PyBuffer_Release(&vb);
1405 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406}
Neal Norwitz6968b052007-02-27 19:02:19 +00001407
1408static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001411 Py_ssize_t i;
1412 Py_ssize_t j;
1413 Py_ssize_t size;
1414 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size_t nbytes;
1416 if (n < 0)
1417 n = 0;
1418 /* watch out for overflows: the size can overflow int,
1419 * and the # of bytes needed can overflow size_t
1420 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001421 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyErr_SetString(PyExc_OverflowError,
1423 "repeated bytes are too long");
1424 return NULL;
1425 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001426 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1428 Py_INCREF(a);
1429 return (PyObject *)a;
1430 }
1431 nbytes = (size_t)size;
1432 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1433 PyErr_SetString(PyExc_OverflowError,
1434 "repeated bytes are too long");
1435 return NULL;
1436 }
1437 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1438 if (op == NULL)
1439 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001440 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 op->ob_shash = -1;
1442 op->ob_sval[size] = '\0';
1443 if (Py_SIZE(a) == 1 && n > 0) {
1444 memset(op->ob_sval, a->ob_sval[0] , n);
1445 return (PyObject *) op;
1446 }
1447 i = 0;
1448 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001449 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 i = Py_SIZE(a);
1451 }
1452 while (i < size) {
1453 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001454 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 i += j;
1456 }
1457 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001458}
1459
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001460static int
1461bytes_contains(PyObject *self, PyObject *arg)
1462{
1463 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1464}
1465
Neal Norwitz6968b052007-02-27 19:02:19 +00001466static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001467bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (i < 0 || i >= Py_SIZE(a)) {
1470 PyErr_SetString(PyExc_IndexError, "index out of range");
1471 return NULL;
1472 }
1473 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001474}
1475
Benjamin Peterson621b4302016-09-09 13:54:34 -07001476static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001477bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1478{
1479 int cmp;
1480 Py_ssize_t len;
1481
1482 len = Py_SIZE(a);
1483 if (Py_SIZE(b) != len)
1484 return 0;
1485
1486 if (a->ob_sval[0] != b->ob_sval[0])
1487 return 0;
1488
1489 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1490 return (cmp == 0);
1491}
1492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001494bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 int c;
1497 Py_ssize_t len_a, len_b;
1498 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001499 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 /* Make sure both arguments are strings. */
1502 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001503 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001504 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001505 rc = PyObject_IsInstance((PyObject*)a,
1506 (PyObject*)&PyUnicode_Type);
1507 if (!rc)
1508 rc = PyObject_IsInstance((PyObject*)b,
1509 (PyObject*)&PyUnicode_Type);
1510 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001512 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001513 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001514 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001515 return NULL;
1516 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001517 else {
1518 rc = PyObject_IsInstance((PyObject*)a,
1519 (PyObject*)&PyLong_Type);
1520 if (!rc)
1521 rc = PyObject_IsInstance((PyObject*)b,
1522 (PyObject*)&PyLong_Type);
1523 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001524 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001525 if (rc) {
1526 if (PyErr_WarnEx(PyExc_BytesWarning,
1527 "Comparison between bytes and int", 1))
1528 return NULL;
1529 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001530 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 }
stratakise8b19652017-11-02 11:32:54 +01001532 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001534 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001536 case Py_EQ:
1537 case Py_LE:
1538 case Py_GE:
1539 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001540 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001541 case Py_NE:
1542 case Py_LT:
1543 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001544 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001545 default:
1546 PyErr_BadArgument();
1547 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 }
1549 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001550 else if (op == Py_EQ || op == Py_NE) {
1551 int eq = bytes_compare_eq(a, b);
1552 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001553 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001554 }
1555 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001556 len_a = Py_SIZE(a);
1557 len_b = Py_SIZE(b);
1558 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 if (min_len > 0) {
1560 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001561 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001564 else
1565 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001566 if (c != 0)
1567 Py_RETURN_RICHCOMPARE(c, 0, op);
1568 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001570}
1571
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001572static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001573bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001574{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001575 if (a->ob_shash == -1) {
1576 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001577 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001578 }
1579 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001580}
1581
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001583bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584{
Victor Stinnera15e2602020-04-08 02:01:56 +02001585 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1587 if (i == -1 && PyErr_Occurred())
1588 return NULL;
1589 if (i < 0)
1590 i += PyBytes_GET_SIZE(self);
1591 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1592 PyErr_SetString(PyExc_IndexError,
1593 "index out of range");
1594 return NULL;
1595 }
1596 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1597 }
1598 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001599 Py_ssize_t start, stop, step, slicelength, i;
1600 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001601 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 char* result_buf;
1603 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001604
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001605 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 return NULL;
1607 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001608 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1609 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 if (slicelength <= 0) {
1612 return PyBytes_FromStringAndSize("", 0);
1613 }
1614 else if (start == 0 && step == 1 &&
1615 slicelength == PyBytes_GET_SIZE(self) &&
1616 PyBytes_CheckExact(self)) {
1617 Py_INCREF(self);
1618 return (PyObject *)self;
1619 }
1620 else if (step == 1) {
1621 return PyBytes_FromStringAndSize(
1622 PyBytes_AS_STRING(self) + start,
1623 slicelength);
1624 }
1625 else {
1626 source_buf = PyBytes_AS_STRING(self);
1627 result = PyBytes_FromStringAndSize(NULL, slicelength);
1628 if (result == NULL)
1629 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 result_buf = PyBytes_AS_STRING(result);
1632 for (cur = start, i = 0; i < slicelength;
1633 cur += step, i++) {
1634 result_buf[i] = source_buf[cur];
1635 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 return result;
1638 }
1639 }
1640 else {
1641 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001642 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 Py_TYPE(item)->tp_name);
1644 return NULL;
1645 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646}
1647
1648static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001649bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1652 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653}
1654
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001655static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 (lenfunc)bytes_length, /*sq_length*/
1657 (binaryfunc)bytes_concat, /*sq_concat*/
1658 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1659 (ssizeargfunc)bytes_item, /*sq_item*/
1660 0, /*sq_slice*/
1661 0, /*sq_ass_item*/
1662 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001663 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664};
1665
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001666static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 (lenfunc)bytes_length,
1668 (binaryfunc)bytes_subscript,
1669 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670};
1671
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001672static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 (getbufferproc)bytes_buffer_getbuffer,
1674 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675};
1676
1677
/* Values for the `striptype` argument of the strip helpers below. */
#define LEFTSTRIP   0   /* strip at the start only */
#define RIGHTSTRIP  1   /* strip at the end only */
#define BOTHSTRIP   2   /* strip at both ends */
1681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682/*[clinic input]
1683bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001685 sep: object = None
1686 The delimiter according which to split the bytes.
1687 None (the default value) means split on ASCII whitespace characters
1688 (space, tab, return, newline, formfeed, vertical tab).
1689 maxsplit: Py_ssize_t = -1
1690 Maximum number of splits to do.
1691 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693Return a list of the sections in the bytes, using sep as the delimiter.
1694[clinic start generated code]*/
1695
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001696static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001697bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1698/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001699{
1700 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 const char *s = PyBytes_AS_STRING(self), *sub;
1702 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001703 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 if (maxsplit < 0)
1706 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001709 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 return NULL;
1711 sub = vsub.buf;
1712 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1715 PyBuffer_Release(&vsub);
1716 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001717}
1718
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001719/*[clinic input]
1720bytes.partition
1721
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001722 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723 /
1724
1725Partition the bytes into three parts using the given separator.
1726
1727This will search for the separator sep in the bytes. If the separator is found,
1728returns a 3-tuple containing the part before the separator, the separator
1729itself, and the part after it.
1730
1731If the separator is not found, returns a 3-tuple containing the original bytes
1732object and two empty bytes objects.
1733[clinic start generated code]*/
1734
Neal Norwitz6968b052007-02-27 19:02:19 +00001735static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001736bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001737/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001738{
Neal Norwitz6968b052007-02-27 19:02:19 +00001739 return stringlib_partition(
1740 (PyObject*) self,
1741 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001742 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001743 );
1744}
1745
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001746/*[clinic input]
1747bytes.rpartition
1748
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001749 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750 /
1751
1752Partition the bytes into three parts using the given separator.
1753
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001754This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001755the separator is found, returns a 3-tuple containing the part before the
1756separator, the separator itself, and the part after it.
1757
1758If the separator is not found, returns a 3-tuple containing two empty bytes
1759objects and the original bytes object.
1760[clinic start generated code]*/
1761
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762static PyObject *
1763bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001764/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001765{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 return stringlib_rpartition(
1767 (PyObject*) self,
1768 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001769 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001771}
1772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773/*[clinic input]
1774bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776Return a list of the sections in the bytes, using sep as the delimiter.
1777
1778Splitting is done starting at the end of the bytes and working to the front.
1779[clinic start generated code]*/
1780
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001782bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1783/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784{
1785 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 const char *s = PyBytes_AS_STRING(self), *sub;
1787 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 if (maxsplit < 0)
1791 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001792 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 return NULL;
1796 sub = vsub.buf;
1797 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1800 PyBuffer_Release(&vsub);
1801 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001802}
1803
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001805/*[clinic input]
1806bytes.join
1807
1808 iterable_of_bytes: object
1809 /
1810
1811Concatenate any number of bytes objects.
1812
1813The bytes whose method is called is inserted in between each pair.
1814
1815The result is returned as a new bytes object.
1816
1817Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1818[clinic start generated code]*/
1819
Neal Norwitz6968b052007-02-27 19:02:19 +00001820static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001821bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1822/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001823{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001824 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001825}
1826
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827PyObject *
1828_PyBytes_Join(PyObject *sep, PyObject *x)
1829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 assert(sep != NULL && PyBytes_Check(sep));
1831 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001832 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833}
1834
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001835static PyObject *
1836bytes_find(PyBytesObject *self, PyObject *args)
1837{
1838 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1839}
1840
1841static PyObject *
1842bytes_index(PyBytesObject *self, PyObject *args)
1843{
1844 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1845}
1846
1847
1848static PyObject *
1849bytes_rfind(PyBytesObject *self, PyObject *args)
1850{
1851 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1852}
1853
1854
1855static PyObject *
1856bytes_rindex(PyBytesObject *self, PyObject *args)
1857{
1858 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1859}
1860
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
1862Py_LOCAL_INLINE(PyObject *)
1863do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001866 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 Py_ssize_t len = PyBytes_GET_SIZE(self);
1868 char *sep;
1869 Py_ssize_t seplen;
1870 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001872 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 return NULL;
1874 sep = vsep.buf;
1875 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 i = 0;
1878 if (striptype != RIGHTSTRIP) {
1879 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1880 i++;
1881 }
1882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 j = len;
1885 if (striptype != LEFTSTRIP) {
1886 do {
1887 j--;
1888 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1889 j++;
1890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1895 Py_INCREF(self);
1896 return (PyObject*)self;
1897 }
1898 else
1899 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001900}
1901
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
1903Py_LOCAL_INLINE(PyObject *)
1904do_strip(PyBytesObject *self, int striptype)
1905{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001906 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 i = 0;
1910 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001911 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 i++;
1913 }
1914 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 j = len;
1917 if (striptype != LEFTSTRIP) {
1918 do {
1919 j--;
David Malcolm96960882010-11-05 17:23:41 +00001920 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 j++;
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1925 Py_INCREF(self);
1926 return (PyObject*)self;
1927 }
1928 else
1929 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930}
1931
1932
1933Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001934do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001936 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001937 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 }
1939 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940}
1941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942/*[clinic input]
1943bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001945 bytes: object = None
1946 /
1947
1948Strip leading and trailing bytes contained in the argument.
1949
1950If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1951[clinic start generated code]*/
1952
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001953static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001955/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001956{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001958}
1959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960/*[clinic input]
1961bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001963 bytes: object = None
1964 /
1965
1966Strip leading bytes contained in the argument.
1967
1968If the argument is omitted or None, strip leading ASCII whitespace.
1969[clinic start generated code]*/
1970
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971static PyObject *
1972bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001973/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001974{
1975 return do_argstrip(self, LEFTSTRIP, bytes);
1976}
1977
1978/*[clinic input]
1979bytes.rstrip
1980
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001981 bytes: object = None
1982 /
1983
1984Strip trailing bytes contained in the argument.
1985
1986If the argument is omitted or None, strip trailing ASCII whitespace.
1987[clinic start generated code]*/
1988
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989static PyObject *
1990bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001991/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992{
1993 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001994}
Neal Norwitz6968b052007-02-27 19:02:19 +00001995
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001997static PyObject *
1998bytes_count(PyBytesObject *self, PyObject *args)
1999{
2000 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2001}
2002
2003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004/*[clinic input]
2005bytes.translate
2006
Victor Stinner049e5092014-08-17 22:20:00 +02002007 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002009 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002010 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011
2012Return a copy with each character mapped by the given translation table.
2013
Martin Panter1b6c6da2016-08-27 08:35:02 +00002014All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015The remaining characters are mapped through the given translation table.
2016[clinic start generated code]*/
2017
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002018static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002019bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002020 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002021/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002023 const char *input;
2024 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002025 Py_buffer table_view = {NULL, NULL};
2026 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002028 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 Py_ssize_t inlen, tablen, dellen = 0;
2032 PyObject *result;
2033 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035 if (PyBytes_Check(table)) {
2036 table_chars = PyBytes_AS_STRING(table);
2037 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039 else if (table == Py_None) {
2040 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 tablen = 256;
2042 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002043 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002044 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002045 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002046 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002047 tablen = table_view.len;
2048 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 if (tablen != 256) {
2051 PyErr_SetString(PyExc_ValueError,
2052 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002053 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 return NULL;
2055 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057 if (deletechars != NULL) {
2058 if (PyBytes_Check(deletechars)) {
2059 del_table_chars = PyBytes_AS_STRING(deletechars);
2060 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002061 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002062 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002063 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002064 PyBuffer_Release(&table_view);
2065 return NULL;
2066 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002067 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002068 dellen = del_table_view.len;
2069 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 }
2071 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 dellen = 0;
2074 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 inlen = PyBytes_GET_SIZE(input_obj);
2077 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002078 if (result == NULL) {
2079 PyBuffer_Release(&del_table_view);
2080 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002082 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002083 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 /* If no deletions are required, use faster code */
2088 for (i = inlen; --i >= 0; ) {
2089 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002090 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 changed = 1;
2092 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002093 if (!changed && PyBytes_CheckExact(input_obj)) {
2094 Py_INCREF(input_obj);
2095 Py_DECREF(result);
2096 result = input_obj;
2097 }
2098 PyBuffer_Release(&del_table_view);
2099 PyBuffer_Release(&table_view);
2100 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002103 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 for (i = 0; i < 256; i++)
2105 trans_table[i] = Py_CHARMASK(i);
2106 } else {
2107 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002108 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002113 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 for (i = inlen; --i >= 0; ) {
2117 c = Py_CHARMASK(*input++);
2118 if (trans_table[c] != -1)
2119 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2120 continue;
2121 changed = 1;
2122 }
2123 if (!changed && PyBytes_CheckExact(input_obj)) {
2124 Py_DECREF(result);
2125 Py_INCREF(input_obj);
2126 return input_obj;
2127 }
2128 /* Fix the size of the resulting string */
2129 if (inlen > 0)
2130 _PyBytes_Resize(&result, output - output_start);
2131 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132}
2133
2134
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002135/*[clinic input]
2136
2137@staticmethod
2138bytes.maketrans
2139
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002140 frm: Py_buffer
2141 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142 /
2143
2144Return a translation table useable for the bytes or bytearray translate method.
2145
2146The returned table will be one where each byte in frm is mapped to the byte at
2147the same position in to.
2148
2149The bytes objects frm and to must be of the same length.
2150[clinic start generated code]*/
2151
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002153bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002154/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002155{
2156 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002157}
2158
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159
2160/*[clinic input]
2161bytes.replace
2162
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002163 old: Py_buffer
2164 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002165 count: Py_ssize_t = -1
2166 Maximum number of occurrences to replace.
2167 -1 (the default value) means replace all occurrences.
2168 /
2169
2170Return a copy with all occurrences of substring old replaced by new.
2171
2172If the optional argument count is given, only the first count occurrences are
2173replaced.
2174[clinic start generated code]*/
2175
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002176static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002177bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002178 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002179/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002181 return stringlib_replace((PyObject *)self,
2182 (const char *)old->buf, old->len,
2183 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184}
2185
2186/** End DALKE **/
2187
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002189static PyObject *
2190bytes_startswith(PyBytesObject *self, PyObject *args)
2191{
2192 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2193}
2194
2195static PyObject *
2196bytes_endswith(PyBytesObject *self, PyObject *args)
2197{
2198 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2199}
2200
2201
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002202/*[clinic input]
2203bytes.decode
2204
2205 encoding: str(c_default="NULL") = 'utf-8'
2206 The encoding with which to decode the bytes.
2207 errors: str(c_default="NULL") = 'strict'
2208 The error handling scheme to use for the handling of decoding errors.
2209 The default is 'strict' meaning that decoding errors raise a
2210 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2211 as well as any other name registered with codecs.register_error that
2212 can handle UnicodeDecodeErrors.
2213
2214Decode the bytes using the codec registered for encoding.
2215[clinic start generated code]*/
2216
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002218bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002219 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002220/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002222 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002223}
2224
Guido van Rossum20188312006-05-05 15:15:40 +00002225
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226/*[clinic input]
2227bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002228
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002229 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002230
2231Return a list of the lines in the bytes, breaking at line boundaries.
2232
2233Line breaks are not included in the resulting list unless keepends is given and
2234true.
2235[clinic start generated code]*/
2236
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002238bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002239/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002240{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002241 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002242 (PyObject*) self, PyBytes_AS_STRING(self),
2243 PyBytes_GET_SIZE(self), keepends
2244 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002245}
2246
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247/*[clinic input]
2248@classmethod
2249bytes.fromhex
2250
2251 string: unicode
2252 /
2253
2254Create a bytes object from a string of hexadecimal numbers.
2255
2256Spaces between two numbers are accepted.
2257Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2258[clinic start generated code]*/
2259
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002261bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002262/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002263{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002264 PyObject *result = _PyBytes_FromHex(string, 0);
2265 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002266 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002267 }
2268 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002269}
2270
2271PyObject*
2272_PyBytes_FromHex(PyObject *string, int use_bytearray)
2273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002275 Py_ssize_t hexlen, invalid_char;
2276 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002277 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002278 _PyBytesWriter writer;
2279
2280 _PyBytesWriter_Init(&writer);
2281 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002282
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002283 assert(PyUnicode_Check(string));
2284 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002287
Victor Stinner2bf89932015-10-14 11:25:33 +02002288 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002289 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002290 unsigned int kind = PyUnicode_KIND(string);
2291 Py_ssize_t i;
2292
2293 /* search for the first non-ASCII character */
2294 for (i = 0; i < hexlen; i++) {
2295 if (PyUnicode_READ(kind, data, i) >= 128)
2296 break;
2297 }
2298 invalid_char = i;
2299 goto error;
2300 }
2301
2302 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2303 str = PyUnicode_1BYTE_DATA(string);
2304
2305 /* This overestimates if there are spaces */
2306 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2307 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002309
2310 end = str + hexlen;
2311 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002313 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002314 do {
2315 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002316 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002317 if (str >= end)
2318 break;
2319 }
2320
2321 top = _PyLong_DigitValue[*str];
2322 if (top >= 16) {
2323 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 goto error;
2325 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002326 str++;
2327
2328 bot = _PyLong_DigitValue[*str];
2329 if (bot >= 16) {
2330 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2331 goto error;
2332 }
2333 str++;
2334
2335 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002337
2338 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002339
2340 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002341 PyErr_Format(PyExc_ValueError,
2342 "non-hexadecimal number found in "
2343 "fromhex() arg at position %zd", invalid_char);
2344 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002345 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002346}
2347
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002348/*[clinic input]
2349bytes.hex
2350
2351 sep: object = NULL
2352 An optional single character or byte to separate hex bytes.
2353 bytes_per_sep: int = 1
2354 How many bytes between separators. Positive values count from the
2355 right, negative values count from the left.
2356
2357Create a str of hexadecimal numbers from a bytes object.
2358
2359Example:
2360>>> value = b'\xb9\x01\xef'
2361>>> value.hex()
2362'b901ef'
2363>>> value.hex(':')
2364'b9:01:ef'
2365>>> value.hex(':', 2)
2366'b9:01ef'
2367>>> value.hex(':', -2)
2368'b901:ef'
2369[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002370
2371static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002372bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2373/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002374{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002375 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002376 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002377 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002378}
2379
2380static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302381bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002384}
2385
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002386
2387static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002388bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302390 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002392 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002393 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002394 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002395 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002396 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002397 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002398 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002399 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002400 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002401 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002402 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002403 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302404 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302406 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302408 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002409 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302410 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302412 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302414 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302416 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302418 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002420 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002421 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302422 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002423 BYTES_LSTRIP_METHODDEF
2424 BYTES_MAKETRANS_METHODDEF
2425 BYTES_PARTITION_METHODDEF
2426 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002427 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2428 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002429 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002430 BYTES_RPARTITION_METHODDEF
2431 BYTES_RSPLIT_METHODDEF
2432 BYTES_RSTRIP_METHODDEF
2433 BYTES_SPLIT_METHODDEF
2434 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002435 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002436 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002437 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302438 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302440 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002441 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302442 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002443 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002444 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002445};
2446
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002447static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002448bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002449{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002450 if (!PyBytes_Check(self)) {
2451 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002452 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002453 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002454 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002455}
2456
2457static PyNumberMethods bytes_as_number = {
2458 0, /*nb_add*/
2459 0, /*nb_subtract*/
2460 0, /*nb_multiply*/
2461 bytes_mod, /*nb_remainder*/
2462};
2463
2464static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002465bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
2467static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002468bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 PyObject *x = NULL;
2471 const char *encoding = NULL;
2472 const char *errors = NULL;
2473 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002474 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 Py_ssize_t size;
2476 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002479 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2481 &encoding, &errors))
2482 return NULL;
2483 if (x == NULL) {
2484 if (encoding != NULL || errors != NULL) {
2485 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002486 encoding != NULL ?
2487 "encoding without a string argument" :
2488 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 return NULL;
2490 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002491 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002494 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002495 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002496 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002498 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 return NULL;
2500 }
2501 new = PyUnicode_AsEncodedString(x, encoding, errors);
2502 if (new == NULL)
2503 return NULL;
2504 assert(PyBytes_Check(new));
2505 return new;
2506 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002507
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002508 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002509 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002510 PyUnicode_Check(x) ?
2511 "string argument without an encoding" :
2512 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002513 return NULL;
2514 }
2515
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002516 /* We'd like to call PyObject_Bytes here, but we need to check for an
2517 integer argument before deferring to PyBytes_FromObject, something
2518 PyObject_Bytes doesn't do. */
2519 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2520 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002521 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002522 Py_DECREF(func);
2523 if (new == NULL)
2524 return NULL;
2525 if (!PyBytes_Check(new)) {
2526 PyErr_Format(PyExc_TypeError,
2527 "__bytes__ returned non-bytes (type %.200s)",
2528 Py_TYPE(new)->tp_name);
2529 Py_DECREF(new);
2530 return NULL;
2531 }
2532 return new;
2533 }
2534 else if (PyErr_Occurred())
2535 return NULL;
2536
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002537 if (PyUnicode_Check(x)) {
2538 PyErr_SetString(PyExc_TypeError,
2539 "string argument without an encoding");
2540 return NULL;
2541 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002543 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002544 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2545 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002546 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002547 return NULL;
2548 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002549 }
INADA Naokia634e232017-01-06 17:32:01 +09002550 else {
2551 if (size < 0) {
2552 PyErr_SetString(PyExc_ValueError, "negative count");
2553 return NULL;
2554 }
2555 new = _PyBytes_FromSize(size, 1);
2556 if (new == NULL)
2557 return NULL;
2558 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002559 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002562 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002563}
2564
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002565static PyObject*
2566_PyBytes_FromBuffer(PyObject *x)
2567{
2568 PyObject *new;
2569 Py_buffer view;
2570
2571 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2572 return NULL;
2573
2574 new = PyBytes_FromStringAndSize(NULL, view.len);
2575 if (!new)
2576 goto fail;
2577 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2578 &view, view.len, 'C') < 0)
2579 goto fail;
2580 PyBuffer_Release(&view);
2581 return new;
2582
2583fail:
2584 Py_XDECREF(new);
2585 PyBuffer_Release(&view);
2586 return NULL;
2587}
2588
2589static PyObject*
2590_PyBytes_FromList(PyObject *x)
2591{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002592 Py_ssize_t i, size = PyList_GET_SIZE(x);
2593 Py_ssize_t value;
2594 char *str;
2595 PyObject *item;
2596 _PyBytesWriter writer;
2597
2598 _PyBytesWriter_Init(&writer);
2599 str = _PyBytesWriter_Alloc(&writer, size);
2600 if (str == NULL)
2601 return NULL;
2602 writer.overallocate = 1;
2603 size = writer.allocated;
2604
2605 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2606 item = PyList_GET_ITEM(x, i);
2607 Py_INCREF(item);
2608 value = PyNumber_AsSsize_t(item, NULL);
2609 Py_DECREF(item);
2610 if (value == -1 && PyErr_Occurred())
2611 goto error;
2612
2613 if (value < 0 || value >= 256) {
2614 PyErr_SetString(PyExc_ValueError,
2615 "bytes must be in range(0, 256)");
2616 goto error;
2617 }
2618
2619 if (i >= size) {
2620 str = _PyBytesWriter_Resize(&writer, str, size+1);
2621 if (str == NULL)
2622 return NULL;
2623 size = writer.allocated;
2624 }
2625 *str++ = (char) value;
2626 }
2627 return _PyBytesWriter_Finish(&writer, str);
2628
2629 error:
2630 _PyBytesWriter_Dealloc(&writer);
2631 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002632}
2633
2634static PyObject*
2635_PyBytes_FromTuple(PyObject *x)
2636{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002637 PyObject *bytes;
2638 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2639 Py_ssize_t value;
2640 char *str;
2641 PyObject *item;
2642
2643 bytes = PyBytes_FromStringAndSize(NULL, size);
2644 if (bytes == NULL)
2645 return NULL;
2646 str = ((PyBytesObject *)bytes)->ob_sval;
2647
2648 for (i = 0; i < size; i++) {
2649 item = PyTuple_GET_ITEM(x, i);
2650 value = PyNumber_AsSsize_t(item, NULL);
2651 if (value == -1 && PyErr_Occurred())
2652 goto error;
2653
2654 if (value < 0 || value >= 256) {
2655 PyErr_SetString(PyExc_ValueError,
2656 "bytes must be in range(0, 256)");
2657 goto error;
2658 }
2659 *str++ = (char) value;
2660 }
2661 return bytes;
2662
2663 error:
2664 Py_DECREF(bytes);
2665 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002666}
2667
2668static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002669_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002670{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002671 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002673 _PyBytesWriter writer;
2674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002676 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 if (size == -1 && PyErr_Occurred())
2678 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002679
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002680 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002681 str = _PyBytesWriter_Alloc(&writer, size);
2682 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002684 writer.overallocate = 1;
2685 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Run the iterator to exhaustion */
2688 for (i = 0; ; i++) {
2689 PyObject *item;
2690 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 /* Get the next item */
2693 item = PyIter_Next(it);
2694 if (item == NULL) {
2695 if (PyErr_Occurred())
2696 goto error;
2697 break;
2698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002701 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 Py_DECREF(item);
2703 if (value == -1 && PyErr_Occurred())
2704 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 /* Range check */
2707 if (value < 0 || value >= 256) {
2708 PyErr_SetString(PyExc_ValueError,
2709 "bytes must be in range(0, 256)");
2710 goto error;
2711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 /* Append the byte */
2714 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002715 str = _PyBytesWriter_Resize(&writer, str, size+1);
2716 if (str == NULL)
2717 return NULL;
2718 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002720 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002722
2723 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
2725 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002726 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002727 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728}
2729
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002730PyObject *
2731PyBytes_FromObject(PyObject *x)
2732{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002733 PyObject *it, *result;
2734
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002735 if (x == NULL) {
2736 PyErr_BadInternalCall();
2737 return NULL;
2738 }
2739
2740 if (PyBytes_CheckExact(x)) {
2741 Py_INCREF(x);
2742 return x;
2743 }
2744
2745 /* Use the modern buffer interface */
2746 if (PyObject_CheckBuffer(x))
2747 return _PyBytes_FromBuffer(x);
2748
2749 if (PyList_CheckExact(x))
2750 return _PyBytes_FromList(x);
2751
2752 if (PyTuple_CheckExact(x))
2753 return _PyBytes_FromTuple(x);
2754
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002755 if (!PyUnicode_Check(x)) {
2756 it = PyObject_GetIter(x);
2757 if (it != NULL) {
2758 result = _PyBytes_FromIterator(it, x);
2759 Py_DECREF(it);
2760 return result;
2761 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002762 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2763 return NULL;
2764 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002765 }
2766
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 PyErr_Format(PyExc_TypeError,
2768 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002769 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002770 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002771}
2772
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002774bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 PyObject *tmp, *pnew;
2777 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 assert(PyType_IsSubtype(type, &PyBytes_Type));
2780 tmp = bytes_new(&PyBytes_Type, args, kwds);
2781 if (tmp == NULL)
2782 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002783 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 n = PyBytes_GET_SIZE(tmp);
2785 pnew = type->tp_alloc(type, n);
2786 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002787 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 PyBytes_AS_STRING(tmp), n+1);
2789 ((PyBytesObject *)pnew)->ob_shash =
2790 ((PyBytesObject *)tmp)->ob_shash;
2791 }
2792 Py_DECREF(tmp);
2793 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002794}
2795
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002796PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002797"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002799bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002800bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2801bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002802\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002804 - an iterable yielding integers in range(256)\n\
2805 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002806 - any object implementing the buffer API.\n\
2807 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002808
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002809static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002810
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002811PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002812 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2813 "bytes",
2814 PyBytesObject_SIZE,
2815 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002816 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002817 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 0, /* tp_getattr */
2819 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002820 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002821 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002822 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 &bytes_as_sequence, /* tp_as_sequence */
2824 &bytes_as_mapping, /* tp_as_mapping */
2825 (hashfunc)bytes_hash, /* tp_hash */
2826 0, /* tp_call */
2827 bytes_str, /* tp_str */
2828 PyObject_GenericGetAttr, /* tp_getattro */
2829 0, /* tp_setattro */
2830 &bytes_as_buffer, /* tp_as_buffer */
2831 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2832 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2833 bytes_doc, /* tp_doc */
2834 0, /* tp_traverse */
2835 0, /* tp_clear */
2836 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2837 0, /* tp_weaklistoffset */
2838 bytes_iter, /* tp_iter */
2839 0, /* tp_iternext */
2840 bytes_methods, /* tp_methods */
2841 0, /* tp_members */
2842 0, /* tp_getset */
2843 &PyBaseObject_Type, /* tp_base */
2844 0, /* tp_dict */
2845 0, /* tp_descr_get */
2846 0, /* tp_descr_set */
2847 0, /* tp_dictoffset */
2848 0, /* tp_init */
2849 0, /* tp_alloc */
2850 bytes_new, /* tp_new */
2851 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002852};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002853
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002855PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002857 assert(pv != NULL);
2858 if (*pv == NULL)
2859 return;
2860 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002861 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002862 return;
2863 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002864
2865 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2866 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002867 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002868 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002869
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002870 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002871 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2872 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2873 Py_CLEAR(*pv);
2874 return;
2875 }
2876
2877 oldsize = PyBytes_GET_SIZE(*pv);
2878 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2879 PyErr_NoMemory();
2880 goto error;
2881 }
2882 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2883 goto error;
2884
2885 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2886 PyBuffer_Release(&wb);
2887 return;
2888
2889 error:
2890 PyBuffer_Release(&wb);
2891 Py_CLEAR(*pv);
2892 return;
2893 }
2894
2895 else {
2896 /* Multiple references, need to create new object */
2897 PyObject *v;
2898 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002899 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002900 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901}
2902
2903void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002904PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002906 PyBytes_Concat(pv, w);
2907 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002908}
2909
2910
Ethan Furmanb95b5612015-01-23 20:05:18 -08002911/* The following function breaks the notion that bytes are immutable:
2912 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002914 as creating a new bytes object and destroying the old one, only
2915 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002917 Note that if there's not enough memory to resize the bytes object, the
2918 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919 memory" exception is set, and -1 is returned. Else (on success) 0 is
2920 returned, and the value in *pv may or may not be the same as on input.
2921 As always, an extra byte is allocated for a trailing \0 byte (newsize
2922 does *not* include that), and a trailing \0 byte is stored.
2923*/
2924
2925int
2926_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2927{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002928 PyObject *v;
2929 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002931 if (!PyBytes_Check(v) || newsize < 0) {
2932 goto error;
2933 }
2934 if (Py_SIZE(v) == newsize) {
2935 /* return early if newsize equals to v->ob_size */
2936 return 0;
2937 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002938 if (Py_SIZE(v) == 0) {
2939 if (newsize == 0) {
2940 return 0;
2941 }
2942 *pv = _PyBytes_FromSize(newsize, 0);
2943 Py_DECREF(v);
2944 return (*pv == NULL) ? -1 : 0;
2945 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002946 if (Py_REFCNT(v) != 1) {
2947 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002949 if (newsize == 0) {
2950 *pv = _PyBytes_FromSize(0, 0);
2951 Py_DECREF(v);
2952 return (*pv == NULL) ? -1 : 0;
2953 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01002955#ifdef Py_REF_DEBUG
2956 _Py_RefTotal--;
2957#endif
2958#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01002960#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002961 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002962 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 if (*pv == NULL) {
2964 PyObject_Del(v);
2965 PyErr_NoMemory();
2966 return -1;
2967 }
2968 _Py_NewReference(*pv);
2969 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01002970 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 sv->ob_sval[newsize] = '\0';
2972 sv->ob_shash = -1; /* invalidate cached hash value */
2973 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002974error:
2975 *pv = 0;
2976 Py_DECREF(v);
2977 PyErr_BadInternalCall();
2978 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002979}
2980
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981void
Victor Stinnerbed48172019-08-27 00:12:32 +02002982_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002985 for (i = 0; i < UCHAR_MAX + 1; i++)
2986 Py_CLEAR(characters[i]);
2987 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988}
2989
Benjamin Peterson4116f362008-05-27 00:36:20 +00002990/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002991
2992typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 PyObject_HEAD
2994 Py_ssize_t it_index;
2995 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997
2998static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 _PyObject_GC_UNTRACK(it);
3002 Py_XDECREF(it->it_seq);
3003 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003004}
3005
3006static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 Py_VISIT(it->it_seq);
3010 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003011}
3012
3013static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 PyBytesObject *seq;
3017 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 assert(it != NULL);
3020 seq = it->it_seq;
3021 if (seq == NULL)
3022 return NULL;
3023 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003025 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3026 item = PyLong_FromLong(
3027 (unsigned char)seq->ob_sval[it->it_index]);
3028 if (item != NULL)
3029 ++it->it_index;
3030 return item;
3031 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003034 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036}
3037
3038static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303039striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 Py_ssize_t len = 0;
3042 if (it->it_seq)
3043 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3044 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003045}
3046
3047PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003050static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303051striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003052{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003053 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003054 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003055 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003056 it->it_seq, it->it_index);
3057 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003058 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003059 }
3060}
3061
3062PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3063
3064static PyObject *
3065striter_setstate(striterobject *it, PyObject *state)
3066{
3067 Py_ssize_t index = PyLong_AsSsize_t(state);
3068 if (index == -1 && PyErr_Occurred())
3069 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003070 if (it->it_seq != NULL) {
3071 if (index < 0)
3072 index = 0;
3073 else if (index > PyBytes_GET_SIZE(it->it_seq))
3074 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3075 it->it_index = index;
3076 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003077 Py_RETURN_NONE;
3078}
3079
3080PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3084 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003085 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3086 reduce_doc},
3087 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3088 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003090};
3091
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003092PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3094 "bytes_iterator", /* tp_name */
3095 sizeof(striterobject), /* tp_basicsize */
3096 0, /* tp_itemsize */
3097 /* methods */
3098 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003099 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 0, /* tp_getattr */
3101 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003102 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 0, /* tp_repr */
3104 0, /* tp_as_number */
3105 0, /* tp_as_sequence */
3106 0, /* tp_as_mapping */
3107 0, /* tp_hash */
3108 0, /* tp_call */
3109 0, /* tp_str */
3110 PyObject_GenericGetAttr, /* tp_getattro */
3111 0, /* tp_setattro */
3112 0, /* tp_as_buffer */
3113 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3114 0, /* tp_doc */
3115 (traverseproc)striter_traverse, /* tp_traverse */
3116 0, /* tp_clear */
3117 0, /* tp_richcompare */
3118 0, /* tp_weaklistoffset */
3119 PyObject_SelfIter, /* tp_iter */
3120 (iternextfunc)striter_next, /* tp_iternext */
3121 striter_methods, /* tp_methods */
3122 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123};
3124
3125static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003126bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003130 if (!PyBytes_Check(seq)) {
3131 PyErr_BadInternalCall();
3132 return NULL;
3133 }
3134 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3135 if (it == NULL)
3136 return NULL;
3137 it->it_index = 0;
3138 Py_INCREF(seq);
3139 it->it_seq = (PyBytesObject *)seq;
3140 _PyObject_GC_TRACK(it);
3141 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003142}
Victor Stinner00165072015-10-09 01:53:21 +02003143
3144
3145/* _PyBytesWriter API */
3146
/* Divisor controlling writer overallocation: when overallocation is enabled,
   a resize to N bytes requests N + N / OVERALLOCATE_FACTOR bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3154
3155void
3156_PyBytesWriter_Init(_PyBytesWriter *writer)
3157{
Victor Stinner661aacc2015-10-14 09:41:48 +02003158 /* Set all attributes before small_buffer to 0 */
3159 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003160#ifndef NDEBUG
3161 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3162 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003163#endif
3164}
3165
3166void
3167_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3168{
3169 Py_CLEAR(writer->buffer);
3170}
3171
3172Py_LOCAL_INLINE(char*)
3173_PyBytesWriter_AsString(_PyBytesWriter *writer)
3174{
Victor Stinner661aacc2015-10-14 09:41:48 +02003175 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003176 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003177 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003178 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003179 else if (writer->use_bytearray) {
3180 assert(writer->buffer != NULL);
3181 return PyByteArray_AS_STRING(writer->buffer);
3182 }
3183 else {
3184 assert(writer->buffer != NULL);
3185 return PyBytes_AS_STRING(writer->buffer);
3186 }
Victor Stinner00165072015-10-09 01:53:21 +02003187}
3188
3189Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003190_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003191{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003192 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003193 assert(str != NULL);
3194 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003195 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003196 return str - start;
3197}
3198
Victor Stinner68762572019-10-07 18:42:01 +02003199#ifndef NDEBUG
3200Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003201_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3202{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003203 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003204
Victor Stinner661aacc2015-10-14 09:41:48 +02003205 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003206 assert(writer->buffer == NULL);
3207 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003208 else {
3209 assert(writer->buffer != NULL);
3210 if (writer->use_bytearray)
3211 assert(PyByteArray_CheckExact(writer->buffer));
3212 else
3213 assert(PyBytes_CheckExact(writer->buffer));
3214 assert(Py_REFCNT(writer->buffer) == 1);
3215 }
Victor Stinner00165072015-10-09 01:53:21 +02003216
Victor Stinner661aacc2015-10-14 09:41:48 +02003217 if (writer->use_bytearray) {
3218 /* bytearray has its own overallocation algorithm,
3219 writer overallocation must be disabled */
3220 assert(!writer->overallocate);
3221 }
3222
3223 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003224 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003225 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003226 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003227 assert(start[writer->allocated] == 0);
3228
3229 end = start + writer->allocated;
3230 assert(str != NULL);
3231 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003232 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003233}
Victor Stinner68762572019-10-07 18:42:01 +02003234#endif
Victor Stinner00165072015-10-09 01:53:21 +02003235
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003236void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003237_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003238{
3239 Py_ssize_t allocated, pos;
3240
Victor Stinner68762572019-10-07 18:42:01 +02003241 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003242 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003243
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003244 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003245 if (writer->overallocate
3246 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3247 /* overallocate to limit the number of realloc() */
3248 allocated += allocated / OVERALLOCATE_FACTOR;
3249 }
3250
Victor Stinner2bf89932015-10-14 11:25:33 +02003251 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003252 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003253 if (writer->use_bytearray) {
3254 if (PyByteArray_Resize(writer->buffer, allocated))
3255 goto error;
3256 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3257 but we cannot use ob_alloc because bytes may need to be moved
3258 to use the whole buffer. bytearray uses an internal optimization
3259 to avoid moving or copying bytes when bytes are removed at the
3260 beginning (ex: del bytearray[:1]). */
3261 }
3262 else {
3263 if (_PyBytes_Resize(&writer->buffer, allocated))
3264 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003265 }
3266 }
3267 else {
3268 /* convert from stack buffer to bytes object buffer */
3269 assert(writer->buffer == NULL);
3270
Victor Stinner661aacc2015-10-14 09:41:48 +02003271 if (writer->use_bytearray)
3272 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3273 else
3274 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003275 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003276 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003277
3278 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003279 char *dest;
3280 if (writer->use_bytearray)
3281 dest = PyByteArray_AS_STRING(writer->buffer);
3282 else
3283 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003284 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003285 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003286 pos);
3287 }
3288
Victor Stinnerb3653a32015-10-09 03:38:24 +02003289 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003290#ifndef NDEBUG
3291 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3292 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003293#endif
Victor Stinner00165072015-10-09 01:53:21 +02003294 }
3295 writer->allocated = allocated;
3296
3297 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003298 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003299 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003300
3301error:
3302 _PyBytesWriter_Dealloc(writer);
3303 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003304}
3305
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003306void*
3307_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3308{
3309 Py_ssize_t new_min_size;
3310
Victor Stinner68762572019-10-07 18:42:01 +02003311 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003312 assert(size >= 0);
3313
3314 if (size == 0) {
3315 /* nothing to do */
3316 return str;
3317 }
3318
3319 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3320 PyErr_NoMemory();
3321 _PyBytesWriter_Dealloc(writer);
3322 return NULL;
3323 }
3324 new_min_size = writer->min_size + size;
3325
3326 if (new_min_size > writer->allocated)
3327 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3328
3329 writer->min_size = new_min_size;
3330 return str;
3331}
3332
Victor Stinner00165072015-10-09 01:53:21 +02003333/* Allocate the buffer to write size bytes.
3334 Return the pointer to the beginning of buffer data.
3335 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003336void*
Victor Stinner00165072015-10-09 01:53:21 +02003337_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3338{
3339 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003340 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003341 assert(size >= 0);
3342
Victor Stinnerb3653a32015-10-09 03:38:24 +02003343 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003344#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003345 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003346 /* In debug mode, don't use the full small buffer because it is less
3347 efficient than bytes and bytearray objects to detect buffer underflow
3348 and buffer overflow. Use 10 bytes of the small buffer to test also
3349 code using the smaller buffer in debug mode.
3350
3351 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3352 in debug mode to also be able to detect stack overflow when running
3353 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3354 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3355 stack overflow. */
3356 writer->allocated = Py_MIN(writer->allocated, 10);
3357 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3358 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003359 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003360#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003361 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003362#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003363 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003364}
3365
3366PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003367_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003368{
Victor Stinner2bf89932015-10-14 11:25:33 +02003369 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003370 PyObject *result;
3371
Victor Stinner68762572019-10-07 18:42:01 +02003372 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003373
Victor Stinner2bf89932015-10-14 11:25:33 +02003374 size = _PyBytesWriter_GetSize(writer, str);
3375 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003376 Py_CLEAR(writer->buffer);
3377 /* Get the empty byte string singleton */
3378 result = PyBytes_FromStringAndSize(NULL, 0);
3379 }
3380 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003381 if (writer->use_bytearray) {
3382 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3383 }
3384 else {
3385 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3386 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003387 }
3388 else {
3389 result = writer->buffer;
3390 writer->buffer = NULL;
3391
Victor Stinner2bf89932015-10-14 11:25:33 +02003392 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003393 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003394 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003395 Py_DECREF(result);
3396 return NULL;
3397 }
3398 }
3399 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 assert(result == NULL);
3402 return NULL;
3403 }
Victor Stinner00165072015-10-09 01:53:21 +02003404 }
3405 }
Victor Stinner00165072015-10-09 01:53:21 +02003406 }
Victor Stinner00165072015-10-09 01:53:21 +02003407 return result;
3408}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003409
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003410void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003411_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003412 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003413{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003414 char *str = (char *)ptr;
3415
Victor Stinnerce179bf2015-10-09 12:57:22 +02003416 str = _PyBytesWriter_Prepare(writer, str, size);
3417 if (str == NULL)
3418 return NULL;
3419
Christian Heimesf051e432016-09-13 20:22:02 +02003420 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421 str += size;
3422
3423 return str;
3424}