blob: 8d6454059ef889f439e354cd7421d1ec6475cd91 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02009#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
/* Cache of shared one-byte bytes objects, indexed by the byte value
   (`*str & UCHAR_MAX`).  A slot is NULL until the constructors in this file
   create the corresponding one-byte object and store it here. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Shared empty bytes object; NULL until the first empty bytes object is
   created. */
static PyBytesObject *nullstring;

/* Identifier used to look up the __bytes__ special method. */
_Py_IDENTIFIER(__bytes__);

/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.
   The "+ 1" accounts for the trailing null byte stored after the data.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)

/* Forward declaration */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
37
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
Martin Pantera90a4a92016-05-30 04:04:50 +000042 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000050 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000051 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020055 allocated for string data, not counting the null terminating character.
56 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020060static PyObject *
61_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020063 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020064 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 Py_INCREF(op);
68 return (PyObject *)op;
69 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070
Victor Stinner049e5092014-08-17 22:20:00 +020071 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 if (op == NULL)
83 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010084 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020086 if (!use_calloc)
87 op->ob_sval[size] = '\0';
88 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020092 }
93 return (PyObject *) op;
94}
95
96PyObject *
97PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98{
99 PyBytesObject *op;
100 if (size < 0) {
101 PyErr_SetString(PyExc_SystemError,
102 "Negative size passed to PyBytes_FromStringAndSize");
103 return NULL;
104 }
105 if (size == 1 && str != NULL &&
106 (op = characters[*str & UCHAR_MAX]) != NULL)
107 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
Christian Heimesf051e432016-09-13 20:22:02 +0200118 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
144 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100153 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200155 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200171 const char *f;
172 const char *p;
173 Py_ssize_t prec;
174 int longflag;
175 int size_tflag;
176 /* Longest 64-bit formatted numbers:
177 - "18446744073709551615\0" (21 bytes)
178 - "-9223372036854775808\0" (21 bytes)
179 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Victor Stinner03dab782015-10-14 00:21:35 +0200181 Longest 64-bit pointer representation:
182 "0xffffffffffffffff\0" (19 bytes). */
183 char buffer[21];
184 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000185
Victor Stinner03dab782015-10-14 00:21:35 +0200186 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 s = _PyBytesWriter_Alloc(&writer, strlen(format));
189 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200191 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192
Victor Stinner03dab782015-10-14 00:21:35 +0200193#define WRITE_BYTES(str) \
194 do { \
195 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196 if (s == NULL) \
197 goto error; \
198 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200201 if (*f != '%') {
202 *s++ = *f;
203 continue;
204 }
205
206 p = f++;
207
208 /* ignore the width (ex: 10 in "%10s") */
209 while (Py_ISDIGIT(*f))
210 f++;
211
212 /* parse the precision (ex: 10 in "%.10s") */
213 prec = 0;
214 if (*f == '.') {
215 f++;
216 for (; Py_ISDIGIT(*f); f++) {
217 prec = (prec * 10) + (*f - '0');
218 }
219 }
220
221 while (*f && *f != '%' && !Py_ISALPHA(*f))
222 f++;
223
224 /* handle the long flag ('l'), but only for %ld and %lu.
225 others can be added when necessary. */
226 longflag = 0;
227 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228 longflag = 1;
229 ++f;
230 }
231
232 /* handle the size_t flag ('z'). */
233 size_tflag = 0;
234 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235 size_tflag = 1;
236 ++f;
237 }
238
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700239 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200240 (ex: 2 for "%s") */
241 writer.min_size -= (f - p + 1);
242
243 switch (*f) {
244 case 'c':
245 {
246 int c = va_arg(vargs, int);
247 if (c < 0 || c > 255) {
248 PyErr_SetString(PyExc_OverflowError,
249 "PyBytes_FromFormatV(): %c format "
250 "expects an integer in range [0; 255]");
251 goto error;
252 }
253 writer.min_size++;
254 *s++ = (unsigned char)c;
255 break;
256 }
257
258 case 'd':
259 if (longflag)
260 sprintf(buffer, "%ld", va_arg(vargs, long));
261 else if (size_tflag)
262 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
263 va_arg(vargs, Py_ssize_t));
264 else
265 sprintf(buffer, "%d", va_arg(vargs, int));
266 assert(strlen(buffer) < sizeof(buffer));
267 WRITE_BYTES(buffer);
268 break;
269
270 case 'u':
271 if (longflag)
272 sprintf(buffer, "%lu",
273 va_arg(vargs, unsigned long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
276 va_arg(vargs, size_t));
277 else
278 sprintf(buffer, "%u",
279 va_arg(vargs, unsigned int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'i':
285 sprintf(buffer, "%i", va_arg(vargs, int));
286 assert(strlen(buffer) < sizeof(buffer));
287 WRITE_BYTES(buffer);
288 break;
289
290 case 'x':
291 sprintf(buffer, "%x", va_arg(vargs, int));
292 assert(strlen(buffer) < sizeof(buffer));
293 WRITE_BYTES(buffer);
294 break;
295
296 case 's':
297 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200299
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200300 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200301 if (prec <= 0) {
302 i = strlen(p);
303 }
304 else {
305 i = 0;
306 while (i < prec && p[i]) {
307 i++;
308 }
309 }
Victor Stinner03dab782015-10-14 00:21:35 +0200310 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
311 if (s == NULL)
312 goto error;
313 break;
314 }
315
316 case 'p':
317 sprintf(buffer, "%p", va_arg(vargs, void*));
318 assert(strlen(buffer) < sizeof(buffer));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (buffer[1] == 'X')
321 buffer[1] = 'x';
322 else if (buffer[1] != 'x') {
323 memmove(buffer+2, buffer, strlen(buffer)+1);
324 buffer[0] = '0';
325 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 }
Victor Stinner03dab782015-10-14 00:21:35 +0200327 WRITE_BYTES(buffer);
328 break;
329
330 case '%':
331 writer.min_size++;
332 *s++ = '%';
333 break;
334
335 default:
336 if (*f == 0) {
337 /* fix min_size if we reached the end of the format string */
338 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340
Victor Stinner03dab782015-10-14 00:21:35 +0200341 /* invalid format string: copy unformatted string and exit */
342 WRITE_BYTES(p);
343 return _PyBytesWriter_Finish(&writer, s);
344 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346
Victor Stinner03dab782015-10-14 00:21:35 +0200347#undef WRITE_BYTES
348
349 return _PyBytesWriter_Finish(&writer, s);
350
351 error:
352 _PyBytesWriter_Dealloc(&writer);
353 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354}
355
356PyObject *
357PyBytes_FromFormat(const char *format, ...)
358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 PyObject* ret;
360 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361
362#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000364#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 ret = PyBytes_FromFormatV(format, vargs);
368 va_end(vargs);
369 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000370}
371
Ethan Furmanb95b5612015-01-23 20:05:18 -0800372/* Helpers for formatstring */
373
374Py_LOCAL_INLINE(PyObject *)
375getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
376{
377 Py_ssize_t argidx = *p_argidx;
378 if (argidx < arglen) {
379 (*p_argidx)++;
380 if (arglen < 0)
381 return args;
382 else
383 return PyTuple_GetItem(args, argidx);
384 }
385 PyErr_SetString(PyExc_TypeError,
386 "not enough arguments for format string");
387 return NULL;
388}
389
/* Flag bits recorded while parsing a '%' format specifier.  Each bit
 * corresponds to one flag character, shown next to its definition.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
402
403/* Returns a new reference to a PyBytes object, or NULL on failure. */
404
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200405static char*
406formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200407 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800408{
409 char *p;
410 PyObject *result;
411 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413
414 x = PyFloat_AsDouble(v);
415 if (x == -1.0 && PyErr_Occurred()) {
416 PyErr_Format(PyExc_TypeError, "float argument required, "
417 "not %.200s", Py_TYPE(v)->tp_name);
418 return NULL;
419 }
420
421 if (prec < 0)
422 prec = 6;
423
424 p = PyOS_double_to_string(x, type, prec,
425 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
426
427 if (p == NULL)
428 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200429
430 len = strlen(p);
431 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200432 str = _PyBytesWriter_Prepare(writer, str, len);
433 if (str == NULL)
434 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200435 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200436 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200437 str += len;
438 return str;
439 }
440
441 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600444 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800445}
446
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300447static PyObject *
448formatlong(PyObject *v, int flags, int prec, int type)
449{
450 PyObject *result, *iobj;
451 if (type == 'i')
452 type = 'd';
453 if (PyLong_Check(v))
454 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455 if (PyNumber_Check(v)) {
456 /* make sure number is a type of integer for o, x, and X */
457 if (type == 'o' || type == 'x' || type == 'X')
Serhiy Storchaka5f4b229d2020-05-28 10:33:45 +0300458 iobj = _PyNumber_Index(v);
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300459 else
460 iobj = PyNumber_Long(v);
461 if (iobj == NULL) {
462 if (!PyErr_ExceptionMatches(PyExc_TypeError))
463 return NULL;
464 }
465 else if (!PyLong_Check(iobj))
466 Py_CLEAR(iobj);
467 if (iobj != NULL) {
468 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469 Py_DECREF(iobj);
470 return result;
471 }
472 }
473 PyErr_Format(PyExc_TypeError,
474 "%%%c format: %s is required, not %.200s", type,
475 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476 : "a number",
477 Py_TYPE(v)->tp_name);
478 return NULL;
479}
480
481static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200482byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300484 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200485 *p = PyBytes_AS_STRING(arg)[0];
486 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300488 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489 *p = PyByteArray_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
492 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300493 PyObject *iobj;
494 long ival;
495 int overflow;
496 /* make sure number is a type of integer */
497 if (PyLong_Check(arg)) {
498 ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 }
500 else {
501 iobj = PyNumber_Index(arg);
502 if (iobj == NULL) {
503 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504 return 0;
505 goto onError;
506 }
507 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508 Py_DECREF(iobj);
509 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300510 if (!overflow && ival == -1 && PyErr_Occurred())
511 goto onError;
512 if (overflow || !(0 <= ival && ival <= 255)) {
513 PyErr_SetString(PyExc_OverflowError,
514 "%c arg not in range(256)");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 *p = (char)ival;
518 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300520 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyErr_SetString(PyExc_TypeError,
522 "%c requires an integer in range(256) or a single byte");
523 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524}
525
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800526static PyObject *_PyBytes_FromBuffer(PyObject *x);
527
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200529format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532 /* is it a bytes object? */
533 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 *pbuf = PyBytes_AS_STRING(v);
535 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 return v;
538 }
539 if (PyByteArray_Check(v)) {
540 *pbuf = PyByteArray_AS_STRING(v);
541 *plen = PyByteArray_GET_SIZE(v);
542 Py_INCREF(v);
543 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 }
545 /* does it support __bytes__? */
546 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
547 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100548 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 Py_DECREF(func);
550 if (result == NULL)
551 return NULL;
552 if (!PyBytes_Check(result)) {
553 PyErr_Format(PyExc_TypeError,
554 "__bytes__ returned non-bytes (type %.200s)",
555 Py_TYPE(result)->tp_name);
556 Py_DECREF(result);
557 return NULL;
558 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200559 *pbuf = PyBytes_AS_STRING(result);
560 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800561 return result;
562 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800563 /* does it support buffer protocol? */
564 if (PyObject_CheckBuffer(v)) {
565 /* maybe we can avoid making a copy of the buffer object here? */
566 result = _PyBytes_FromBuffer(v);
567 if (result == NULL)
568 return NULL;
569 *pbuf = PyBytes_AS_STRING(result);
570 *plen = PyBytes_GET_SIZE(result);
571 return result;
572 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800574 "%%b requires a bytes-like object, "
575 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_TYPE(v)->tp_name);
577 return NULL;
578}
579
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200580/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581
582PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200583_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
584 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585{
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 const char *fmt;
587 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200589 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800590 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592 _PyBytesWriter writer;
593
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800595 PyErr_BadInternalCall();
596 return NULL;
597 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200598 fmt = format;
599 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600
601 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200603
604 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
605 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200607 if (!use_bytearray)
608 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200609
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 if (PyTuple_Check(args)) {
611 arglen = PyTuple_GET_SIZE(args);
612 argidx = 0;
613 }
614 else {
615 arglen = -1;
616 argidx = -2;
617 }
618 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
619 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
620 !PyByteArray_Check(args)) {
621 dict = args;
622 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200623
Ethan Furmanb95b5612015-01-23 20:05:18 -0800624 while (--fmtcnt >= 0) {
625 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 Py_ssize_t len;
627 char *pos;
628
Xiang Zhangb76ad512017-03-06 17:17:05 +0800629 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630 if (pos != NULL)
631 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200632 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800633 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 assert(len != 0);
635
Christian Heimesf051e432016-09-13 20:22:02 +0200636 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200637 res += len;
638 fmt += len;
639 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 }
641 else {
642 /* Got a format specifier */
643 int flags = 0;
644 Py_ssize_t width = -1;
645 int prec = -1;
646 int c = '\0';
647 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 PyObject *v = NULL;
649 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200650 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200652 Py_ssize_t len = 0;
653 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200654 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200657 if (*fmt == '%') {
658 *res++ = '%';
659 fmt++;
660 fmtcnt--;
661 continue;
662 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200664 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800665 Py_ssize_t keylen;
666 PyObject *key;
667 int pcount = 1;
668
669 if (dict == NULL) {
670 PyErr_SetString(PyExc_TypeError,
671 "format requires a mapping");
672 goto error;
673 }
674 ++fmt;
675 --fmtcnt;
676 keystart = fmt;
677 /* Skip over balanced parentheses */
678 while (pcount > 0 && --fmtcnt >= 0) {
679 if (*fmt == ')')
680 --pcount;
681 else if (*fmt == '(')
682 ++pcount;
683 fmt++;
684 }
685 keylen = fmt - keystart - 1;
686 if (fmtcnt < 0 || pcount > 0) {
687 PyErr_SetString(PyExc_ValueError,
688 "incomplete format key");
689 goto error;
690 }
691 key = PyBytes_FromStringAndSize(keystart,
692 keylen);
693 if (key == NULL)
694 goto error;
695 if (args_owned) {
696 Py_DECREF(args);
697 args_owned = 0;
698 }
699 args = PyObject_GetItem(dict, key);
700 Py_DECREF(key);
701 if (args == NULL) {
702 goto error;
703 }
704 args_owned = 1;
705 arglen = -1;
706 argidx = -2;
707 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200708
709 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800710 while (--fmtcnt >= 0) {
711 switch (c = *fmt++) {
712 case '-': flags |= F_LJUST; continue;
713 case '+': flags |= F_SIGN; continue;
714 case ' ': flags |= F_BLANK; continue;
715 case '#': flags |= F_ALT; continue;
716 case '0': flags |= F_ZERO; continue;
717 }
718 break;
719 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200720
721 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800722 if (c == '*') {
723 v = getnextarg(args, arglen, &argidx);
724 if (v == NULL)
725 goto error;
726 if (!PyLong_Check(v)) {
727 PyErr_SetString(PyExc_TypeError,
728 "* wants int");
729 goto error;
730 }
731 width = PyLong_AsSsize_t(v);
732 if (width == -1 && PyErr_Occurred())
733 goto error;
734 if (width < 0) {
735 flags |= F_LJUST;
736 width = -width;
737 }
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 width = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "width too big");
751 goto error;
752 }
753 width = width*10 + (c - '0');
754 }
755 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200756
757 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800758 if (c == '.') {
759 prec = 0;
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 if (c == '*') {
763 v = getnextarg(args, arglen, &argidx);
764 if (v == NULL)
765 goto error;
766 if (!PyLong_Check(v)) {
767 PyErr_SetString(
768 PyExc_TypeError,
769 "* wants int");
770 goto error;
771 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200772 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800773 if (prec == -1 && PyErr_Occurred())
774 goto error;
775 if (prec < 0)
776 prec = 0;
777 if (--fmtcnt >= 0)
778 c = *fmt++;
779 }
780 else if (c >= 0 && isdigit(c)) {
781 prec = c - '0';
782 while (--fmtcnt >= 0) {
783 c = Py_CHARMASK(*fmt++);
784 if (!isdigit(c))
785 break;
786 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
787 PyErr_SetString(
788 PyExc_ValueError,
789 "prec too big");
790 goto error;
791 }
792 prec = prec*10 + (c - '0');
793 }
794 }
795 } /* prec */
796 if (fmtcnt >= 0) {
797 if (c == 'h' || c == 'l' || c == 'L') {
798 if (--fmtcnt >= 0)
799 c = *fmt++;
800 }
801 }
802 if (fmtcnt < 0) {
803 PyErr_SetString(PyExc_ValueError,
804 "incomplete format");
805 goto error;
806 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200807 v = getnextarg(args, arglen, &argidx);
808 if (v == NULL)
809 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200810
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300811 if (fmtcnt == 0) {
812 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813 writer.overallocate = 0;
814 }
815
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 sign = 0;
817 fill = ' ';
818 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700819 case 'r':
820 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800821 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200822 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (temp == NULL)
824 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200825 assert(PyUnicode_IS_ASCII(temp));
826 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
827 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 if (prec >= 0 && len > prec)
829 len = prec;
830 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 's':
833 // %s is only for 2/3 code; 3 only code should use %b
834 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200835 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 if (temp == NULL)
837 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (prec >= 0 && len > prec)
839 len = prec;
840 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200841
Ethan Furmanb95b5612015-01-23 20:05:18 -0800842 case 'i':
843 case 'd':
844 case 'u':
845 case 'o':
846 case 'x':
847 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200849 && width == -1 && prec == -1
850 && !(flags & (F_SIGN | F_BLANK))
851 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200852 {
853 /* Fast path */
854 int alternate = flags & F_ALT;
855 int base;
856
857 switch(c)
858 {
859 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700860 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200861 case 'd':
862 case 'i':
863 case 'u':
864 base = 10;
865 break;
866 case 'o':
867 base = 8;
868 break;
869 case 'x':
870 case 'X':
871 base = 16;
872 break;
873 }
874
875 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200876 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200877 res = _PyLong_FormatBytesWriter(&writer, res,
878 v, base, alternate);
879 if (res == NULL)
880 goto error;
881 continue;
882 }
883
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300884 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200885 if (!temp)
886 goto error;
887 assert(PyUnicode_IS_ASCII(temp));
888 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
889 len = PyUnicode_GET_LENGTH(temp);
890 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 if (flags & F_ZERO)
892 fill = '0';
893 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200894
Ethan Furmanb95b5612015-01-23 20:05:18 -0800895 case 'e':
896 case 'E':
897 case 'f':
898 case 'F':
899 case 'g':
900 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200901 if (width == -1 && prec == -1
902 && !(flags & (F_SIGN | F_BLANK)))
903 {
904 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200905 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200906 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200907 if (res == NULL)
908 goto error;
909 continue;
910 }
911
Victor Stinnerad771582015-10-09 12:38:53 +0200912 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 goto error;
914 pbuf = PyBytes_AS_STRING(temp);
915 len = PyBytes_GET_SIZE(temp);
916 sign = 1;
917 if (flags & F_ZERO)
918 fill = '0';
919 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200920
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200922 pbuf = &onechar;
923 len = byte_converter(v, &onechar);
924 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200926 if (width == -1) {
927 /* Fast path */
928 *res++ = onechar;
929 continue;
930 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 default:
934 PyErr_Format(PyExc_ValueError,
935 "unsupported format character '%c' (0x%x) "
936 "at index %zd",
937 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200938 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 goto error;
940 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200941
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 if (sign) {
943 if (*pbuf == '-' || *pbuf == '+') {
944 sign = *pbuf++;
945 len--;
946 }
947 else if (flags & F_SIGN)
948 sign = '+';
949 else if (flags & F_BLANK)
950 sign = ' ';
951 else
952 sign = 0;
953 }
954 if (width < len)
955 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200956
957 alloc = width;
958 if (sign != 0 && len == width)
959 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200960 /* 2: size preallocated for %s */
961 if (alloc > 2) {
962 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200963 if (res == NULL)
964 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200966#ifndef NDEBUG
967 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968#endif
969
970 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800971 if (sign) {
972 if (fill != ' ')
973 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800974 if (width > len)
975 width--;
976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977
978 /* Write the numeric prefix for "x", "X" and "o" formats
979 if the alternate form is used.
980 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200981 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 assert(pbuf[0] == '0');
983 assert(pbuf[1] == c);
984 if (fill != ' ') {
985 *res++ = *pbuf++;
986 *res++ = *pbuf++;
987 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800988 width -= 2;
989 if (width < 0)
990 width = 0;
991 len -= 2;
992 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
994 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996 memset(res, fill, width - len);
997 res += (width - len);
998 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001000
1001 /* If padding with spaces: write sign if needed and/or numeric
1002 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 if (fill == ' ') {
1004 if (sign)
1005 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001006 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 assert(pbuf[0] == '0');
1008 assert(pbuf[1] == c);
1009 *res++ = *pbuf++;
1010 *res++ = *pbuf++;
1011 }
1012 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001015 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001017
1018 /* Pad right with the fill character if needed */
1019 if (width > len) {
1020 memset(res, ' ', width - len);
1021 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001024 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 PyErr_SetString(PyExc_TypeError,
1026 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
1028 goto error;
1029 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001030 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001032#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033 /* check that we computed the exact size for this write */
1034 assert((res - before) == alloc);
1035#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001037
1038 /* If overallocation was disabled, ensure that it was the last
1039 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001040 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 if (argidx < arglen && !dict) {
1044 PyErr_SetString(PyExc_TypeError,
1045 "not all arguments converted during bytes formatting");
1046 goto error;
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049 if (args_owned) {
1050 Py_DECREF(args);
1051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053
1054 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001056 if (args_owned) {
1057 Py_DECREF(args);
1058 }
1059 return NULL;
1060}
1061
Greg Price3a4f6672019-09-12 11:12:22 -07001062/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001063PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_ssize_t len,
1065 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001066 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001069 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001071 _PyBytesWriter writer;
1072
1073 _PyBytesWriter_Init(&writer);
1074
1075 p = _PyBytesWriter_Alloc(&writer, len);
1076 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001078 writer.overallocate = 1;
1079
Eric V. Smith42454af2016-10-31 09:22:08 -04001080 *first_invalid_escape = NULL;
1081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001085 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 continue;
1087 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001090 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 PyErr_SetString(PyExc_ValueError,
1092 "Trailing \\ in string");
1093 goto failed;
1094 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 switch (*s++) {
1097 /* XXX This assumes ASCII! */
1098 case '\n': break;
1099 case '\\': *p++ = '\\'; break;
1100 case '\'': *p++ = '\''; break;
1101 case '\"': *p++ = '\"'; break;
1102 case 'b': *p++ = '\b'; break;
1103 case 'f': *p++ = '\014'; break; /* FF */
1104 case 't': *p++ = '\t'; break;
1105 case 'n': *p++ = '\n'; break;
1106 case 'r': *p++ = '\r'; break;
1107 case 'v': *p++ = '\013'; break; /* VT */
1108 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1109 case '0': case '1': case '2': case '3':
1110 case '4': case '5': case '6': case '7':
1111 c = s[-1] - '0';
1112 if (s < end && '0' <= *s && *s <= '7') {
1113 c = (c<<3) + *s++ - '0';
1114 if (s < end && '0' <= *s && *s <= '7')
1115 c = (c<<3) + *s++ - '0';
1116 }
1117 *p++ = c;
1118 break;
1119 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001120 if (s+1 < end) {
1121 int digit1, digit2;
1122 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1123 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1124 if (digit1 < 16 && digit2 < 16) {
1125 *p++ = (unsigned char)((digit1 << 4) + digit2);
1126 s += 2;
1127 break;
1128 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 /* invalid hexadecimal digits */
1131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001133 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001134 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001135 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 goto failed;
1137 }
1138 if (strcmp(errors, "replace") == 0) {
1139 *p++ = '?';
1140 } else if (strcmp(errors, "ignore") == 0)
1141 /* do nothing */;
1142 else {
1143 PyErr_Format(PyExc_ValueError,
1144 "decoding error; unknown "
1145 "error handling code: %.400s",
1146 errors);
1147 goto failed;
1148 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001149 /* skip \x */
1150 if (s < end && Py_ISXDIGIT(s[0]))
1151 s++; /* and a hexdigit */
1152 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001155 if (*first_invalid_escape == NULL) {
1156 *first_invalid_escape = s-1; /* Back up one char, since we've
1157 already incremented s. */
1158 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001160 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 }
1162 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001163
1164 return _PyBytesWriter_Finish(&writer, p);
1165
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001167 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169}
1170
Eric V. Smith42454af2016-10-31 09:22:08 -04001171PyObject *PyBytes_DecodeEscape(const char *s,
1172 Py_ssize_t len,
1173 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001174 Py_ssize_t Py_UNUSED(unicode),
1175 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001176{
1177 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001178 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001179 &first_invalid_escape);
1180 if (result == NULL)
1181 return NULL;
1182 if (first_invalid_escape != NULL) {
1183 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1184 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001185 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001186 Py_DECREF(result);
1187 return NULL;
1188 }
1189 }
1190 return result;
1191
1192}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193/* -------------------------------------------------------------------- */
1194/* object api */
1195
1196Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001197PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (!PyBytes_Check(op)) {
1200 PyErr_Format(PyExc_TypeError,
1201 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1202 return -1;
1203 }
1204 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205}
1206
1207char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001208PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return NULL;
1214 }
1215 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216}
1217
1218int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001219PyBytes_AsStringAndSize(PyObject *obj,
1220 char **s,
1221 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 if (s == NULL) {
1224 PyErr_BadInternalCall();
1225 return -1;
1226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (!PyBytes_Check(obj)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1231 return -1;
1232 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 *s = PyBytes_AS_STRING(obj);
1235 if (len != NULL)
1236 *len = PyBytes_GET_SIZE(obj);
1237 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001238 PyErr_SetString(PyExc_ValueError,
1239 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return -1;
1241 }
1242 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
1245/* -------------------------------------------------------------------- */
1246/* Methods */
1247
Eric Smith0923d1d2009-04-16 20:16:10 +00001248#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001249
1250#include "stringlib/fastsearch.h"
1251#include "stringlib/count.h"
1252#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001253#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001254#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001255#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001256#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
Eric Smith0f78bff2009-11-30 01:01:42 +00001258#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260PyObject *
1261PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001263 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001265 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001267 unsigned char quote;
1268 const unsigned char *s;
1269 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001270
1271 /* Compute size of output string */
1272 squotes = dquotes = 0;
1273 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001274 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001276 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001277 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001278 case '\'': squotes++; break;
1279 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001281 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 default:
1283 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001284 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001286 if (newsize > PY_SSIZE_T_MAX - incr)
1287 goto overflow;
1288 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 }
1290 quote = '\'';
1291 if (smartquotes && squotes && !dquotes)
1292 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001293 if (squotes && quote == '\'') {
1294 if (newsize > PY_SSIZE_T_MAX - squotes)
1295 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298
1299 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 if (v == NULL) {
1301 return NULL;
1302 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 *p++ = 'b', *p++ = quote;
1306 for (i = 0; i < length; i++) {
1307 unsigned char c = op->ob_sval[i];
1308 if (c == quote || c == '\\')
1309 *p++ = '\\', *p++ = c;
1310 else if (c == '\t')
1311 *p++ = '\\', *p++ = 't';
1312 else if (c == '\n')
1313 *p++ = '\\', *p++ = 'n';
1314 else if (c == '\r')
1315 *p++ = '\\', *p++ = 'r';
1316 else if (c < ' ' || c >= 0x7f) {
1317 *p++ = '\\';
1318 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001319 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1320 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 else
1323 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001326 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001328
1329 overflow:
1330 PyErr_SetString(PyExc_OverflowError,
1331 "bytes object is too large to make repr");
1332 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001333}
1334
Neal Norwitz6968b052007-02-27 19:02:19 +00001335static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001336bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001339}
1340
Neal Norwitz6968b052007-02-27 19:02:19 +00001341static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001342bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001343{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001344 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001346 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
1350 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001351}
1352
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001354bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357}
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359/* This is also used by PyBytes_Concat() */
1360static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001361bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 Py_buffer va, vb;
1364 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 va.len = -1;
1367 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001368 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1369 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001371 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 goto done;
1373 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 /* Optimize end cases */
1376 if (va.len == 0 && PyBytes_CheckExact(b)) {
1377 result = b;
1378 Py_INCREF(result);
1379 goto done;
1380 }
1381 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1382 result = a;
1383 Py_INCREF(result);
1384 goto done;
1385 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001387 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 PyErr_NoMemory();
1389 goto done;
1390 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001392 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 if (result != NULL) {
1394 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1395 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
1398 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (va.len != -1)
1400 PyBuffer_Release(&va);
1401 if (vb.len != -1)
1402 PyBuffer_Release(&vb);
1403 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404}
Neal Norwitz6968b052007-02-27 19:02:19 +00001405
1406static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001407bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001408{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409 Py_ssize_t i;
1410 Py_ssize_t j;
1411 Py_ssize_t size;
1412 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 size_t nbytes;
1414 if (n < 0)
1415 n = 0;
1416 /* watch out for overflows: the size can overflow int,
1417 * and the # of bytes needed can overflow size_t
1418 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001419 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyErr_SetString(PyExc_OverflowError,
1421 "repeated bytes are too long");
1422 return NULL;
1423 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001424 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1426 Py_INCREF(a);
1427 return (PyObject *)a;
1428 }
1429 nbytes = (size_t)size;
1430 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1431 PyErr_SetString(PyExc_OverflowError,
1432 "repeated bytes are too long");
1433 return NULL;
1434 }
1435 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1436 if (op == NULL)
1437 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001438 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 op->ob_shash = -1;
1440 op->ob_sval[size] = '\0';
1441 if (Py_SIZE(a) == 1 && n > 0) {
1442 memset(op->ob_sval, a->ob_sval[0] , n);
1443 return (PyObject *) op;
1444 }
1445 i = 0;
1446 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001447 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 i = Py_SIZE(a);
1449 }
1450 while (i < size) {
1451 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001452 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 i += j;
1454 }
1455 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001456}
1457
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001458static int
1459bytes_contains(PyObject *self, PyObject *arg)
1460{
1461 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1462}
1463
Neal Norwitz6968b052007-02-27 19:02:19 +00001464static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001465bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (i < 0 || i >= Py_SIZE(a)) {
1468 PyErr_SetString(PyExc_IndexError, "index out of range");
1469 return NULL;
1470 }
1471 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001472}
1473
Benjamin Peterson621b4302016-09-09 13:54:34 -07001474static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001475bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1476{
1477 int cmp;
1478 Py_ssize_t len;
1479
1480 len = Py_SIZE(a);
1481 if (Py_SIZE(b) != len)
1482 return 0;
1483
1484 if (a->ob_sval[0] != b->ob_sval[0])
1485 return 0;
1486
1487 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1488 return (cmp == 0);
1489}
1490
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001492bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001493{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 int c;
1495 Py_ssize_t len_a, len_b;
1496 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001497 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 /* Make sure both arguments are strings. */
1500 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001501 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001502 rc = PyObject_IsInstance((PyObject*)a,
1503 (PyObject*)&PyUnicode_Type);
1504 if (!rc)
1505 rc = PyObject_IsInstance((PyObject*)b,
1506 (PyObject*)&PyUnicode_Type);
1507 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001509 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001510 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001511 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001512 return NULL;
1513 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001514 else {
1515 rc = PyObject_IsInstance((PyObject*)a,
1516 (PyObject*)&PyLong_Type);
1517 if (!rc)
1518 rc = PyObject_IsInstance((PyObject*)b,
1519 (PyObject*)&PyLong_Type);
1520 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001521 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001522 if (rc) {
1523 if (PyErr_WarnEx(PyExc_BytesWarning,
1524 "Comparison between bytes and int", 1))
1525 return NULL;
1526 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001527 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 }
stratakise8b19652017-11-02 11:32:54 +01001529 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001531 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001533 case Py_EQ:
1534 case Py_LE:
1535 case Py_GE:
1536 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001537 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 case Py_NE:
1539 case Py_LT:
1540 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001541 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001542 default:
1543 PyErr_BadArgument();
1544 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
1546 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547 else if (op == Py_EQ || op == Py_NE) {
1548 int eq = bytes_compare_eq(a, b);
1549 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001550 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001551 }
1552 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001553 len_a = Py_SIZE(a);
1554 len_b = Py_SIZE(b);
1555 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001556 if (min_len > 0) {
1557 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001558 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 else
1562 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001563 if (c != 0)
1564 Py_RETURN_RICHCOMPARE(c, 0, op);
1565 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001567}
1568
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001569static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001570bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001571{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001572 if (a->ob_shash == -1) {
1573 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001574 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001575 }
1576 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001577}
1578
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001580bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581{
Victor Stinnera15e2602020-04-08 02:01:56 +02001582 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1584 if (i == -1 && PyErr_Occurred())
1585 return NULL;
1586 if (i < 0)
1587 i += PyBytes_GET_SIZE(self);
1588 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1589 PyErr_SetString(PyExc_IndexError,
1590 "index out of range");
1591 return NULL;
1592 }
1593 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1594 }
1595 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001596 Py_ssize_t start, stop, step, slicelength, i;
1597 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001598 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 char* result_buf;
1600 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001601
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001602 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 return NULL;
1604 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001605 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1606 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (slicelength <= 0) {
1609 return PyBytes_FromStringAndSize("", 0);
1610 }
1611 else if (start == 0 && step == 1 &&
1612 slicelength == PyBytes_GET_SIZE(self) &&
1613 PyBytes_CheckExact(self)) {
1614 Py_INCREF(self);
1615 return (PyObject *)self;
1616 }
1617 else if (step == 1) {
1618 return PyBytes_FromStringAndSize(
1619 PyBytes_AS_STRING(self) + start,
1620 slicelength);
1621 }
1622 else {
1623 source_buf = PyBytes_AS_STRING(self);
1624 result = PyBytes_FromStringAndSize(NULL, slicelength);
1625 if (result == NULL)
1626 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 result_buf = PyBytes_AS_STRING(result);
1629 for (cur = start, i = 0; i < slicelength;
1630 cur += step, i++) {
1631 result_buf[i] = source_buf[cur];
1632 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 return result;
1635 }
1636 }
1637 else {
1638 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001639 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 Py_TYPE(item)->tp_name);
1641 return NULL;
1642 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643}
1644
1645static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001646bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1649 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001652static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 (lenfunc)bytes_length, /*sq_length*/
1654 (binaryfunc)bytes_concat, /*sq_concat*/
1655 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1656 (ssizeargfunc)bytes_item, /*sq_item*/
1657 0, /*sq_slice*/
1658 0, /*sq_ass_item*/
1659 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001660 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661};
1662
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001663static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 (lenfunc)bytes_length,
1665 (binaryfunc)bytes_subscript,
1666 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001667};
1668
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001669static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 (getbufferproc)bytes_buffer_getbuffer,
1671 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672};
1673
1674
/* Values for the striptype argument of the strip helpers below. */
#define LEFTSTRIP   0   /* strip at the start only */
#define RIGHTSTRIP  1   /* strip at the end only */
#define BOTHSTRIP   2   /* strip at both ends */
1678
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001679/*[clinic input]
1680bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682 sep: object = None
        The delimiter according to which to split the bytes.
1684 None (the default value) means split on ASCII whitespace characters
1685 (space, tab, return, newline, formfeed, vertical tab).
1686 maxsplit: Py_ssize_t = -1
1687 Maximum number of splits to do.
1688 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001690Return a list of the sections in the bytes, using sep as the delimiter.
1691[clinic start generated code]*/
1692
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001694bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1695/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001696{
1697 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 const char *s = PyBytes_AS_STRING(self), *sub;
1699 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 if (maxsplit < 0)
1703 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001704 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001706 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return NULL;
1708 sub = vsub.buf;
1709 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1712 PyBuffer_Release(&vsub);
1713 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001714}
1715
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716/*[clinic input]
1717bytes.partition
1718
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001719 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720 /
1721
1722Partition the bytes into three parts using the given separator.
1723
1724This will search for the separator sep in the bytes. If the separator is found,
1725returns a 3-tuple containing the part before the separator, the separator
1726itself, and the part after it.
1727
1728If the separator is not found, returns a 3-tuple containing the original bytes
1729object and two empty bytes objects.
1730[clinic start generated code]*/
1731
Neal Norwitz6968b052007-02-27 19:02:19 +00001732static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001733bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001734/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001735{
Neal Norwitz6968b052007-02-27 19:02:19 +00001736 return stringlib_partition(
1737 (PyObject*) self,
1738 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001739 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001740 );
1741}
1742
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001743/*[clinic input]
1744bytes.rpartition
1745
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001746 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001747 /
1748
1749Partition the bytes into three parts using the given separator.
1750
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001751This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752the separator is found, returns a 3-tuple containing the part before the
1753separator, the separator itself, and the part after it.
1754
1755If the separator is not found, returns a 3-tuple containing two empty bytes
1756objects and the original bytes object.
1757[clinic start generated code]*/
1758
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001759static PyObject *
1760bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001761/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 return stringlib_rpartition(
1764 (PyObject*) self,
1765 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001768}
1769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770/*[clinic input]
1771bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773Return a list of the sections in the bytes, using sep as the delimiter.
1774
1775Splitting is done starting at the end of the bytes and working to the front.
1776[clinic start generated code]*/
1777
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001779bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781{
1782 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 const char *s = PyBytes_AS_STRING(self), *sub;
1784 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if (maxsplit < 0)
1788 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001791 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return NULL;
1793 sub = vsub.buf;
1794 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1797 PyBuffer_Release(&vsub);
1798 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001799}
1800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001802/*[clinic input]
1803bytes.join
1804
1805 iterable_of_bytes: object
1806 /
1807
1808Concatenate any number of bytes objects.
1809
1810The bytes whose method is called is inserted in between each pair.
1811
1812The result is returned as a new bytes object.
1813
1814Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1815[clinic start generated code]*/
1816
Neal Norwitz6968b052007-02-27 19:02:19 +00001817static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001818bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1819/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001820{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001821 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001822}
1823
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824PyObject *
1825_PyBytes_Join(PyObject *sep, PyObject *x)
1826{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 assert(sep != NULL && PyBytes_Check(sep));
1828 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001829 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830}
1831
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001832static PyObject *
1833bytes_find(PyBytesObject *self, PyObject *args)
1834{
1835 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1836}
1837
1838static PyObject *
1839bytes_index(PyBytesObject *self, PyObject *args)
1840{
1841 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1842}
1843
1844
1845static PyObject *
1846bytes_rfind(PyBytesObject *self, PyObject *args)
1847{
1848 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1849}
1850
1851
1852static PyObject *
1853bytes_rindex(PyBytesObject *self, PyObject *args)
1854{
1855 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1856}
1857
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
1859Py_LOCAL_INLINE(PyObject *)
1860do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001863 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 Py_ssize_t len = PyBytes_GET_SIZE(self);
1865 char *sep;
1866 Py_ssize_t seplen;
1867 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001869 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 return NULL;
1871 sep = vsep.buf;
1872 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 i = 0;
1875 if (striptype != RIGHTSTRIP) {
1876 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1877 i++;
1878 }
1879 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 j = len;
1882 if (striptype != LEFTSTRIP) {
1883 do {
1884 j--;
1885 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1886 j++;
1887 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1892 Py_INCREF(self);
1893 return (PyObject*)self;
1894 }
1895 else
1896 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001897}
1898
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
1900Py_LOCAL_INLINE(PyObject *)
1901do_strip(PyBytesObject *self, int striptype)
1902{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001903 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 i = 0;
1907 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001908 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 i++;
1910 }
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 j = len;
1914 if (striptype != LEFTSTRIP) {
1915 do {
1916 j--;
David Malcolm96960882010-11-05 17:23:41 +00001917 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 j++;
1919 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1922 Py_INCREF(self);
1923 return (PyObject*)self;
1924 }
1925 else
1926 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927}
1928
1929
1930Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001931do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001933 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001934 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 }
1936 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937}
1938
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001939/*[clinic input]
1940bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942 bytes: object = None
1943 /
1944
1945Strip leading and trailing bytes contained in the argument.
1946
1947If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1948[clinic start generated code]*/
1949
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001951bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001952/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001953{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001955}
1956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957/*[clinic input]
1958bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960 bytes: object = None
1961 /
1962
1963Strip leading bytes contained in the argument.
1964
1965If the argument is omitted or None, strip leading ASCII whitespace.
1966[clinic start generated code]*/
1967
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001968static PyObject *
1969bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001970/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971{
1972 return do_argstrip(self, LEFTSTRIP, bytes);
1973}
1974
1975/*[clinic input]
1976bytes.rstrip
1977
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001978 bytes: object = None
1979 /
1980
1981Strip trailing bytes contained in the argument.
1982
1983If the argument is omitted or None, strip trailing ASCII whitespace.
1984[clinic start generated code]*/
1985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986static PyObject *
1987bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001988/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989{
1990 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001991}
Neal Norwitz6968b052007-02-27 19:02:19 +00001992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001994static PyObject *
1995bytes_count(PyBytesObject *self, PyObject *args)
1996{
1997 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1998}
1999
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001/*[clinic input]
2002bytes.translate
2003
Victor Stinner049e5092014-08-17 22:20:00 +02002004 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002007 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008
2009Return a copy with each character mapped by the given translation table.
2010
Martin Panter1b6c6da2016-08-27 08:35:02 +00002011All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012The remaining characters are mapped through the given translation table.
2013[clinic start generated code]*/
2014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002016bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002017 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002018/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002020 const char *input;
2021 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002022 Py_buffer table_view = {NULL, NULL};
2023 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002025 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 Py_ssize_t inlen, tablen, dellen = 0;
2029 PyObject *result;
2030 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002032 if (PyBytes_Check(table)) {
2033 table_chars = PyBytes_AS_STRING(table);
2034 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002036 else if (table == Py_None) {
2037 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 tablen = 256;
2039 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002040 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002041 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002042 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002043 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002044 tablen = table_view.len;
2045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (tablen != 256) {
2048 PyErr_SetString(PyExc_ValueError,
2049 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002050 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 return NULL;
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002054 if (deletechars != NULL) {
2055 if (PyBytes_Check(deletechars)) {
2056 del_table_chars = PyBytes_AS_STRING(deletechars);
2057 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002060 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 PyBuffer_Release(&table_view);
2062 return NULL;
2063 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002064 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 dellen = del_table_view.len;
2066 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 }
2068 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 dellen = 0;
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 inlen = PyBytes_GET_SIZE(input_obj);
2074 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002075 if (result == NULL) {
2076 PyBuffer_Release(&del_table_view);
2077 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002079 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002080 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 /* If no deletions are required, use faster code */
2085 for (i = inlen; --i >= 0; ) {
2086 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 changed = 1;
2089 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002090 if (!changed && PyBytes_CheckExact(input_obj)) {
2091 Py_INCREF(input_obj);
2092 Py_DECREF(result);
2093 result = input_obj;
2094 }
2095 PyBuffer_Release(&del_table_view);
2096 PyBuffer_Release(&table_view);
2097 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 for (i = 0; i < 256; i++)
2102 trans_table[i] = Py_CHARMASK(i);
2103 } else {
2104 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002105 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002111 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 for (i = inlen; --i >= 0; ) {
2114 c = Py_CHARMASK(*input++);
2115 if (trans_table[c] != -1)
2116 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2117 continue;
2118 changed = 1;
2119 }
2120 if (!changed && PyBytes_CheckExact(input_obj)) {
2121 Py_DECREF(result);
2122 Py_INCREF(input_obj);
2123 return input_obj;
2124 }
2125 /* Fix the size of the resulting string */
2126 if (inlen > 0)
2127 _PyBytes_Resize(&result, output - output_start);
2128 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132/*[clinic input]
2133
2134@staticmethod
2135bytes.maketrans
2136
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002137 frm: Py_buffer
2138 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 /
2140
2141Return a translation table useable for the bytes or bytearray translate method.
2142
2143The returned table will be one where each byte in frm is mapped to the byte at
2144the same position in to.
2145
2146The bytes objects frm and to must be of the same length.
2147[clinic start generated code]*/
2148
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002150bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002151/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152{
2153 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002154}
2155
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156
2157/*[clinic input]
2158bytes.replace
2159
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002160 old: Py_buffer
2161 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162 count: Py_ssize_t = -1
2163 Maximum number of occurrences to replace.
2164 -1 (the default value) means replace all occurrences.
2165 /
2166
2167Return a copy with all occurrences of substring old replaced by new.
2168
2169If the optional argument count is given, only the first count occurrences are
2170replaced.
2171[clinic start generated code]*/
2172
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002174bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002175 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002176/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002178 return stringlib_replace((PyObject *)self,
2179 (const char *)old->buf, old->len,
2180 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181}
2182
2183/** End DALKE **/
2184
sweeneydea81849b2020-04-22 17:05:48 -04002185/*[clinic input]
2186bytes.removeprefix as bytes_removeprefix
2187
2188 prefix: Py_buffer
2189 /
2190
2191Return a bytes object with the given prefix string removed if present.
2192
2193If the bytes starts with the prefix string, return bytes[len(prefix):].
2194Otherwise, return a copy of the original bytes.
2195[clinic start generated code]*/
2196
2197static PyObject *
2198bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2199/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2200{
2201 const char *self_start = PyBytes_AS_STRING(self);
2202 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2203 const char *prefix_start = prefix->buf;
2204 Py_ssize_t prefix_len = prefix->len;
2205
2206 if (self_len >= prefix_len
2207 && prefix_len > 0
2208 && memcmp(self_start, prefix_start, prefix_len) == 0)
2209 {
2210 return PyBytes_FromStringAndSize(self_start + prefix_len,
2211 self_len - prefix_len);
2212 }
2213
2214 if (PyBytes_CheckExact(self)) {
2215 Py_INCREF(self);
2216 return (PyObject *)self;
2217 }
2218
2219 return PyBytes_FromStringAndSize(self_start, self_len);
2220}
2221
2222/*[clinic input]
2223bytes.removesuffix as bytes_removesuffix
2224
2225 suffix: Py_buffer
2226 /
2227
2228Return a bytes object with the given suffix string removed if present.
2229
2230If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)].  Otherwise, return a copy of the original
2232bytes.
2233[clinic start generated code]*/
2234
2235static PyObject *
2236bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2237/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2238{
2239 const char *self_start = PyBytes_AS_STRING(self);
2240 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2241 const char *suffix_start = suffix->buf;
2242 Py_ssize_t suffix_len = suffix->len;
2243
2244 if (self_len >= suffix_len
2245 && suffix_len > 0
2246 && memcmp(self_start + self_len - suffix_len,
2247 suffix_start, suffix_len) == 0)
2248 {
2249 return PyBytes_FromStringAndSize(self_start,
2250 self_len - suffix_len);
2251 }
2252
2253 if (PyBytes_CheckExact(self)) {
2254 Py_INCREF(self);
2255 return (PyObject *)self;
2256 }
2257
2258 return PyBytes_FromStringAndSize(self_start, self_len);
2259}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002260
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002261static PyObject *
2262bytes_startswith(PyBytesObject *self, PyObject *args)
2263{
2264 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2265}
2266
2267static PyObject *
2268bytes_endswith(PyBytesObject *self, PyObject *args)
2269{
2270 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2271}
2272
2273
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002274/*[clinic input]
2275bytes.decode
2276
2277 encoding: str(c_default="NULL") = 'utf-8'
2278 The encoding with which to decode the bytes.
2279 errors: str(c_default="NULL") = 'strict'
2280 The error handling scheme to use for the handling of decoding errors.
2281 The default is 'strict' meaning that decoding errors raise a
2282 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2283 as well as any other name registered with codecs.register_error that
2284 can handle UnicodeDecodeErrors.
2285
2286Decode the bytes using the codec registered for encoding.
2287[clinic start generated code]*/
2288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002289static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002290bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002291 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002292/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002293{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002294 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002295}
2296
Guido van Rossum20188312006-05-05 15:15:40 +00002297
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002298/*[clinic input]
2299bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002300
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002301 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002302
2303Return a list of the lines in the bytes, breaking at line boundaries.
2304
2305Line breaks are not included in the resulting list unless keepends is given and
2306true.
2307[clinic start generated code]*/
2308
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002309static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002310bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002311/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002312{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002313 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002314 (PyObject*) self, PyBytes_AS_STRING(self),
2315 PyBytes_GET_SIZE(self), keepends
2316 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002317}
2318
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002319/*[clinic input]
2320@classmethod
2321bytes.fromhex
2322
2323 string: unicode
2324 /
2325
2326Create a bytes object from a string of hexadecimal numbers.
2327
2328Spaces between two numbers are accepted.
2329Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2330[clinic start generated code]*/
2331
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002332static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002333bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002334/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002335{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002336 PyObject *result = _PyBytes_FromHex(string, 0);
2337 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002338 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002339 }
2340 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002341}
2342
2343PyObject*
2344_PyBytes_FromHex(PyObject *string, int use_bytearray)
2345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002347 Py_ssize_t hexlen, invalid_char;
2348 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002349 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002350 _PyBytesWriter writer;
2351
2352 _PyBytesWriter_Init(&writer);
2353 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002354
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002355 assert(PyUnicode_Check(string));
2356 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002358 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002359
Victor Stinner2bf89932015-10-14 11:25:33 +02002360 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002361 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002362 unsigned int kind = PyUnicode_KIND(string);
2363 Py_ssize_t i;
2364
2365 /* search for the first non-ASCII character */
2366 for (i = 0; i < hexlen; i++) {
2367 if (PyUnicode_READ(kind, data, i) >= 128)
2368 break;
2369 }
2370 invalid_char = i;
2371 goto error;
2372 }
2373
2374 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2375 str = PyUnicode_1BYTE_DATA(string);
2376
2377 /* This overestimates if there are spaces */
2378 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2379 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002381
2382 end = str + hexlen;
2383 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002385 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002386 do {
2387 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002388 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002389 if (str >= end)
2390 break;
2391 }
2392
2393 top = _PyLong_DigitValue[*str];
2394 if (top >= 16) {
2395 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002396 goto error;
2397 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002398 str++;
2399
2400 bot = _PyLong_DigitValue[*str];
2401 if (bot >= 16) {
2402 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2403 goto error;
2404 }
2405 str++;
2406
2407 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002409
2410 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002411
2412 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002413 PyErr_Format(PyExc_ValueError,
2414 "non-hexadecimal number found in "
2415 "fromhex() arg at position %zd", invalid_char);
2416 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002418}
2419
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002420/*[clinic input]
2421bytes.hex
2422
2423 sep: object = NULL
2424 An optional single character or byte to separate hex bytes.
2425 bytes_per_sep: int = 1
2426 How many bytes between separators. Positive values count from the
2427 right, negative values count from the left.
2428
2429Create a str of hexadecimal numbers from a bytes object.
2430
2431Example:
2432>>> value = b'\xb9\x01\xef'
2433>>> value.hex()
2434'b901ef'
2435>>> value.hex(':')
2436'b9:01:ef'
2437>>> value.hex(':', 2)
2438'b9:01ef'
2439>>> value.hex(':', -2)
2440'b901:ef'
2441[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002442
2443static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002444bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2445/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002446{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002447 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002448 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002449 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002450}
2451
2452static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302453bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002456}
2457
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002458
2459static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002460bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302462 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002464 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002465 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002466 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002467 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002468 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002469 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002470 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002471 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002472 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002473 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002474 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002475 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302476 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302478 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302480 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002481 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302482 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302484 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302486 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302488 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302490 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002492 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002493 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302494 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495 BYTES_LSTRIP_METHODDEF
2496 BYTES_MAKETRANS_METHODDEF
2497 BYTES_PARTITION_METHODDEF
2498 BYTES_REPLACE_METHODDEF
sweeneydea81849b2020-04-22 17:05:48 -04002499 BYTES_REMOVEPREFIX_METHODDEF
2500 BYTES_REMOVESUFFIX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002501 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2502 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002503 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002504 BYTES_RPARTITION_METHODDEF
2505 BYTES_RSPLIT_METHODDEF
2506 BYTES_RSTRIP_METHODDEF
2507 BYTES_SPLIT_METHODDEF
2508 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002509 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002510 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002511 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302512 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302514 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002515 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302516 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002517 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002519};
2520
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002522bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002523{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002524 if (!PyBytes_Check(self)) {
2525 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002526 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002527 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002528 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002529}
2530
2531static PyNumberMethods bytes_as_number = {
2532 0, /*nb_add*/
2533 0, /*nb_subtract*/
2534 0, /*nb_multiply*/
2535 bytes_mod, /*nb_remainder*/
2536};
2537
2538static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002539bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
2541static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002542bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 PyObject *x = NULL;
2545 const char *encoding = NULL;
2546 const char *errors = NULL;
2547 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002548 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 Py_ssize_t size;
2550 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002553 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2555 &encoding, &errors))
2556 return NULL;
2557 if (x == NULL) {
2558 if (encoding != NULL || errors != NULL) {
2559 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002560 encoding != NULL ?
2561 "encoding without a string argument" :
2562 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 return NULL;
2564 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002565 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002567
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002568 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002570 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002572 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 return NULL;
2574 }
2575 new = PyUnicode_AsEncodedString(x, encoding, errors);
2576 if (new == NULL)
2577 return NULL;
2578 assert(PyBytes_Check(new));
2579 return new;
2580 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002581
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002582 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002583 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002584 PyUnicode_Check(x) ?
2585 "string argument without an encoding" :
2586 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002587 return NULL;
2588 }
2589
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002590 /* We'd like to call PyObject_Bytes here, but we need to check for an
2591 integer argument before deferring to PyBytes_FromObject, something
2592 PyObject_Bytes doesn't do. */
2593 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2594 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002595 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002596 Py_DECREF(func);
2597 if (new == NULL)
2598 return NULL;
2599 if (!PyBytes_Check(new)) {
2600 PyErr_Format(PyExc_TypeError,
2601 "__bytes__ returned non-bytes (type %.200s)",
2602 Py_TYPE(new)->tp_name);
2603 Py_DECREF(new);
2604 return NULL;
2605 }
2606 return new;
2607 }
2608 else if (PyErr_Occurred())
2609 return NULL;
2610
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002611 if (PyUnicode_Check(x)) {
2612 PyErr_SetString(PyExc_TypeError,
2613 "string argument without an encoding");
2614 return NULL;
2615 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002617 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002618 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2619 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002620 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002621 return NULL;
2622 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002623 }
INADA Naokia634e232017-01-06 17:32:01 +09002624 else {
2625 if (size < 0) {
2626 PyErr_SetString(PyExc_ValueError, "negative count");
2627 return NULL;
2628 }
2629 new = _PyBytes_FromSize(size, 1);
2630 if (new == NULL)
2631 return NULL;
2632 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002633 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002634 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002636 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002637}
2638
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002639static PyObject*
2640_PyBytes_FromBuffer(PyObject *x)
2641{
2642 PyObject *new;
2643 Py_buffer view;
2644
2645 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2646 return NULL;
2647
2648 new = PyBytes_FromStringAndSize(NULL, view.len);
2649 if (!new)
2650 goto fail;
2651 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2652 &view, view.len, 'C') < 0)
2653 goto fail;
2654 PyBuffer_Release(&view);
2655 return new;
2656
2657fail:
2658 Py_XDECREF(new);
2659 PyBuffer_Release(&view);
2660 return NULL;
2661}
2662
2663static PyObject*
2664_PyBytes_FromList(PyObject *x)
2665{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002666 Py_ssize_t i, size = PyList_GET_SIZE(x);
2667 Py_ssize_t value;
2668 char *str;
2669 PyObject *item;
2670 _PyBytesWriter writer;
2671
2672 _PyBytesWriter_Init(&writer);
2673 str = _PyBytesWriter_Alloc(&writer, size);
2674 if (str == NULL)
2675 return NULL;
2676 writer.overallocate = 1;
2677 size = writer.allocated;
2678
2679 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2680 item = PyList_GET_ITEM(x, i);
2681 Py_INCREF(item);
2682 value = PyNumber_AsSsize_t(item, NULL);
2683 Py_DECREF(item);
2684 if (value == -1 && PyErr_Occurred())
2685 goto error;
2686
2687 if (value < 0 || value >= 256) {
2688 PyErr_SetString(PyExc_ValueError,
2689 "bytes must be in range(0, 256)");
2690 goto error;
2691 }
2692
2693 if (i >= size) {
2694 str = _PyBytesWriter_Resize(&writer, str, size+1);
2695 if (str == NULL)
2696 return NULL;
2697 size = writer.allocated;
2698 }
2699 *str++ = (char) value;
2700 }
2701 return _PyBytesWriter_Finish(&writer, str);
2702
2703 error:
2704 _PyBytesWriter_Dealloc(&writer);
2705 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002706}
2707
2708static PyObject*
2709_PyBytes_FromTuple(PyObject *x)
2710{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002711 PyObject *bytes;
2712 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2713 Py_ssize_t value;
2714 char *str;
2715 PyObject *item;
2716
2717 bytes = PyBytes_FromStringAndSize(NULL, size);
2718 if (bytes == NULL)
2719 return NULL;
2720 str = ((PyBytesObject *)bytes)->ob_sval;
2721
2722 for (i = 0; i < size; i++) {
2723 item = PyTuple_GET_ITEM(x, i);
2724 value = PyNumber_AsSsize_t(item, NULL);
2725 if (value == -1 && PyErr_Occurred())
2726 goto error;
2727
2728 if (value < 0 || value >= 256) {
2729 PyErr_SetString(PyExc_ValueError,
2730 "bytes must be in range(0, 256)");
2731 goto error;
2732 }
2733 *str++ = (char) value;
2734 }
2735 return bytes;
2736
2737 error:
2738 Py_DECREF(bytes);
2739 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002740}
2741
2742static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002743_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002744{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002745 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002747 _PyBytesWriter writer;
2748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002749 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002750 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 if (size == -1 && PyErr_Occurred())
2752 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002753
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002754 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002755 str = _PyBytesWriter_Alloc(&writer, size);
2756 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002758 writer.overallocate = 1;
2759 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 /* Run the iterator to exhaustion */
2762 for (i = 0; ; i++) {
2763 PyObject *item;
2764 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 /* Get the next item */
2767 item = PyIter_Next(it);
2768 if (item == NULL) {
2769 if (PyErr_Occurred())
2770 goto error;
2771 break;
2772 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002775 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 Py_DECREF(item);
2777 if (value == -1 && PyErr_Occurred())
2778 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 /* Range check */
2781 if (value < 0 || value >= 256) {
2782 PyErr_SetString(PyExc_ValueError,
2783 "bytes must be in range(0, 256)");
2784 goto error;
2785 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 /* Append the byte */
2788 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002789 str = _PyBytesWriter_Resize(&writer, str, size+1);
2790 if (str == NULL)
2791 return NULL;
2792 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002794 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002796
2797 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798
2799 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002800 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002801 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802}
2803
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002804PyObject *
2805PyBytes_FromObject(PyObject *x)
2806{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002807 PyObject *it, *result;
2808
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002809 if (x == NULL) {
2810 PyErr_BadInternalCall();
2811 return NULL;
2812 }
2813
2814 if (PyBytes_CheckExact(x)) {
2815 Py_INCREF(x);
2816 return x;
2817 }
2818
2819 /* Use the modern buffer interface */
2820 if (PyObject_CheckBuffer(x))
2821 return _PyBytes_FromBuffer(x);
2822
2823 if (PyList_CheckExact(x))
2824 return _PyBytes_FromList(x);
2825
2826 if (PyTuple_CheckExact(x))
2827 return _PyBytes_FromTuple(x);
2828
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002829 if (!PyUnicode_Check(x)) {
2830 it = PyObject_GetIter(x);
2831 if (it != NULL) {
2832 result = _PyBytes_FromIterator(it, x);
2833 Py_DECREF(it);
2834 return result;
2835 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002836 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2837 return NULL;
2838 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002839 }
2840
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002841 PyErr_Format(PyExc_TypeError,
2842 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002843 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002844 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002845}
2846
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002848bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002850 PyObject *tmp, *pnew;
2851 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 assert(PyType_IsSubtype(type, &PyBytes_Type));
2854 tmp = bytes_new(&PyBytes_Type, args, kwds);
2855 if (tmp == NULL)
2856 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002857 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 n = PyBytes_GET_SIZE(tmp);
2859 pnew = type->tp_alloc(type, n);
2860 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002861 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002862 PyBytes_AS_STRING(tmp), n+1);
2863 ((PyBytesObject *)pnew)->ob_shash =
2864 ((PyBytesObject *)tmp)->ob_shash;
2865 }
2866 Py_DECREF(tmp);
2867 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868}
2869
/* Docstring of the bytes type; referenced as tp_doc in PyBytes_Type.  It
   lists the accepted constructor call forms followed by a summary of the
   kinds of source objects. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002870PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002871"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002872bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002873bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002874bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2875bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002876\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002878 - an iterable yielding integers in range(256)\n\
2879 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002880 - any object implementing the buffer API.\n\
2881 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002882
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002883static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002884
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2887 "bytes",
2888 PyBytesObject_SIZE,
2889 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002890 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002891 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 0, /* tp_getattr */
2893 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002894 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002895 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002896 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 &bytes_as_sequence, /* tp_as_sequence */
2898 &bytes_as_mapping, /* tp_as_mapping */
2899 (hashfunc)bytes_hash, /* tp_hash */
2900 0, /* tp_call */
2901 bytes_str, /* tp_str */
2902 PyObject_GenericGetAttr, /* tp_getattro */
2903 0, /* tp_setattro */
2904 &bytes_as_buffer, /* tp_as_buffer */
2905 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2906 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2907 bytes_doc, /* tp_doc */
2908 0, /* tp_traverse */
2909 0, /* tp_clear */
2910 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2911 0, /* tp_weaklistoffset */
2912 bytes_iter, /* tp_iter */
2913 0, /* tp_iternext */
2914 bytes_methods, /* tp_methods */
2915 0, /* tp_members */
2916 0, /* tp_getset */
2917 &PyBaseObject_Type, /* tp_base */
2918 0, /* tp_dict */
2919 0, /* tp_descr_get */
2920 0, /* tp_descr_set */
2921 0, /* tp_dictoffset */
2922 0, /* tp_init */
2923 0, /* tp_alloc */
2924 bytes_new, /* tp_new */
2925 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002926};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002927
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002929PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 assert(pv != NULL);
2932 if (*pv == NULL)
2933 return;
2934 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002935 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 return;
2937 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002938
2939 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2940 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002941 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002942 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002943
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002944 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002945 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2946 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2947 Py_CLEAR(*pv);
2948 return;
2949 }
2950
2951 oldsize = PyBytes_GET_SIZE(*pv);
2952 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2953 PyErr_NoMemory();
2954 goto error;
2955 }
2956 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2957 goto error;
2958
2959 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2960 PyBuffer_Release(&wb);
2961 return;
2962
2963 error:
2964 PyBuffer_Release(&wb);
2965 Py_CLEAR(*pv);
2966 return;
2967 }
2968
2969 else {
2970 /* Multiple references, need to create new object */
2971 PyObject *v;
2972 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002973 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002974 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975}
2976
2977void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002978PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002979{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 PyBytes_Concat(pv, w);
2981 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002982}
2983
2984
Ethan Furmanb95b5612015-01-23 20:05:18 -08002985/* The following function breaks the notion that bytes are immutable:
2986 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002988 as creating a new bytes object and destroying the old one, only
2989 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002990 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002991 Note that if there's not enough memory to resize the bytes object, the
2992 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993 memory" exception is set, and -1 is returned. Else (on success) 0 is
2994 returned, and the value in *pv may or may not be the same as on input.
2995 As always, an extra byte is allocated for a trailing \0 byte (newsize
2996 does *not* include that), and a trailing \0 byte is stored.
2997*/
2998
2999int
3000_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3001{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003002 PyObject *v;
3003 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003005 if (!PyBytes_Check(v) || newsize < 0) {
3006 goto error;
3007 }
3008 if (Py_SIZE(v) == newsize) {
3009 /* return early if newsize equals to v->ob_size */
3010 return 0;
3011 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003012 if (Py_SIZE(v) == 0) {
3013 if (newsize == 0) {
3014 return 0;
3015 }
3016 *pv = _PyBytes_FromSize(newsize, 0);
3017 Py_DECREF(v);
3018 return (*pv == NULL) ? -1 : 0;
3019 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003020 if (Py_REFCNT(v) != 1) {
3021 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003023 if (newsize == 0) {
3024 *pv = _PyBytes_FromSize(0, 0);
3025 Py_DECREF(v);
3026 return (*pv == NULL) ? -1 : 0;
3027 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01003029#ifdef Py_REF_DEBUG
3030 _Py_RefTotal--;
3031#endif
3032#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01003034#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003036 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 if (*pv == NULL) {
3038 PyObject_Del(v);
3039 PyErr_NoMemory();
3040 return -1;
3041 }
3042 _Py_NewReference(*pv);
3043 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01003044 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 sv->ob_sval[newsize] = '\0';
3046 sv->ob_shash = -1; /* invalidate cached hash value */
3047 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003048error:
3049 *pv = 0;
3050 Py_DECREF(v);
3051 PyErr_BadInternalCall();
3052 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003053}
3054
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003055void
Victor Stinnerbed48172019-08-27 00:12:32 +02003056_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003059 for (i = 0; i < UCHAR_MAX + 1; i++)
3060 Py_CLEAR(characters[i]);
3061 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062}
3063
/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065
3066typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003067 PyObject_HEAD
3068 Py_ssize_t it_index;
3069 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003070} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071
3072static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003073striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003074{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003075 _PyObject_GC_UNTRACK(it);
3076 Py_XDECREF(it->it_seq);
3077 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003078}
3079
3080static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003082{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 Py_VISIT(it->it_seq);
3084 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003085}
3086
3087static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003088striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003089{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 PyBytesObject *seq;
3091 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 assert(it != NULL);
3094 seq = it->it_seq;
3095 if (seq == NULL)
3096 return NULL;
3097 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3100 item = PyLong_FromLong(
3101 (unsigned char)seq->ob_sval[it->it_index]);
3102 if (item != NULL)
3103 ++it->it_index;
3104 return item;
3105 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003107 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003108 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003110}
3111
3112static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303113striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003114{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 Py_ssize_t len = 0;
3116 if (it->it_seq)
3117 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3118 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003119}
3120
3121PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003122 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003124static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303125striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003126{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003127 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003128 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003129 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003130 it->it_seq, it->it_index);
3131 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003132 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003133 }
3134}
3135
3136PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3137
3138static PyObject *
3139striter_setstate(striterobject *it, PyObject *state)
3140{
3141 Py_ssize_t index = PyLong_AsSsize_t(state);
3142 if (index == -1 && PyErr_Occurred())
3143 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003144 if (it->it_seq != NULL) {
3145 if (index < 0)
3146 index = 0;
3147 else if (index > PyBytes_GET_SIZE(it->it_seq))
3148 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3149 it->it_index = index;
3150 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003151 Py_RETURN_NONE;
3152}
3153
3154PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3155
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003156static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003157 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3158 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003159 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3160 reduce_doc},
3161 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3162 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003164};
3165
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003166PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003167 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3168 "bytes_iterator", /* tp_name */
3169 sizeof(striterobject), /* tp_basicsize */
3170 0, /* tp_itemsize */
3171 /* methods */
3172 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003173 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 0, /* tp_getattr */
3175 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003176 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003177 0, /* tp_repr */
3178 0, /* tp_as_number */
3179 0, /* tp_as_sequence */
3180 0, /* tp_as_mapping */
3181 0, /* tp_hash */
3182 0, /* tp_call */
3183 0, /* tp_str */
3184 PyObject_GenericGetAttr, /* tp_getattro */
3185 0, /* tp_setattro */
3186 0, /* tp_as_buffer */
3187 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3188 0, /* tp_doc */
3189 (traverseproc)striter_traverse, /* tp_traverse */
3190 0, /* tp_clear */
3191 0, /* tp_richcompare */
3192 0, /* tp_weaklistoffset */
3193 PyObject_SelfIter, /* tp_iter */
3194 (iternextfunc)striter_next, /* tp_iternext */
3195 striter_methods, /* tp_methods */
3196 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003197};
3198
3199static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003200bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003201{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003202 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003204 if (!PyBytes_Check(seq)) {
3205 PyErr_BadInternalCall();
3206 return NULL;
3207 }
3208 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3209 if (it == NULL)
3210 return NULL;
3211 it->it_index = 0;
3212 Py_INCREF(seq);
3213 it->it_seq = (PyBytesObject *)seq;
3214 _PyObject_GC_TRACK(it);
3215 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003216}
Victor Stinner00165072015-10-09 01:53:21 +02003217
3218
/* _PyBytesWriter API */
3220
/* Divisor used when over-allocating: the extra room requested is
   size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3228
3229void
3230_PyBytesWriter_Init(_PyBytesWriter *writer)
3231{
Victor Stinner661aacc2015-10-14 09:41:48 +02003232 /* Set all attributes before small_buffer to 0 */
3233 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003234#ifndef NDEBUG
3235 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3236 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003237#endif
3238}
3239
3240void
3241_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3242{
3243 Py_CLEAR(writer->buffer);
3244}
3245
3246Py_LOCAL_INLINE(char*)
3247_PyBytesWriter_AsString(_PyBytesWriter *writer)
3248{
Victor Stinner661aacc2015-10-14 09:41:48 +02003249 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003250 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003251 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003252 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003253 else if (writer->use_bytearray) {
3254 assert(writer->buffer != NULL);
3255 return PyByteArray_AS_STRING(writer->buffer);
3256 }
3257 else {
3258 assert(writer->buffer != NULL);
3259 return PyBytes_AS_STRING(writer->buffer);
3260 }
Victor Stinner00165072015-10-09 01:53:21 +02003261}
3262
3263Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003264_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003265{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003266 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003267 assert(str != NULL);
3268 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003269 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003270 return str - start;
3271}
3272
Victor Stinner68762572019-10-07 18:42:01 +02003273#ifndef NDEBUG
3274Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003275_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3276{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003277 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003278
Victor Stinner661aacc2015-10-14 09:41:48 +02003279 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003280 assert(writer->buffer == NULL);
3281 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003282 else {
3283 assert(writer->buffer != NULL);
3284 if (writer->use_bytearray)
3285 assert(PyByteArray_CheckExact(writer->buffer));
3286 else
3287 assert(PyBytes_CheckExact(writer->buffer));
3288 assert(Py_REFCNT(writer->buffer) == 1);
3289 }
Victor Stinner00165072015-10-09 01:53:21 +02003290
Victor Stinner661aacc2015-10-14 09:41:48 +02003291 if (writer->use_bytearray) {
3292 /* bytearray has its own overallocation algorithm,
3293 writer overallocation must be disabled */
3294 assert(!writer->overallocate);
3295 }
3296
3297 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003298 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003299 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003300 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003301 assert(start[writer->allocated] == 0);
3302
3303 end = start + writer->allocated;
3304 assert(str != NULL);
3305 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003306 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003307}
Victor Stinner68762572019-10-07 18:42:01 +02003308#endif
Victor Stinner00165072015-10-09 01:53:21 +02003309
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003310void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003311_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003312{
3313 Py_ssize_t allocated, pos;
3314
Victor Stinner68762572019-10-07 18:42:01 +02003315 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003316 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003317
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003318 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003319 if (writer->overallocate
3320 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3321 /* overallocate to limit the number of realloc() */
3322 allocated += allocated / OVERALLOCATE_FACTOR;
3323 }
3324
Victor Stinner2bf89932015-10-14 11:25:33 +02003325 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003326 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003327 if (writer->use_bytearray) {
3328 if (PyByteArray_Resize(writer->buffer, allocated))
3329 goto error;
3330 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3331 but we cannot use ob_alloc because bytes may need to be moved
3332 to use the whole buffer. bytearray uses an internal optimization
3333 to avoid moving or copying bytes when bytes are removed at the
3334 beginning (ex: del bytearray[:1]). */
3335 }
3336 else {
3337 if (_PyBytes_Resize(&writer->buffer, allocated))
3338 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003339 }
3340 }
3341 else {
3342 /* convert from stack buffer to bytes object buffer */
3343 assert(writer->buffer == NULL);
3344
Victor Stinner661aacc2015-10-14 09:41:48 +02003345 if (writer->use_bytearray)
3346 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3347 else
3348 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003349 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003350 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003351
3352 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003353 char *dest;
3354 if (writer->use_bytearray)
3355 dest = PyByteArray_AS_STRING(writer->buffer);
3356 else
3357 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003358 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003359 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003360 pos);
3361 }
3362
Victor Stinnerb3653a32015-10-09 03:38:24 +02003363 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003364#ifndef NDEBUG
3365 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3366 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003367#endif
Victor Stinner00165072015-10-09 01:53:21 +02003368 }
3369 writer->allocated = allocated;
3370
3371 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003372 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003373 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003374
3375error:
3376 _PyBytesWriter_Dealloc(writer);
3377 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003378}
3379
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003380void*
3381_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3382{
3383 Py_ssize_t new_min_size;
3384
Victor Stinner68762572019-10-07 18:42:01 +02003385 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003386 assert(size >= 0);
3387
3388 if (size == 0) {
3389 /* nothing to do */
3390 return str;
3391 }
3392
3393 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3394 PyErr_NoMemory();
3395 _PyBytesWriter_Dealloc(writer);
3396 return NULL;
3397 }
3398 new_min_size = writer->min_size + size;
3399
3400 if (new_min_size > writer->allocated)
3401 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3402
3403 writer->min_size = new_min_size;
3404 return str;
3405}
3406
Victor Stinner00165072015-10-09 01:53:21 +02003407/* Allocate the buffer to write size bytes.
3408 Return the pointer to the beginning of buffer data.
3409 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003410void*
Victor Stinner00165072015-10-09 01:53:21 +02003411_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3412{
3413 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003414 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003415 assert(size >= 0);
3416
Victor Stinnerb3653a32015-10-09 03:38:24 +02003417 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003418#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003419 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003420 /* In debug mode, don't use the full small buffer because it is less
3421 efficient than bytes and bytearray objects to detect buffer underflow
3422 and buffer overflow. Use 10 bytes of the small buffer to test also
3423 code using the smaller buffer in debug mode.
3424
3425 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3426 in debug mode to also be able to detect stack overflow when running
3427 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3428 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3429 stack overflow. */
3430 writer->allocated = Py_MIN(writer->allocated, 10);
3431 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3432 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003433 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003434#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003435 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003436#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003437 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003438}
3439
3440PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003441_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003442{
Victor Stinner2bf89932015-10-14 11:25:33 +02003443 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003444 PyObject *result;
3445
Victor Stinner68762572019-10-07 18:42:01 +02003446 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003447
Victor Stinner2bf89932015-10-14 11:25:33 +02003448 size = _PyBytesWriter_GetSize(writer, str);
3449 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003450 Py_CLEAR(writer->buffer);
3451 /* Get the empty byte string singleton */
3452 result = PyBytes_FromStringAndSize(NULL, 0);
3453 }
3454 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003455 if (writer->use_bytearray) {
3456 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3457 }
3458 else {
3459 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3460 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003461 }
3462 else {
3463 result = writer->buffer;
3464 writer->buffer = NULL;
3465
Victor Stinner2bf89932015-10-14 11:25:33 +02003466 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003467 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003468 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003469 Py_DECREF(result);
3470 return NULL;
3471 }
3472 }
3473 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003474 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003475 assert(result == NULL);
3476 return NULL;
3477 }
Victor Stinner00165072015-10-09 01:53:21 +02003478 }
3479 }
Victor Stinner00165072015-10-09 01:53:21 +02003480 }
Victor Stinner00165072015-10-09 01:53:21 +02003481 return result;
3482}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003483
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003484void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003485_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003486 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003487{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003488 char *str = (char *)ptr;
3489
Victor Stinnerce179bf2015-10-09 12:57:22 +02003490 str = _PyBytesWriter_Prepare(writer, str, size);
3491 if (str == NULL)
3492 return NULL;
3493
Christian Heimesf051e432016-09-13 20:22:02 +02003494 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003495 str += size;
3496
3497 return str;
3498}