blob: 06ead2b58f980f9212b869458f744c0ccd2b1b89 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinnerd9ea5ca2020-04-15 02:57:50 +02009#include "pycore_pymem.h" // PYMEM_CLEANBYTE
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021static PyBytesObject *characters[UCHAR_MAX + 1];
22static PyBytesObject *nullstring;
23
Hai Shi46874c22020-01-30 17:20:25 -060024_Py_IDENTIFIER(__bytes__);
25
Mark Dickinsonfd24b322008-12-06 15:33:31 +000026/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyBytesObject_SIZE + n bytes.
28
29 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33
Victor Stinner2bf89932015-10-14 11:25:33 +020034/* Forward declaration */
35Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36 char *str);
37
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the length of that string, as reported by strlen(), becomes the
   size of the new object.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020060static PyObject *
61_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020063 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020064 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 Py_INCREF(op);
68 return (PyObject *)op;
69 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070
Victor Stinner049e5092014-08-17 22:20:00 +020071 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 if (op == NULL)
83 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010084 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020086 if (!use_calloc)
87 op->ob_sval[size] = '\0';
88 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020092 }
93 return (PyObject *) op;
94}
95
96PyObject *
97PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98{
99 PyBytesObject *op;
100 if (size < 0) {
101 PyErr_SetString(PyExc_SystemError,
102 "Negative size passed to PyBytes_FromStringAndSize");
103 return NULL;
104 }
105 if (size == 1 && str != NULL &&
106 (op = characters[*str & UCHAR_MAX]) != NULL)
107 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
Christian Heimesf051e432016-09-13 20:22:02 +0200118 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
144 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100153 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200155 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200171 const char *f;
172 const char *p;
173 Py_ssize_t prec;
174 int longflag;
175 int size_tflag;
176 /* Longest 64-bit formatted numbers:
177 - "18446744073709551615\0" (21 bytes)
178 - "-9223372036854775808\0" (21 bytes)
179 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Victor Stinner03dab782015-10-14 00:21:35 +0200181 Longest 64-bit pointer representation:
182 "0xffffffffffffffff\0" (19 bytes). */
183 char buffer[21];
184 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000185
Victor Stinner03dab782015-10-14 00:21:35 +0200186 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 s = _PyBytesWriter_Alloc(&writer, strlen(format));
189 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200191 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192
Victor Stinner03dab782015-10-14 00:21:35 +0200193#define WRITE_BYTES(str) \
194 do { \
195 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196 if (s == NULL) \
197 goto error; \
198 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200201 if (*f != '%') {
202 *s++ = *f;
203 continue;
204 }
205
206 p = f++;
207
208 /* ignore the width (ex: 10 in "%10s") */
209 while (Py_ISDIGIT(*f))
210 f++;
211
212 /* parse the precision (ex: 10 in "%.10s") */
213 prec = 0;
214 if (*f == '.') {
215 f++;
216 for (; Py_ISDIGIT(*f); f++) {
217 prec = (prec * 10) + (*f - '0');
218 }
219 }
220
221 while (*f && *f != '%' && !Py_ISALPHA(*f))
222 f++;
223
224 /* handle the long flag ('l'), but only for %ld and %lu.
225 others can be added when necessary. */
226 longflag = 0;
227 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228 longflag = 1;
229 ++f;
230 }
231
232 /* handle the size_t flag ('z'). */
233 size_tflag = 0;
234 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235 size_tflag = 1;
236 ++f;
237 }
238
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700239 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200240 (ex: 2 for "%s") */
241 writer.min_size -= (f - p + 1);
242
243 switch (*f) {
244 case 'c':
245 {
246 int c = va_arg(vargs, int);
247 if (c < 0 || c > 255) {
248 PyErr_SetString(PyExc_OverflowError,
249 "PyBytes_FromFormatV(): %c format "
250 "expects an integer in range [0; 255]");
251 goto error;
252 }
253 writer.min_size++;
254 *s++ = (unsigned char)c;
255 break;
256 }
257
258 case 'd':
259 if (longflag)
260 sprintf(buffer, "%ld", va_arg(vargs, long));
261 else if (size_tflag)
262 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
263 va_arg(vargs, Py_ssize_t));
264 else
265 sprintf(buffer, "%d", va_arg(vargs, int));
266 assert(strlen(buffer) < sizeof(buffer));
267 WRITE_BYTES(buffer);
268 break;
269
270 case 'u':
271 if (longflag)
272 sprintf(buffer, "%lu",
273 va_arg(vargs, unsigned long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
276 va_arg(vargs, size_t));
277 else
278 sprintf(buffer, "%u",
279 va_arg(vargs, unsigned int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'i':
285 sprintf(buffer, "%i", va_arg(vargs, int));
286 assert(strlen(buffer) < sizeof(buffer));
287 WRITE_BYTES(buffer);
288 break;
289
290 case 'x':
291 sprintf(buffer, "%x", va_arg(vargs, int));
292 assert(strlen(buffer) < sizeof(buffer));
293 WRITE_BYTES(buffer);
294 break;
295
296 case 's':
297 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200299
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200300 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200301 if (prec <= 0) {
302 i = strlen(p);
303 }
304 else {
305 i = 0;
306 while (i < prec && p[i]) {
307 i++;
308 }
309 }
Victor Stinner03dab782015-10-14 00:21:35 +0200310 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
311 if (s == NULL)
312 goto error;
313 break;
314 }
315
316 case 'p':
317 sprintf(buffer, "%p", va_arg(vargs, void*));
318 assert(strlen(buffer) < sizeof(buffer));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (buffer[1] == 'X')
321 buffer[1] = 'x';
322 else if (buffer[1] != 'x') {
323 memmove(buffer+2, buffer, strlen(buffer)+1);
324 buffer[0] = '0';
325 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 }
Victor Stinner03dab782015-10-14 00:21:35 +0200327 WRITE_BYTES(buffer);
328 break;
329
330 case '%':
331 writer.min_size++;
332 *s++ = '%';
333 break;
334
335 default:
336 if (*f == 0) {
337 /* fix min_size if we reached the end of the format string */
338 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340
Victor Stinner03dab782015-10-14 00:21:35 +0200341 /* invalid format string: copy unformatted string and exit */
342 WRITE_BYTES(p);
343 return _PyBytesWriter_Finish(&writer, s);
344 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346
Victor Stinner03dab782015-10-14 00:21:35 +0200347#undef WRITE_BYTES
348
349 return _PyBytesWriter_Finish(&writer, s);
350
351 error:
352 _PyBytesWriter_Dealloc(&writer);
353 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354}
355
356PyObject *
357PyBytes_FromFormat(const char *format, ...)
358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 PyObject* ret;
360 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361
362#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000364#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 ret = PyBytes_FromFormatV(format, vargs);
368 va_end(vargs);
369 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000370}
371
Ethan Furmanb95b5612015-01-23 20:05:18 -0800372/* Helpers for formatstring */
373
374Py_LOCAL_INLINE(PyObject *)
375getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
376{
377 Py_ssize_t argidx = *p_argidx;
378 if (argidx < arglen) {
379 (*p_argidx)++;
380 if (arglen < 0)
381 return args;
382 else
383 return PyTuple_GetItem(args, argidx);
384 }
385 PyErr_SetString(PyExc_TypeError,
386 "not enough arguments for format string");
387 return NULL;
388}
389
390/* Format codes
391 * F_LJUST '-'
392 * F_SIGN '+'
393 * F_BLANK ' '
394 * F_ALT '#'
395 * F_ZERO '0'
396 */
397#define F_LJUST (1<<0)
398#define F_SIGN (1<<1)
399#define F_BLANK (1<<2)
400#define F_ALT (1<<3)
401#define F_ZERO (1<<4)
402
403/* Returns a new reference to a PyBytes object, or NULL on failure. */
404
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200405static char*
406formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200407 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800408{
409 char *p;
410 PyObject *result;
411 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413
414 x = PyFloat_AsDouble(v);
415 if (x == -1.0 && PyErr_Occurred()) {
416 PyErr_Format(PyExc_TypeError, "float argument required, "
417 "not %.200s", Py_TYPE(v)->tp_name);
418 return NULL;
419 }
420
421 if (prec < 0)
422 prec = 6;
423
424 p = PyOS_double_to_string(x, type, prec,
425 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
426
427 if (p == NULL)
428 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200429
430 len = strlen(p);
431 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200432 str = _PyBytesWriter_Prepare(writer, str, len);
433 if (str == NULL)
434 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200435 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200436 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200437 str += len;
438 return str;
439 }
440
441 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600444 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800445}
446
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300447static PyObject *
448formatlong(PyObject *v, int flags, int prec, int type)
449{
450 PyObject *result, *iobj;
451 if (type == 'i')
452 type = 'd';
453 if (PyLong_Check(v))
454 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455 if (PyNumber_Check(v)) {
456 /* make sure number is a type of integer for o, x, and X */
457 if (type == 'o' || type == 'x' || type == 'X')
458 iobj = PyNumber_Index(v);
459 else
460 iobj = PyNumber_Long(v);
461 if (iobj == NULL) {
462 if (!PyErr_ExceptionMatches(PyExc_TypeError))
463 return NULL;
464 }
465 else if (!PyLong_Check(iobj))
466 Py_CLEAR(iobj);
467 if (iobj != NULL) {
468 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469 Py_DECREF(iobj);
470 return result;
471 }
472 }
473 PyErr_Format(PyExc_TypeError,
474 "%%%c format: %s is required, not %.200s", type,
475 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476 : "a number",
477 Py_TYPE(v)->tp_name);
478 return NULL;
479}
480
481static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200482byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300484 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200485 *p = PyBytes_AS_STRING(arg)[0];
486 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300488 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489 *p = PyByteArray_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
492 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300493 PyObject *iobj;
494 long ival;
495 int overflow;
496 /* make sure number is a type of integer */
497 if (PyLong_Check(arg)) {
498 ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 }
500 else {
501 iobj = PyNumber_Index(arg);
502 if (iobj == NULL) {
503 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504 return 0;
505 goto onError;
506 }
507 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508 Py_DECREF(iobj);
509 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300510 if (!overflow && ival == -1 && PyErr_Occurred())
511 goto onError;
512 if (overflow || !(0 <= ival && ival <= 255)) {
513 PyErr_SetString(PyExc_OverflowError,
514 "%c arg not in range(256)");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 *p = (char)ival;
518 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300520 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyErr_SetString(PyExc_TypeError,
522 "%c requires an integer in range(256) or a single byte");
523 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524}
525
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800526static PyObject *_PyBytes_FromBuffer(PyObject *x);
527
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200529format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532 /* is it a bytes object? */
533 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 *pbuf = PyBytes_AS_STRING(v);
535 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 return v;
538 }
539 if (PyByteArray_Check(v)) {
540 *pbuf = PyByteArray_AS_STRING(v);
541 *plen = PyByteArray_GET_SIZE(v);
542 Py_INCREF(v);
543 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 }
545 /* does it support __bytes__? */
546 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
547 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100548 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 Py_DECREF(func);
550 if (result == NULL)
551 return NULL;
552 if (!PyBytes_Check(result)) {
553 PyErr_Format(PyExc_TypeError,
554 "__bytes__ returned non-bytes (type %.200s)",
555 Py_TYPE(result)->tp_name);
556 Py_DECREF(result);
557 return NULL;
558 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200559 *pbuf = PyBytes_AS_STRING(result);
560 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800561 return result;
562 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800563 /* does it support buffer protocol? */
564 if (PyObject_CheckBuffer(v)) {
565 /* maybe we can avoid making a copy of the buffer object here? */
566 result = _PyBytes_FromBuffer(v);
567 if (result == NULL)
568 return NULL;
569 *pbuf = PyBytes_AS_STRING(result);
570 *plen = PyBytes_GET_SIZE(result);
571 return result;
572 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800574 "%%b requires a bytes-like object, "
575 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_TYPE(v)->tp_name);
577 return NULL;
578}
579
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200580/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581
582PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200583_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
584 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585{
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 const char *fmt;
587 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200589 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800590 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592 _PyBytesWriter writer;
593
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800595 PyErr_BadInternalCall();
596 return NULL;
597 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200598 fmt = format;
599 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600
601 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200603
604 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
605 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200607 if (!use_bytearray)
608 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200609
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 if (PyTuple_Check(args)) {
611 arglen = PyTuple_GET_SIZE(args);
612 argidx = 0;
613 }
614 else {
615 arglen = -1;
616 argidx = -2;
617 }
618 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
619 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
620 !PyByteArray_Check(args)) {
621 dict = args;
622 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200623
Ethan Furmanb95b5612015-01-23 20:05:18 -0800624 while (--fmtcnt >= 0) {
625 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 Py_ssize_t len;
627 char *pos;
628
Xiang Zhangb76ad512017-03-06 17:17:05 +0800629 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630 if (pos != NULL)
631 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200632 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800633 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 assert(len != 0);
635
Christian Heimesf051e432016-09-13 20:22:02 +0200636 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200637 res += len;
638 fmt += len;
639 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 }
641 else {
642 /* Got a format specifier */
643 int flags = 0;
644 Py_ssize_t width = -1;
645 int prec = -1;
646 int c = '\0';
647 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 PyObject *v = NULL;
649 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200650 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200652 Py_ssize_t len = 0;
653 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200654 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200657 if (*fmt == '%') {
658 *res++ = '%';
659 fmt++;
660 fmtcnt--;
661 continue;
662 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200664 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800665 Py_ssize_t keylen;
666 PyObject *key;
667 int pcount = 1;
668
669 if (dict == NULL) {
670 PyErr_SetString(PyExc_TypeError,
671 "format requires a mapping");
672 goto error;
673 }
674 ++fmt;
675 --fmtcnt;
676 keystart = fmt;
677 /* Skip over balanced parentheses */
678 while (pcount > 0 && --fmtcnt >= 0) {
679 if (*fmt == ')')
680 --pcount;
681 else if (*fmt == '(')
682 ++pcount;
683 fmt++;
684 }
685 keylen = fmt - keystart - 1;
686 if (fmtcnt < 0 || pcount > 0) {
687 PyErr_SetString(PyExc_ValueError,
688 "incomplete format key");
689 goto error;
690 }
691 key = PyBytes_FromStringAndSize(keystart,
692 keylen);
693 if (key == NULL)
694 goto error;
695 if (args_owned) {
696 Py_DECREF(args);
697 args_owned = 0;
698 }
699 args = PyObject_GetItem(dict, key);
700 Py_DECREF(key);
701 if (args == NULL) {
702 goto error;
703 }
704 args_owned = 1;
705 arglen = -1;
706 argidx = -2;
707 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200708
709 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800710 while (--fmtcnt >= 0) {
711 switch (c = *fmt++) {
712 case '-': flags |= F_LJUST; continue;
713 case '+': flags |= F_SIGN; continue;
714 case ' ': flags |= F_BLANK; continue;
715 case '#': flags |= F_ALT; continue;
716 case '0': flags |= F_ZERO; continue;
717 }
718 break;
719 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200720
721 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800722 if (c == '*') {
723 v = getnextarg(args, arglen, &argidx);
724 if (v == NULL)
725 goto error;
726 if (!PyLong_Check(v)) {
727 PyErr_SetString(PyExc_TypeError,
728 "* wants int");
729 goto error;
730 }
731 width = PyLong_AsSsize_t(v);
732 if (width == -1 && PyErr_Occurred())
733 goto error;
734 if (width < 0) {
735 flags |= F_LJUST;
736 width = -width;
737 }
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 width = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "width too big");
751 goto error;
752 }
753 width = width*10 + (c - '0');
754 }
755 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200756
757 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800758 if (c == '.') {
759 prec = 0;
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 if (c == '*') {
763 v = getnextarg(args, arglen, &argidx);
764 if (v == NULL)
765 goto error;
766 if (!PyLong_Check(v)) {
767 PyErr_SetString(
768 PyExc_TypeError,
769 "* wants int");
770 goto error;
771 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200772 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800773 if (prec == -1 && PyErr_Occurred())
774 goto error;
775 if (prec < 0)
776 prec = 0;
777 if (--fmtcnt >= 0)
778 c = *fmt++;
779 }
780 else if (c >= 0 && isdigit(c)) {
781 prec = c - '0';
782 while (--fmtcnt >= 0) {
783 c = Py_CHARMASK(*fmt++);
784 if (!isdigit(c))
785 break;
786 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
787 PyErr_SetString(
788 PyExc_ValueError,
789 "prec too big");
790 goto error;
791 }
792 prec = prec*10 + (c - '0');
793 }
794 }
795 } /* prec */
796 if (fmtcnt >= 0) {
797 if (c == 'h' || c == 'l' || c == 'L') {
798 if (--fmtcnt >= 0)
799 c = *fmt++;
800 }
801 }
802 if (fmtcnt < 0) {
803 PyErr_SetString(PyExc_ValueError,
804 "incomplete format");
805 goto error;
806 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200807 v = getnextarg(args, arglen, &argidx);
808 if (v == NULL)
809 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200810
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300811 if (fmtcnt == 0) {
812 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813 writer.overallocate = 0;
814 }
815
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 sign = 0;
817 fill = ' ';
818 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700819 case 'r':
820 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800821 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200822 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (temp == NULL)
824 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200825 assert(PyUnicode_IS_ASCII(temp));
826 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
827 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 if (prec >= 0 && len > prec)
829 len = prec;
830 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 's':
833 // %s is only for 2/3 code; 3 only code should use %b
834 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200835 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 if (temp == NULL)
837 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (prec >= 0 && len > prec)
839 len = prec;
840 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200841
Ethan Furmanb95b5612015-01-23 20:05:18 -0800842 case 'i':
843 case 'd':
844 case 'u':
845 case 'o':
846 case 'x':
847 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200849 && width == -1 && prec == -1
850 && !(flags & (F_SIGN | F_BLANK))
851 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200852 {
853 /* Fast path */
854 int alternate = flags & F_ALT;
855 int base;
856
857 switch(c)
858 {
859 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700860 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200861 case 'd':
862 case 'i':
863 case 'u':
864 base = 10;
865 break;
866 case 'o':
867 base = 8;
868 break;
869 case 'x':
870 case 'X':
871 base = 16;
872 break;
873 }
874
875 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200876 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200877 res = _PyLong_FormatBytesWriter(&writer, res,
878 v, base, alternate);
879 if (res == NULL)
880 goto error;
881 continue;
882 }
883
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300884 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200885 if (!temp)
886 goto error;
887 assert(PyUnicode_IS_ASCII(temp));
888 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
889 len = PyUnicode_GET_LENGTH(temp);
890 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 if (flags & F_ZERO)
892 fill = '0';
893 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200894
Ethan Furmanb95b5612015-01-23 20:05:18 -0800895 case 'e':
896 case 'E':
897 case 'f':
898 case 'F':
899 case 'g':
900 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200901 if (width == -1 && prec == -1
902 && !(flags & (F_SIGN | F_BLANK)))
903 {
904 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200905 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200906 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200907 if (res == NULL)
908 goto error;
909 continue;
910 }
911
Victor Stinnerad771582015-10-09 12:38:53 +0200912 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 goto error;
914 pbuf = PyBytes_AS_STRING(temp);
915 len = PyBytes_GET_SIZE(temp);
916 sign = 1;
917 if (flags & F_ZERO)
918 fill = '0';
919 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200920
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200922 pbuf = &onechar;
923 len = byte_converter(v, &onechar);
924 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200926 if (width == -1) {
927 /* Fast path */
928 *res++ = onechar;
929 continue;
930 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 default:
934 PyErr_Format(PyExc_ValueError,
935 "unsupported format character '%c' (0x%x) "
936 "at index %zd",
937 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200938 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 goto error;
940 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200941
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 if (sign) {
943 if (*pbuf == '-' || *pbuf == '+') {
944 sign = *pbuf++;
945 len--;
946 }
947 else if (flags & F_SIGN)
948 sign = '+';
949 else if (flags & F_BLANK)
950 sign = ' ';
951 else
952 sign = 0;
953 }
954 if (width < len)
955 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200956
957 alloc = width;
958 if (sign != 0 && len == width)
959 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200960 /* 2: size preallocated for %s */
961 if (alloc > 2) {
962 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200963 if (res == NULL)
964 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200966#ifndef NDEBUG
967 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968#endif
969
970 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800971 if (sign) {
972 if (fill != ' ')
973 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800974 if (width > len)
975 width--;
976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977
978 /* Write the numeric prefix for "x", "X" and "o" formats
979 if the alternate form is used.
980 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200981 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 assert(pbuf[0] == '0');
983 assert(pbuf[1] == c);
984 if (fill != ' ') {
985 *res++ = *pbuf++;
986 *res++ = *pbuf++;
987 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800988 width -= 2;
989 if (width < 0)
990 width = 0;
991 len -= 2;
992 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
994 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996 memset(res, fill, width - len);
997 res += (width - len);
998 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001000
1001 /* If padding with spaces: write sign if needed and/or numeric
1002 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 if (fill == ' ') {
1004 if (sign)
1005 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001006 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 assert(pbuf[0] == '0');
1008 assert(pbuf[1] == c);
1009 *res++ = *pbuf++;
1010 *res++ = *pbuf++;
1011 }
1012 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001015 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001017
1018 /* Pad right with the fill character if needed */
1019 if (width > len) {
1020 memset(res, ' ', width - len);
1021 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001024 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 PyErr_SetString(PyExc_TypeError,
1026 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
1028 goto error;
1029 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001030 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001032#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033 /* check that we computed the exact size for this write */
1034 assert((res - before) == alloc);
1035#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001037
1038 /* If overallocation was disabled, ensure that it was the last
1039 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001040 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 if (argidx < arglen && !dict) {
1044 PyErr_SetString(PyExc_TypeError,
1045 "not all arguments converted during bytes formatting");
1046 goto error;
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049 if (args_owned) {
1050 Py_DECREF(args);
1051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053
1054 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001056 if (args_owned) {
1057 Py_DECREF(args);
1058 }
1059 return NULL;
1060}
1061
Greg Price3a4f6672019-09-12 11:12:22 -07001062/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001063PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_ssize_t len,
1065 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001066 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001069 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001071 _PyBytesWriter writer;
1072
1073 _PyBytesWriter_Init(&writer);
1074
1075 p = _PyBytesWriter_Alloc(&writer, len);
1076 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001078 writer.overallocate = 1;
1079
Eric V. Smith42454af2016-10-31 09:22:08 -04001080 *first_invalid_escape = NULL;
1081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001085 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 continue;
1087 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001090 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 PyErr_SetString(PyExc_ValueError,
1092 "Trailing \\ in string");
1093 goto failed;
1094 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 switch (*s++) {
1097 /* XXX This assumes ASCII! */
1098 case '\n': break;
1099 case '\\': *p++ = '\\'; break;
1100 case '\'': *p++ = '\''; break;
1101 case '\"': *p++ = '\"'; break;
1102 case 'b': *p++ = '\b'; break;
1103 case 'f': *p++ = '\014'; break; /* FF */
1104 case 't': *p++ = '\t'; break;
1105 case 'n': *p++ = '\n'; break;
1106 case 'r': *p++ = '\r'; break;
1107 case 'v': *p++ = '\013'; break; /* VT */
1108 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1109 case '0': case '1': case '2': case '3':
1110 case '4': case '5': case '6': case '7':
1111 c = s[-1] - '0';
1112 if (s < end && '0' <= *s && *s <= '7') {
1113 c = (c<<3) + *s++ - '0';
1114 if (s < end && '0' <= *s && *s <= '7')
1115 c = (c<<3) + *s++ - '0';
1116 }
1117 *p++ = c;
1118 break;
1119 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001120 if (s+1 < end) {
1121 int digit1, digit2;
1122 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1123 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1124 if (digit1 < 16 && digit2 < 16) {
1125 *p++ = (unsigned char)((digit1 << 4) + digit2);
1126 s += 2;
1127 break;
1128 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 /* invalid hexadecimal digits */
1131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001133 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001134 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001135 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 goto failed;
1137 }
1138 if (strcmp(errors, "replace") == 0) {
1139 *p++ = '?';
1140 } else if (strcmp(errors, "ignore") == 0)
1141 /* do nothing */;
1142 else {
1143 PyErr_Format(PyExc_ValueError,
1144 "decoding error; unknown "
1145 "error handling code: %.400s",
1146 errors);
1147 goto failed;
1148 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001149 /* skip \x */
1150 if (s < end && Py_ISXDIGIT(s[0]))
1151 s++; /* and a hexdigit */
1152 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001155 if (*first_invalid_escape == NULL) {
1156 *first_invalid_escape = s-1; /* Back up one char, since we've
1157 already incremented s. */
1158 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001160 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 }
1162 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001163
1164 return _PyBytesWriter_Finish(&writer, p);
1165
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001167 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169}
1170
Eric V. Smith42454af2016-10-31 09:22:08 -04001171PyObject *PyBytes_DecodeEscape(const char *s,
1172 Py_ssize_t len,
1173 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001174 Py_ssize_t Py_UNUSED(unicode),
1175 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001176{
1177 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001178 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001179 &first_invalid_escape);
1180 if (result == NULL)
1181 return NULL;
1182 if (first_invalid_escape != NULL) {
1183 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1184 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001185 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001186 Py_DECREF(result);
1187 return NULL;
1188 }
1189 }
1190 return result;
1191
1192}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193/* -------------------------------------------------------------------- */
1194/* object api */
1195
1196Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001197PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (!PyBytes_Check(op)) {
1200 PyErr_Format(PyExc_TypeError,
1201 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1202 return -1;
1203 }
1204 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205}
1206
1207char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001208PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return NULL;
1214 }
1215 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216}
1217
1218int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001219PyBytes_AsStringAndSize(PyObject *obj,
1220 char **s,
1221 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 if (s == NULL) {
1224 PyErr_BadInternalCall();
1225 return -1;
1226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (!PyBytes_Check(obj)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1231 return -1;
1232 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 *s = PyBytes_AS_STRING(obj);
1235 if (len != NULL)
1236 *len = PyBytes_GET_SIZE(obj);
1237 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001238 PyErr_SetString(PyExc_ValueError,
1239 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return -1;
1241 }
1242 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
1245/* -------------------------------------------------------------------- */
1246/* Methods */
1247
Eric Smith0923d1d2009-04-16 20:16:10 +00001248#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001249
1250#include "stringlib/fastsearch.h"
1251#include "stringlib/count.h"
1252#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001253#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001254#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001255#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001256#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
Eric Smith0f78bff2009-11-30 01:01:42 +00001258#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260PyObject *
1261PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001263 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001265 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001267 unsigned char quote;
1268 const unsigned char *s;
1269 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001270
1271 /* Compute size of output string */
1272 squotes = dquotes = 0;
1273 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001274 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001276 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001277 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001278 case '\'': squotes++; break;
1279 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001281 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 default:
1283 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001284 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001286 if (newsize > PY_SSIZE_T_MAX - incr)
1287 goto overflow;
1288 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 }
1290 quote = '\'';
1291 if (smartquotes && squotes && !dquotes)
1292 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001293 if (squotes && quote == '\'') {
1294 if (newsize > PY_SSIZE_T_MAX - squotes)
1295 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298
1299 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 if (v == NULL) {
1301 return NULL;
1302 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 *p++ = 'b', *p++ = quote;
1306 for (i = 0; i < length; i++) {
1307 unsigned char c = op->ob_sval[i];
1308 if (c == quote || c == '\\')
1309 *p++ = '\\', *p++ = c;
1310 else if (c == '\t')
1311 *p++ = '\\', *p++ = 't';
1312 else if (c == '\n')
1313 *p++ = '\\', *p++ = 'n';
1314 else if (c == '\r')
1315 *p++ = '\\', *p++ = 'r';
1316 else if (c < ' ' || c >= 0x7f) {
1317 *p++ = '\\';
1318 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001319 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1320 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 else
1323 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001326 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001328
1329 overflow:
1330 PyErr_SetString(PyExc_OverflowError,
1331 "bytes object is too large to make repr");
1332 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001333}
1334
Neal Norwitz6968b052007-02-27 19:02:19 +00001335static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001336bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001339}
1340
Neal Norwitz6968b052007-02-27 19:02:19 +00001341static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001342bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001343{
Victor Stinnerda7933e2020-04-13 03:04:28 +02001344 if (_Py_GetConfig()->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001346 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
1350 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001351}
1352
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001354bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357}
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359/* This is also used by PyBytes_Concat() */
1360static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001361bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 Py_buffer va, vb;
1364 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 va.len = -1;
1367 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001368 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1369 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001371 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 goto done;
1373 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 /* Optimize end cases */
1376 if (va.len == 0 && PyBytes_CheckExact(b)) {
1377 result = b;
1378 Py_INCREF(result);
1379 goto done;
1380 }
1381 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1382 result = a;
1383 Py_INCREF(result);
1384 goto done;
1385 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001387 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 PyErr_NoMemory();
1389 goto done;
1390 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001392 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 if (result != NULL) {
1394 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1395 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
1398 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (va.len != -1)
1400 PyBuffer_Release(&va);
1401 if (vb.len != -1)
1402 PyBuffer_Release(&vb);
1403 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404}
Neal Norwitz6968b052007-02-27 19:02:19 +00001405
1406static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001407bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001408{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409 Py_ssize_t i;
1410 Py_ssize_t j;
1411 Py_ssize_t size;
1412 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 size_t nbytes;
1414 if (n < 0)
1415 n = 0;
1416 /* watch out for overflows: the size can overflow int,
1417 * and the # of bytes needed can overflow size_t
1418 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001419 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyErr_SetString(PyExc_OverflowError,
1421 "repeated bytes are too long");
1422 return NULL;
1423 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001424 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1426 Py_INCREF(a);
1427 return (PyObject *)a;
1428 }
1429 nbytes = (size_t)size;
1430 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1431 PyErr_SetString(PyExc_OverflowError,
1432 "repeated bytes are too long");
1433 return NULL;
1434 }
1435 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1436 if (op == NULL)
1437 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001438 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 op->ob_shash = -1;
1440 op->ob_sval[size] = '\0';
1441 if (Py_SIZE(a) == 1 && n > 0) {
1442 memset(op->ob_sval, a->ob_sval[0] , n);
1443 return (PyObject *) op;
1444 }
1445 i = 0;
1446 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001447 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 i = Py_SIZE(a);
1449 }
1450 while (i < size) {
1451 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001452 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 i += j;
1454 }
1455 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001456}
1457
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001458static int
1459bytes_contains(PyObject *self, PyObject *arg)
1460{
1461 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1462}
1463
Neal Norwitz6968b052007-02-27 19:02:19 +00001464static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001465bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (i < 0 || i >= Py_SIZE(a)) {
1468 PyErr_SetString(PyExc_IndexError, "index out of range");
1469 return NULL;
1470 }
1471 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001472}
1473
Benjamin Peterson621b4302016-09-09 13:54:34 -07001474static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001475bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1476{
1477 int cmp;
1478 Py_ssize_t len;
1479
1480 len = Py_SIZE(a);
1481 if (Py_SIZE(b) != len)
1482 return 0;
1483
1484 if (a->ob_sval[0] != b->ob_sval[0])
1485 return 0;
1486
1487 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1488 return (cmp == 0);
1489}
1490
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001492bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001493{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 int c;
1495 Py_ssize_t len_a, len_b;
1496 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001497 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 /* Make sure both arguments are strings. */
1500 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerda7933e2020-04-13 03:04:28 +02001501 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001502 rc = PyObject_IsInstance((PyObject*)a,
1503 (PyObject*)&PyUnicode_Type);
1504 if (!rc)
1505 rc = PyObject_IsInstance((PyObject*)b,
1506 (PyObject*)&PyUnicode_Type);
1507 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001509 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001510 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001511 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001512 return NULL;
1513 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001514 else {
1515 rc = PyObject_IsInstance((PyObject*)a,
1516 (PyObject*)&PyLong_Type);
1517 if (!rc)
1518 rc = PyObject_IsInstance((PyObject*)b,
1519 (PyObject*)&PyLong_Type);
1520 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001521 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001522 if (rc) {
1523 if (PyErr_WarnEx(PyExc_BytesWarning,
1524 "Comparison between bytes and int", 1))
1525 return NULL;
1526 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001527 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 }
stratakise8b19652017-11-02 11:32:54 +01001529 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001531 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001533 case Py_EQ:
1534 case Py_LE:
1535 case Py_GE:
1536 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001537 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 case Py_NE:
1539 case Py_LT:
1540 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001541 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001542 default:
1543 PyErr_BadArgument();
1544 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
1546 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547 else if (op == Py_EQ || op == Py_NE) {
1548 int eq = bytes_compare_eq(a, b);
1549 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001550 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001551 }
1552 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001553 len_a = Py_SIZE(a);
1554 len_b = Py_SIZE(b);
1555 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001556 if (min_len > 0) {
1557 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001558 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 else
1562 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001563 if (c != 0)
1564 Py_RETURN_RICHCOMPARE(c, 0, op);
1565 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001567}
1568
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001569static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001570bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001571{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001572 if (a->ob_shash == -1) {
1573 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001574 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001575 }
1576 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001577}
1578
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001580bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581{
Victor Stinnera15e2602020-04-08 02:01:56 +02001582 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1584 if (i == -1 && PyErr_Occurred())
1585 return NULL;
1586 if (i < 0)
1587 i += PyBytes_GET_SIZE(self);
1588 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1589 PyErr_SetString(PyExc_IndexError,
1590 "index out of range");
1591 return NULL;
1592 }
1593 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1594 }
1595 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001596 Py_ssize_t start, stop, step, slicelength, i;
1597 size_t cur;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001598 const char* source_buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 char* result_buf;
1600 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001601
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001602 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 return NULL;
1604 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001605 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1606 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (slicelength <= 0) {
1609 return PyBytes_FromStringAndSize("", 0);
1610 }
1611 else if (start == 0 && step == 1 &&
1612 slicelength == PyBytes_GET_SIZE(self) &&
1613 PyBytes_CheckExact(self)) {
1614 Py_INCREF(self);
1615 return (PyObject *)self;
1616 }
1617 else if (step == 1) {
1618 return PyBytes_FromStringAndSize(
1619 PyBytes_AS_STRING(self) + start,
1620 slicelength);
1621 }
1622 else {
1623 source_buf = PyBytes_AS_STRING(self);
1624 result = PyBytes_FromStringAndSize(NULL, slicelength);
1625 if (result == NULL)
1626 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 result_buf = PyBytes_AS_STRING(result);
1629 for (cur = start, i = 0; i < slicelength;
1630 cur += step, i++) {
1631 result_buf[i] = source_buf[cur];
1632 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 return result;
1635 }
1636 }
1637 else {
1638 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001639 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 Py_TYPE(item)->tp_name);
1641 return NULL;
1642 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643}
1644
1645static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001646bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1649 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001652static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 (lenfunc)bytes_length, /*sq_length*/
1654 (binaryfunc)bytes_concat, /*sq_concat*/
1655 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1656 (ssizeargfunc)bytes_item, /*sq_item*/
1657 0, /*sq_slice*/
1658 0, /*sq_ass_item*/
1659 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001660 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661};
1662
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001663static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 (lenfunc)bytes_length,
1665 (binaryfunc)bytes_subscript,
1666 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001667};
1668
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001669static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 (getbufferproc)bytes_buffer_getbuffer,
1671 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672};
1673
1674
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP  0    /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP  2    /* strip both ends */
1678
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001679/*[clinic input]
1680bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682 sep: object = None
1683 The delimiter according which to split the bytes.
1684 None (the default value) means split on ASCII whitespace characters
1685 (space, tab, return, newline, formfeed, vertical tab).
1686 maxsplit: Py_ssize_t = -1
1687 Maximum number of splits to do.
1688 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001690Return a list of the sections in the bytes, using sep as the delimiter.
1691[clinic start generated code]*/
1692
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001694bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1695/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001696{
1697 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 const char *s = PyBytes_AS_STRING(self), *sub;
1699 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 if (maxsplit < 0)
1703 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001704 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001706 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return NULL;
1708 sub = vsub.buf;
1709 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1712 PyBuffer_Release(&vsub);
1713 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001714}
1715
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716/*[clinic input]
1717bytes.partition
1718
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001719 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720 /
1721
1722Partition the bytes into three parts using the given separator.
1723
1724This will search for the separator sep in the bytes. If the separator is found,
1725returns a 3-tuple containing the part before the separator, the separator
1726itself, and the part after it.
1727
1728If the separator is not found, returns a 3-tuple containing the original bytes
1729object and two empty bytes objects.
1730[clinic start generated code]*/
1731
Neal Norwitz6968b052007-02-27 19:02:19 +00001732static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001733bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001734/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001735{
Neal Norwitz6968b052007-02-27 19:02:19 +00001736 return stringlib_partition(
1737 (PyObject*) self,
1738 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001739 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001740 );
1741}
1742
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001743/*[clinic input]
1744bytes.rpartition
1745
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001746 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001747 /
1748
1749Partition the bytes into three parts using the given separator.
1750
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001751This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752the separator is found, returns a 3-tuple containing the part before the
1753separator, the separator itself, and the part after it.
1754
1755If the separator is not found, returns a 3-tuple containing two empty bytes
1756objects and the original bytes object.
1757[clinic start generated code]*/
1758
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001759static PyObject *
1760bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001761/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 return stringlib_rpartition(
1764 (PyObject*) self,
1765 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001768}
1769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770/*[clinic input]
1771bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773Return a list of the sections in the bytes, using sep as the delimiter.
1774
1775Splitting is done starting at the end of the bytes and working to the front.
1776[clinic start generated code]*/
1777
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001779bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781{
1782 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 const char *s = PyBytes_AS_STRING(self), *sub;
1784 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if (maxsplit < 0)
1788 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001791 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return NULL;
1793 sub = vsub.buf;
1794 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1797 PyBuffer_Release(&vsub);
1798 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001799}
1800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001802/*[clinic input]
1803bytes.join
1804
1805 iterable_of_bytes: object
1806 /
1807
1808Concatenate any number of bytes objects.
1809
1810The bytes whose method is called is inserted in between each pair.
1811
1812The result is returned as a new bytes object.
1813
1814Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1815[clinic start generated code]*/
1816
Neal Norwitz6968b052007-02-27 19:02:19 +00001817static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001818bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1819/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001820{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001821 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001822}
1823
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824PyObject *
1825_PyBytes_Join(PyObject *sep, PyObject *x)
1826{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 assert(sep != NULL && PyBytes_Check(sep));
1828 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001829 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830}
1831
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001832static PyObject *
1833bytes_find(PyBytesObject *self, PyObject *args)
1834{
1835 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1836}
1837
1838static PyObject *
1839bytes_index(PyBytesObject *self, PyObject *args)
1840{
1841 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1842}
1843
1844
1845static PyObject *
1846bytes_rfind(PyBytesObject *self, PyObject *args)
1847{
1848 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1849}
1850
1851
1852static PyObject *
1853bytes_rindex(PyBytesObject *self, PyObject *args)
1854{
1855 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1856}
1857
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
1859Py_LOCAL_INLINE(PyObject *)
1860do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 Py_buffer vsep;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001863 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 Py_ssize_t len = PyBytes_GET_SIZE(self);
1865 char *sep;
1866 Py_ssize_t seplen;
1867 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001869 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 return NULL;
1871 sep = vsep.buf;
1872 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 i = 0;
1875 if (striptype != RIGHTSTRIP) {
1876 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1877 i++;
1878 }
1879 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 j = len;
1882 if (striptype != LEFTSTRIP) {
1883 do {
1884 j--;
1885 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1886 j++;
1887 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1892 Py_INCREF(self);
1893 return (PyObject*)self;
1894 }
1895 else
1896 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001897}
1898
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
1900Py_LOCAL_INLINE(PyObject *)
1901do_strip(PyBytesObject *self, int striptype)
1902{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001903 const char *s = PyBytes_AS_STRING(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 i = 0;
1907 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001908 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 i++;
1910 }
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 j = len;
1914 if (striptype != LEFTSTRIP) {
1915 do {
1916 j--;
David Malcolm96960882010-11-05 17:23:41 +00001917 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 j++;
1919 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1922 Py_INCREF(self);
1923 return (PyObject*)self;
1924 }
1925 else
1926 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927}
1928
1929
1930Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001931do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001933 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001934 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 }
1936 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937}
1938
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001939/*[clinic input]
1940bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942 bytes: object = None
1943 /
1944
1945Strip leading and trailing bytes contained in the argument.
1946
1947If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1948[clinic start generated code]*/
1949
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001951bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001952/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001953{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001955}
1956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957/*[clinic input]
1958bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960 bytes: object = None
1961 /
1962
1963Strip leading bytes contained in the argument.
1964
1965If the argument is omitted or None, strip leading ASCII whitespace.
1966[clinic start generated code]*/
1967
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001968static PyObject *
1969bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001970/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971{
1972 return do_argstrip(self, LEFTSTRIP, bytes);
1973}
1974
1975/*[clinic input]
1976bytes.rstrip
1977
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001978 bytes: object = None
1979 /
1980
1981Strip trailing bytes contained in the argument.
1982
1983If the argument is omitted or None, strip trailing ASCII whitespace.
1984[clinic start generated code]*/
1985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986static PyObject *
1987bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001988/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989{
1990 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001991}
Neal Norwitz6968b052007-02-27 19:02:19 +00001992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001994static PyObject *
1995bytes_count(PyBytesObject *self, PyObject *args)
1996{
1997 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1998}
1999
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001/*[clinic input]
2002bytes.translate
2003
Victor Stinner049e5092014-08-17 22:20:00 +02002004 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002007 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008
2009Return a copy with each character mapped by the given translation table.
2010
Martin Panter1b6c6da2016-08-27 08:35:02 +00002011All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012The remaining characters are mapped through the given translation table.
2013[clinic start generated code]*/
2014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002016bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002017 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002018/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002020 const char *input;
2021 char *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002022 Py_buffer table_view = {NULL, NULL};
2023 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002025 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 Py_ssize_t inlen, tablen, dellen = 0;
2029 PyObject *result;
2030 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002032 if (PyBytes_Check(table)) {
2033 table_chars = PyBytes_AS_STRING(table);
2034 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002036 else if (table == Py_None) {
2037 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 tablen = 256;
2039 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002040 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002041 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002042 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002043 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002044 tablen = table_view.len;
2045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (tablen != 256) {
2048 PyErr_SetString(PyExc_ValueError,
2049 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002050 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 return NULL;
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002054 if (deletechars != NULL) {
2055 if (PyBytes_Check(deletechars)) {
2056 del_table_chars = PyBytes_AS_STRING(deletechars);
2057 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002060 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 PyBuffer_Release(&table_view);
2062 return NULL;
2063 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002064 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 dellen = del_table_view.len;
2066 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 }
2068 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 dellen = 0;
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 inlen = PyBytes_GET_SIZE(input_obj);
2074 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002075 if (result == NULL) {
2076 PyBuffer_Release(&del_table_view);
2077 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002079 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002080 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 /* If no deletions are required, use faster code */
2085 for (i = inlen; --i >= 0; ) {
2086 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 changed = 1;
2089 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002090 if (!changed && PyBytes_CheckExact(input_obj)) {
2091 Py_INCREF(input_obj);
2092 Py_DECREF(result);
2093 result = input_obj;
2094 }
2095 PyBuffer_Release(&del_table_view);
2096 PyBuffer_Release(&table_view);
2097 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 for (i = 0; i < 256; i++)
2102 trans_table[i] = Py_CHARMASK(i);
2103 } else {
2104 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002105 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002111 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 for (i = inlen; --i >= 0; ) {
2114 c = Py_CHARMASK(*input++);
2115 if (trans_table[c] != -1)
2116 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2117 continue;
2118 changed = 1;
2119 }
2120 if (!changed && PyBytes_CheckExact(input_obj)) {
2121 Py_DECREF(result);
2122 Py_INCREF(input_obj);
2123 return input_obj;
2124 }
2125 /* Fix the size of the resulting string */
2126 if (inlen > 0)
2127 _PyBytes_Resize(&result, output - output_start);
2128 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132/*[clinic input]
2133
2134@staticmethod
2135bytes.maketrans
2136
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002137 frm: Py_buffer
2138 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 /
2140
2141Return a translation table useable for the bytes or bytearray translate method.
2142
2143The returned table will be one where each byte in frm is mapped to the byte at
2144the same position in to.
2145
2146The bytes objects frm and to must be of the same length.
2147[clinic start generated code]*/
2148
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002150bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002151/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152{
2153 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002154}
2155
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156
2157/*[clinic input]
2158bytes.replace
2159
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002160 old: Py_buffer
2161 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162 count: Py_ssize_t = -1
2163 Maximum number of occurrences to replace.
2164 -1 (the default value) means replace all occurrences.
2165 /
2166
2167Return a copy with all occurrences of substring old replaced by new.
2168
2169If the optional argument count is given, only the first count occurrences are
2170replaced.
2171[clinic start generated code]*/
2172
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002174bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002175 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002176/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002178 return stringlib_replace((PyObject *)self,
2179 (const char *)old->buf, old->len,
2180 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181}
2182
2183/** End DALKE **/
2184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002186static PyObject *
2187bytes_startswith(PyBytesObject *self, PyObject *args)
2188{
2189 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2190}
2191
2192static PyObject *
2193bytes_endswith(PyBytesObject *self, PyObject *args)
2194{
2195 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2196}
2197
2198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199/*[clinic input]
2200bytes.decode
2201
2202 encoding: str(c_default="NULL") = 'utf-8'
2203 The encoding with which to decode the bytes.
2204 errors: str(c_default="NULL") = 'strict'
2205 The error handling scheme to use for the handling of decoding errors.
2206 The default is 'strict' meaning that decoding errors raise a
2207 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2208 as well as any other name registered with codecs.register_error that
2209 can handle UnicodeDecodeErrors.
2210
2211Decode the bytes using the codec registered for encoding.
2212[clinic start generated code]*/
2213
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002215bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002216 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002217/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002218{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002219 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002220}
2221
Guido van Rossum20188312006-05-05 15:15:40 +00002222
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002223/*[clinic input]
2224bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002225
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002226 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002227
2228Return a list of the lines in the bytes, breaking at line boundaries.
2229
2230Line breaks are not included in the resulting list unless keepends is given and
2231true.
2232[clinic start generated code]*/
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002235bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002236/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002238 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002239 (PyObject*) self, PyBytes_AS_STRING(self),
2240 PyBytes_GET_SIZE(self), keepends
2241 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002242}
2243
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244/*[clinic input]
2245@classmethod
2246bytes.fromhex
2247
2248 string: unicode
2249 /
2250
2251Create a bytes object from a string of hexadecimal numbers.
2252
2253Spaces between two numbers are accepted.
2254Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2255[clinic start generated code]*/
2256
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002257static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002258bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002259/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002261 PyObject *result = _PyBytes_FromHex(string, 0);
2262 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002263 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002264 }
2265 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002266}
2267
2268PyObject*
2269_PyBytes_FromHex(PyObject *string, int use_bytearray)
2270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002272 Py_ssize_t hexlen, invalid_char;
2273 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002274 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002275 _PyBytesWriter writer;
2276
2277 _PyBytesWriter_Init(&writer);
2278 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002279
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002280 assert(PyUnicode_Check(string));
2281 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002283 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002284
Victor Stinner2bf89932015-10-14 11:25:33 +02002285 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002286 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002287 unsigned int kind = PyUnicode_KIND(string);
2288 Py_ssize_t i;
2289
2290 /* search for the first non-ASCII character */
2291 for (i = 0; i < hexlen; i++) {
2292 if (PyUnicode_READ(kind, data, i) >= 128)
2293 break;
2294 }
2295 invalid_char = i;
2296 goto error;
2297 }
2298
2299 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2300 str = PyUnicode_1BYTE_DATA(string);
2301
2302 /* This overestimates if there are spaces */
2303 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2304 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002306
2307 end = str + hexlen;
2308 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002310 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002311 do {
2312 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002313 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002314 if (str >= end)
2315 break;
2316 }
2317
2318 top = _PyLong_DigitValue[*str];
2319 if (top >= 16) {
2320 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 goto error;
2322 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002323 str++;
2324
2325 bot = _PyLong_DigitValue[*str];
2326 if (bot >= 16) {
2327 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2328 goto error;
2329 }
2330 str++;
2331
2332 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002334
2335 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002336
2337 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002338 PyErr_Format(PyExc_ValueError,
2339 "non-hexadecimal number found in "
2340 "fromhex() arg at position %zd", invalid_char);
2341 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002343}
2344
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002345/*[clinic input]
2346bytes.hex
2347
2348 sep: object = NULL
2349 An optional single character or byte to separate hex bytes.
2350 bytes_per_sep: int = 1
2351 How many bytes between separators. Positive values count from the
2352 right, negative values count from the left.
2353
2354Create a str of hexadecimal numbers from a bytes object.
2355
2356Example:
2357>>> value = b'\xb9\x01\xef'
2358>>> value.hex()
2359'b901ef'
2360>>> value.hex(':')
2361'b9:01:ef'
2362>>> value.hex(':', 2)
2363'b9:01ef'
2364>>> value.hex(':', -2)
2365'b901:ef'
2366[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002367
2368static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002369bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2370/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002371{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002372 const char *argbuf = PyBytes_AS_STRING(self);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002373 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002374 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002375}
2376
2377static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302378bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002381}
2382
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002383
/* Method table installed in PyBytes_Type.tp_methods.

   Entries are kept in alphabetical order by method name.  Rows written as a
   bare *_METHODDEF name are macros that expand to a complete table entry
   (presumably supplied by the Argument Clinic generated header and the
   stringlib headers -- confirm against the includes); the remaining rows
   are spelled out by hand.  The table ends with a NULL sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2443
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002444static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002445bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002446{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002447 if (!PyBytes_Check(self)) {
2448 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002449 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002450 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002451 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002452}
2453
2454static PyNumberMethods bytes_as_number = {
2455 0, /*nb_add*/
2456 0, /*nb_subtract*/
2457 0, /*nb_multiply*/
2458 bytes_mod, /*nb_remainder*/
2459};
2460
2461static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002462bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463
2464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002465bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 PyObject *x = NULL;
2468 const char *encoding = NULL;
2469 const char *errors = NULL;
2470 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002471 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 Py_ssize_t size;
2473 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002476 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2478 &encoding, &errors))
2479 return NULL;
2480 if (x == NULL) {
2481 if (encoding != NULL || errors != NULL) {
2482 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002483 encoding != NULL ?
2484 "encoding without a string argument" :
2485 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 return NULL;
2487 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002488 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002491 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002493 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002495 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 return NULL;
2497 }
2498 new = PyUnicode_AsEncodedString(x, encoding, errors);
2499 if (new == NULL)
2500 return NULL;
2501 assert(PyBytes_Check(new));
2502 return new;
2503 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002504
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002505 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002506 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002507 PyUnicode_Check(x) ?
2508 "string argument without an encoding" :
2509 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002510 return NULL;
2511 }
2512
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002513 /* We'd like to call PyObject_Bytes here, but we need to check for an
2514 integer argument before deferring to PyBytes_FromObject, something
2515 PyObject_Bytes doesn't do. */
2516 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2517 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002518 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002519 Py_DECREF(func);
2520 if (new == NULL)
2521 return NULL;
2522 if (!PyBytes_Check(new)) {
2523 PyErr_Format(PyExc_TypeError,
2524 "__bytes__ returned non-bytes (type %.200s)",
2525 Py_TYPE(new)->tp_name);
2526 Py_DECREF(new);
2527 return NULL;
2528 }
2529 return new;
2530 }
2531 else if (PyErr_Occurred())
2532 return NULL;
2533
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002534 if (PyUnicode_Check(x)) {
2535 PyErr_SetString(PyExc_TypeError,
2536 "string argument without an encoding");
2537 return NULL;
2538 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002540 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002541 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2542 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002543 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002544 return NULL;
2545 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002546 }
INADA Naokia634e232017-01-06 17:32:01 +09002547 else {
2548 if (size < 0) {
2549 PyErr_SetString(PyExc_ValueError, "negative count");
2550 return NULL;
2551 }
2552 new = _PyBytes_FromSize(size, 1);
2553 if (new == NULL)
2554 return NULL;
2555 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002559 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002560}
2561
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002562static PyObject*
2563_PyBytes_FromBuffer(PyObject *x)
2564{
2565 PyObject *new;
2566 Py_buffer view;
2567
2568 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2569 return NULL;
2570
2571 new = PyBytes_FromStringAndSize(NULL, view.len);
2572 if (!new)
2573 goto fail;
2574 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2575 &view, view.len, 'C') < 0)
2576 goto fail;
2577 PyBuffer_Release(&view);
2578 return new;
2579
2580fail:
2581 Py_XDECREF(new);
2582 PyBuffer_Release(&view);
2583 return NULL;
2584}
2585
2586static PyObject*
2587_PyBytes_FromList(PyObject *x)
2588{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002589 Py_ssize_t i, size = PyList_GET_SIZE(x);
2590 Py_ssize_t value;
2591 char *str;
2592 PyObject *item;
2593 _PyBytesWriter writer;
2594
2595 _PyBytesWriter_Init(&writer);
2596 str = _PyBytesWriter_Alloc(&writer, size);
2597 if (str == NULL)
2598 return NULL;
2599 writer.overallocate = 1;
2600 size = writer.allocated;
2601
2602 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2603 item = PyList_GET_ITEM(x, i);
2604 Py_INCREF(item);
2605 value = PyNumber_AsSsize_t(item, NULL);
2606 Py_DECREF(item);
2607 if (value == -1 && PyErr_Occurred())
2608 goto error;
2609
2610 if (value < 0 || value >= 256) {
2611 PyErr_SetString(PyExc_ValueError,
2612 "bytes must be in range(0, 256)");
2613 goto error;
2614 }
2615
2616 if (i >= size) {
2617 str = _PyBytesWriter_Resize(&writer, str, size+1);
2618 if (str == NULL)
2619 return NULL;
2620 size = writer.allocated;
2621 }
2622 *str++ = (char) value;
2623 }
2624 return _PyBytesWriter_Finish(&writer, str);
2625
2626 error:
2627 _PyBytesWriter_Dealloc(&writer);
2628 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002629}
2630
2631static PyObject*
2632_PyBytes_FromTuple(PyObject *x)
2633{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002634 PyObject *bytes;
2635 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2636 Py_ssize_t value;
2637 char *str;
2638 PyObject *item;
2639
2640 bytes = PyBytes_FromStringAndSize(NULL, size);
2641 if (bytes == NULL)
2642 return NULL;
2643 str = ((PyBytesObject *)bytes)->ob_sval;
2644
2645 for (i = 0; i < size; i++) {
2646 item = PyTuple_GET_ITEM(x, i);
2647 value = PyNumber_AsSsize_t(item, NULL);
2648 if (value == -1 && PyErr_Occurred())
2649 goto error;
2650
2651 if (value < 0 || value >= 256) {
2652 PyErr_SetString(PyExc_ValueError,
2653 "bytes must be in range(0, 256)");
2654 goto error;
2655 }
2656 *str++ = (char) value;
2657 }
2658 return bytes;
2659
2660 error:
2661 Py_DECREF(bytes);
2662 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002663}
2664
2665static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002666_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002667{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002668 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002670 _PyBytesWriter writer;
2671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002673 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 if (size == -1 && PyErr_Occurred())
2675 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002676
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002677 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002678 str = _PyBytesWriter_Alloc(&writer, size);
2679 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002681 writer.overallocate = 1;
2682 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Run the iterator to exhaustion */
2685 for (i = 0; ; i++) {
2686 PyObject *item;
2687 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 /* Get the next item */
2690 item = PyIter_Next(it);
2691 if (item == NULL) {
2692 if (PyErr_Occurred())
2693 goto error;
2694 break;
2695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002698 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 Py_DECREF(item);
2700 if (value == -1 && PyErr_Occurred())
2701 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* Range check */
2704 if (value < 0 || value >= 256) {
2705 PyErr_SetString(PyExc_ValueError,
2706 "bytes must be in range(0, 256)");
2707 goto error;
2708 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 /* Append the byte */
2711 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002712 str = _PyBytesWriter_Resize(&writer, str, size+1);
2713 if (str == NULL)
2714 return NULL;
2715 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002717 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002719
2720 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
2722 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002723 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725}
2726
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002727PyObject *
2728PyBytes_FromObject(PyObject *x)
2729{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002730 PyObject *it, *result;
2731
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002732 if (x == NULL) {
2733 PyErr_BadInternalCall();
2734 return NULL;
2735 }
2736
2737 if (PyBytes_CheckExact(x)) {
2738 Py_INCREF(x);
2739 return x;
2740 }
2741
2742 /* Use the modern buffer interface */
2743 if (PyObject_CheckBuffer(x))
2744 return _PyBytes_FromBuffer(x);
2745
2746 if (PyList_CheckExact(x))
2747 return _PyBytes_FromList(x);
2748
2749 if (PyTuple_CheckExact(x))
2750 return _PyBytes_FromTuple(x);
2751
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002752 if (!PyUnicode_Check(x)) {
2753 it = PyObject_GetIter(x);
2754 if (it != NULL) {
2755 result = _PyBytes_FromIterator(it, x);
2756 Py_DECREF(it);
2757 return result;
2758 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002759 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2760 return NULL;
2761 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002762 }
2763
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002764 PyErr_Format(PyExc_TypeError,
2765 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002766 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002768}
2769
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002771bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002773 PyObject *tmp, *pnew;
2774 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 assert(PyType_IsSubtype(type, &PyBytes_Type));
2777 tmp = bytes_new(&PyBytes_Type, args, kwds);
2778 if (tmp == NULL)
2779 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002780 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 n = PyBytes_GET_SIZE(tmp);
2782 pnew = type->tp_alloc(type, n);
2783 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002784 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 PyBytes_AS_STRING(tmp), n+1);
2786 ((PyBytesObject *)pnew)->ob_shash =
2787 ((PyBytesObject *)tmp)->ob_shash;
2788 }
2789 Py_DECREF(tmp);
2790 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791}
2792
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002793PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002794"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002796bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002797bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2798bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002799\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002801 - an iterable yielding integers in range(256)\n\
2802 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002803 - any object implementing the buffer API.\n\
2804 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002805
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002806static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002807
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002808PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002809 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2810 "bytes",
2811 PyBytesObject_SIZE,
2812 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002813 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002814 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 0, /* tp_getattr */
2816 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002817 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002819 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 &bytes_as_sequence, /* tp_as_sequence */
2821 &bytes_as_mapping, /* tp_as_mapping */
2822 (hashfunc)bytes_hash, /* tp_hash */
2823 0, /* tp_call */
2824 bytes_str, /* tp_str */
2825 PyObject_GenericGetAttr, /* tp_getattro */
2826 0, /* tp_setattro */
2827 &bytes_as_buffer, /* tp_as_buffer */
2828 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2829 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2830 bytes_doc, /* tp_doc */
2831 0, /* tp_traverse */
2832 0, /* tp_clear */
2833 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2834 0, /* tp_weaklistoffset */
2835 bytes_iter, /* tp_iter */
2836 0, /* tp_iternext */
2837 bytes_methods, /* tp_methods */
2838 0, /* tp_members */
2839 0, /* tp_getset */
2840 &PyBaseObject_Type, /* tp_base */
2841 0, /* tp_dict */
2842 0, /* tp_descr_get */
2843 0, /* tp_descr_set */
2844 0, /* tp_dictoffset */
2845 0, /* tp_init */
2846 0, /* tp_alloc */
2847 bytes_new, /* tp_new */
2848 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002849};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002850
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002852PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002854 assert(pv != NULL);
2855 if (*pv == NULL)
2856 return;
2857 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002858 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 return;
2860 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002861
2862 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2863 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002864 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002865 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002866
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002867 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002868 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2869 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2870 Py_CLEAR(*pv);
2871 return;
2872 }
2873
2874 oldsize = PyBytes_GET_SIZE(*pv);
2875 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2876 PyErr_NoMemory();
2877 goto error;
2878 }
2879 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2880 goto error;
2881
2882 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2883 PyBuffer_Release(&wb);
2884 return;
2885
2886 error:
2887 PyBuffer_Release(&wb);
2888 Py_CLEAR(*pv);
2889 return;
2890 }
2891
2892 else {
2893 /* Multiple references, need to create new object */
2894 PyObject *v;
2895 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002896 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898}
2899
2900void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002901PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 PyBytes_Concat(pv, w);
2904 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002905}
2906
2907
Ethan Furmanb95b5612015-01-23 20:05:18 -08002908/* The following function breaks the notion that bytes are immutable:
2909 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002911 as creating a new bytes object and destroying the old one, only
2912 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002914 Note that if there's not enough memory to resize the bytes object, the
2915 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916 memory" exception is set, and -1 is returned. Else (on success) 0 is
2917 returned, and the value in *pv may or may not be the same as on input.
2918 As always, an extra byte is allocated for a trailing \0 byte (newsize
2919 does *not* include that), and a trailing \0 byte is stored.
2920*/
2921
2922int
2923_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2924{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002925 PyObject *v;
2926 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002928 if (!PyBytes_Check(v) || newsize < 0) {
2929 goto error;
2930 }
2931 if (Py_SIZE(v) == newsize) {
2932 /* return early if newsize equals to v->ob_size */
2933 return 0;
2934 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002935 if (Py_SIZE(v) == 0) {
2936 if (newsize == 0) {
2937 return 0;
2938 }
2939 *pv = _PyBytes_FromSize(newsize, 0);
2940 Py_DECREF(v);
2941 return (*pv == NULL) ? -1 : 0;
2942 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002943 if (Py_REFCNT(v) != 1) {
2944 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002946 if (newsize == 0) {
2947 *pv = _PyBytes_FromSize(0, 0);
2948 Py_DECREF(v);
2949 return (*pv == NULL) ? -1 : 0;
2950 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01002952#ifdef Py_REF_DEBUG
2953 _Py_RefTotal--;
2954#endif
2955#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002956 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01002957#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002959 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 if (*pv == NULL) {
2961 PyObject_Del(v);
2962 PyErr_NoMemory();
2963 return -1;
2964 }
2965 _Py_NewReference(*pv);
2966 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01002967 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002968 sv->ob_sval[newsize] = '\0';
2969 sv->ob_shash = -1; /* invalidate cached hash value */
2970 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002971error:
2972 *pv = 0;
2973 Py_DECREF(v);
2974 PyErr_BadInternalCall();
2975 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976}
2977
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978void
Victor Stinnerbed48172019-08-27 00:12:32 +02002979_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002982 for (i = 0; i < UCHAR_MAX + 1; i++)
2983 Py_CLEAR(characters[i]);
2984 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985}
2986
Benjamin Peterson4116f362008-05-27 00:36:20 +00002987/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002988
2989typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 PyObject_HEAD
2991 Py_ssize_t it_index;
2992 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002994
2995static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 _PyObject_GC_UNTRACK(it);
2999 Py_XDECREF(it->it_seq);
3000 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001}
3002
3003static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 Py_VISIT(it->it_seq);
3007 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008}
3009
3010static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 PyBytesObject *seq;
3014 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 assert(it != NULL);
3017 seq = it->it_seq;
3018 if (seq == NULL)
3019 return NULL;
3020 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023 item = PyLong_FromLong(
3024 (unsigned char)seq->ob_sval[it->it_index]);
3025 if (item != NULL)
3026 ++it->it_index;
3027 return item;
3028 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003031 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033}
3034
3035static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303036striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 Py_ssize_t len = 0;
3039 if (it->it_seq)
3040 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003042}
3043
3044PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003047static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303048striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003049{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003050 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003051 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003052 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003053 it->it_seq, it->it_index);
3054 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003055 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003056 }
3057}
3058
3059PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3060
3061static PyObject *
3062striter_setstate(striterobject *it, PyObject *state)
3063{
3064 Py_ssize_t index = PyLong_AsSsize_t(state);
3065 if (index == -1 && PyErr_Occurred())
3066 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003067 if (it->it_seq != NULL) {
3068 if (index < 0)
3069 index = 0;
3070 else if (index > PyBytes_GET_SIZE(it->it_seq))
3071 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3072 it->it_index = index;
3073 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003074 Py_RETURN_NONE;
3075}
3076
3077PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3078
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003079static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3081 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003082 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3083 reduce_doc},
3084 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3085 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003086 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087};
3088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3091 "bytes_iterator", /* tp_name */
3092 sizeof(striterobject), /* tp_basicsize */
3093 0, /* tp_itemsize */
3094 /* methods */
3095 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003096 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 0, /* tp_getattr */
3098 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003099 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 0, /* tp_repr */
3101 0, /* tp_as_number */
3102 0, /* tp_as_sequence */
3103 0, /* tp_as_mapping */
3104 0, /* tp_hash */
3105 0, /* tp_call */
3106 0, /* tp_str */
3107 PyObject_GenericGetAttr, /* tp_getattro */
3108 0, /* tp_setattro */
3109 0, /* tp_as_buffer */
3110 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3111 0, /* tp_doc */
3112 (traverseproc)striter_traverse, /* tp_traverse */
3113 0, /* tp_clear */
3114 0, /* tp_richcompare */
3115 0, /* tp_weaklistoffset */
3116 PyObject_SelfIter, /* tp_iter */
3117 (iternextfunc)striter_next, /* tp_iternext */
3118 striter_methods, /* tp_methods */
3119 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003120};
3121
3122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003123bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 if (!PyBytes_Check(seq)) {
3128 PyErr_BadInternalCall();
3129 return NULL;
3130 }
3131 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3132 if (it == NULL)
3133 return NULL;
3134 it->it_index = 0;
3135 Py_INCREF(seq);
3136 it->it_seq = (PyBytesObject *)seq;
3137 _PyObject_GC_TRACK(it);
3138 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003139}
Victor Stinner00165072015-10-09 01:53:21 +02003140
3141
3142/* _PyBytesWriter API */
3143
/* Divisor used by the writer when overallocating: a request of n bytes
   is grown by n / OVERALLOCATE_FACTOR extra bytes. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3151
3152void
3153_PyBytesWriter_Init(_PyBytesWriter *writer)
3154{
Victor Stinner661aacc2015-10-14 09:41:48 +02003155 /* Set all attributes before small_buffer to 0 */
3156 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003157#ifndef NDEBUG
3158 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3159 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003160#endif
3161}
3162
3163void
3164_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3165{
3166 Py_CLEAR(writer->buffer);
3167}
3168
3169Py_LOCAL_INLINE(char*)
3170_PyBytesWriter_AsString(_PyBytesWriter *writer)
3171{
Victor Stinner661aacc2015-10-14 09:41:48 +02003172 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003173 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003174 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003175 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003176 else if (writer->use_bytearray) {
3177 assert(writer->buffer != NULL);
3178 return PyByteArray_AS_STRING(writer->buffer);
3179 }
3180 else {
3181 assert(writer->buffer != NULL);
3182 return PyBytes_AS_STRING(writer->buffer);
3183 }
Victor Stinner00165072015-10-09 01:53:21 +02003184}
3185
3186Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003187_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003188{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003189 const char *start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003190 assert(str != NULL);
3191 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003192 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003193 return str - start;
3194}
3195
Victor Stinner68762572019-10-07 18:42:01 +02003196#ifndef NDEBUG
3197Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003198_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3199{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03003200 const char *start, *end;
Victor Stinner00165072015-10-09 01:53:21 +02003201
Victor Stinner661aacc2015-10-14 09:41:48 +02003202 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003203 assert(writer->buffer == NULL);
3204 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003205 else {
3206 assert(writer->buffer != NULL);
3207 if (writer->use_bytearray)
3208 assert(PyByteArray_CheckExact(writer->buffer));
3209 else
3210 assert(PyBytes_CheckExact(writer->buffer));
3211 assert(Py_REFCNT(writer->buffer) == 1);
3212 }
Victor Stinner00165072015-10-09 01:53:21 +02003213
Victor Stinner661aacc2015-10-14 09:41:48 +02003214 if (writer->use_bytearray) {
3215 /* bytearray has its own overallocation algorithm,
3216 writer overallocation must be disabled */
3217 assert(!writer->overallocate);
3218 }
3219
3220 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003221 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003222 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003223 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003224 assert(start[writer->allocated] == 0);
3225
3226 end = start + writer->allocated;
3227 assert(str != NULL);
3228 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003229 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003230}
Victor Stinner68762572019-10-07 18:42:01 +02003231#endif
Victor Stinner00165072015-10-09 01:53:21 +02003232
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003233void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003234_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003235{
3236 Py_ssize_t allocated, pos;
3237
Victor Stinner68762572019-10-07 18:42:01 +02003238 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003239 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003240
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003241 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003242 if (writer->overallocate
3243 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3244 /* overallocate to limit the number of realloc() */
3245 allocated += allocated / OVERALLOCATE_FACTOR;
3246 }
3247
Victor Stinner2bf89932015-10-14 11:25:33 +02003248 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003249 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003250 if (writer->use_bytearray) {
3251 if (PyByteArray_Resize(writer->buffer, allocated))
3252 goto error;
3253 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3254 but we cannot use ob_alloc because bytes may need to be moved
3255 to use the whole buffer. bytearray uses an internal optimization
3256 to avoid moving or copying bytes when bytes are removed at the
3257 beginning (ex: del bytearray[:1]). */
3258 }
3259 else {
3260 if (_PyBytes_Resize(&writer->buffer, allocated))
3261 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003262 }
3263 }
3264 else {
3265 /* convert from stack buffer to bytes object buffer */
3266 assert(writer->buffer == NULL);
3267
Victor Stinner661aacc2015-10-14 09:41:48 +02003268 if (writer->use_bytearray)
3269 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3270 else
3271 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003272 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003273 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003274
3275 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003276 char *dest;
3277 if (writer->use_bytearray)
3278 dest = PyByteArray_AS_STRING(writer->buffer);
3279 else
3280 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003281 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003282 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003283 pos);
3284 }
3285
Victor Stinnerb3653a32015-10-09 03:38:24 +02003286 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003287#ifndef NDEBUG
3288 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3289 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003290#endif
Victor Stinner00165072015-10-09 01:53:21 +02003291 }
3292 writer->allocated = allocated;
3293
3294 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003295 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003296 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003297
3298error:
3299 _PyBytesWriter_Dealloc(writer);
3300 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003301}
3302
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003303void*
3304_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3305{
3306 Py_ssize_t new_min_size;
3307
Victor Stinner68762572019-10-07 18:42:01 +02003308 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003309 assert(size >= 0);
3310
3311 if (size == 0) {
3312 /* nothing to do */
3313 return str;
3314 }
3315
3316 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3317 PyErr_NoMemory();
3318 _PyBytesWriter_Dealloc(writer);
3319 return NULL;
3320 }
3321 new_min_size = writer->min_size + size;
3322
3323 if (new_min_size > writer->allocated)
3324 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3325
3326 writer->min_size = new_min_size;
3327 return str;
3328}
3329
Victor Stinner00165072015-10-09 01:53:21 +02003330/* Allocate the buffer to write size bytes.
3331 Return the pointer to the beginning of buffer data.
3332 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003333void*
Victor Stinner00165072015-10-09 01:53:21 +02003334_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3335{
3336 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003337 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003338 assert(size >= 0);
3339
Victor Stinnerb3653a32015-10-09 03:38:24 +02003340 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003341#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003342 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003343 /* In debug mode, don't use the full small buffer because it is less
3344 efficient than bytes and bytearray objects to detect buffer underflow
3345 and buffer overflow. Use 10 bytes of the small buffer to test also
3346 code using the smaller buffer in debug mode.
3347
3348 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3349 in debug mode to also be able to detect stack overflow when running
3350 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3351 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3352 stack overflow. */
3353 writer->allocated = Py_MIN(writer->allocated, 10);
3354 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3355 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003356 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003357#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003358 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003359#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003360 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003361}
3362
3363PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003364_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003365{
Victor Stinner2bf89932015-10-14 11:25:33 +02003366 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003367 PyObject *result;
3368
Victor Stinner68762572019-10-07 18:42:01 +02003369 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003370
Victor Stinner2bf89932015-10-14 11:25:33 +02003371 size = _PyBytesWriter_GetSize(writer, str);
3372 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003373 Py_CLEAR(writer->buffer);
3374 /* Get the empty byte string singleton */
3375 result = PyBytes_FromStringAndSize(NULL, 0);
3376 }
3377 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003378 if (writer->use_bytearray) {
3379 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3380 }
3381 else {
3382 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3383 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 }
3385 else {
3386 result = writer->buffer;
3387 writer->buffer = NULL;
3388
Victor Stinner2bf89932015-10-14 11:25:33 +02003389 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003390 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003391 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003392 Py_DECREF(result);
3393 return NULL;
3394 }
3395 }
3396 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003397 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003398 assert(result == NULL);
3399 return NULL;
3400 }
Victor Stinner00165072015-10-09 01:53:21 +02003401 }
3402 }
Victor Stinner00165072015-10-09 01:53:21 +02003403 }
Victor Stinner00165072015-10-09 01:53:21 +02003404 return result;
3405}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003406
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003407void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003408_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003409 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003410{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003411 char *str = (char *)ptr;
3412
Victor Stinnerce179bf2015-10-09 12:57:22 +02003413 str = _PyBytesWriter_Prepare(writer, str, size);
3414 if (str == NULL)
3415 return NULL;
3416
Christian Heimesf051e432016-09-13 20:22:02 +02003417 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003418 str += size;
3419
3420 return str;
3421}