blob: bd8af72ade5d3d9beeed085744a1ba1a47dafa7b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinner45876a92020-02-12 22:32:34 +01006#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01007#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01008#include "pycore_pymem.h"
9#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021static PyBytesObject *characters[UCHAR_MAX + 1];
22static PyBytesObject *nullstring;
23
Hai Shi46874c22020-01-30 17:20:25 -060024_Py_IDENTIFIER(__bytes__);
25
Mark Dickinsonfd24b322008-12-06 15:33:31 +000026/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyBytesObject_SIZE + n bytes.
28
29 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
33
Victor Stinner2bf89932015-10-14 11:25:33 +020034/* Forward declaration */
35Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
36 char *str);
37
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
Martin Pantera90a4a92016-05-30 04:04:50 +000042 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000050 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000051 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020055 allocated for string data, not counting the null terminating character.
56 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020060static PyObject *
61_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020063 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020064 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 Py_INCREF(op);
68 return (PyObject *)op;
69 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070
Victor Stinner049e5092014-08-17 22:20:00 +020071 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyErr_SetString(PyExc_OverflowError,
73 "byte string is too large");
74 return NULL;
75 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 if (use_calloc)
79 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
80 else
81 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 if (op == NULL)
83 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010084 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020086 if (!use_calloc)
87 op->ob_sval[size] = '\0';
88 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020092 }
93 return (PyObject *) op;
94}
95
96PyObject *
97PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
98{
99 PyBytesObject *op;
100 if (size < 0) {
101 PyErr_SetString(PyExc_SystemError,
102 "Negative size passed to PyBytes_FromStringAndSize");
103 return NULL;
104 }
105 if (size == 1 && str != NULL &&
106 (op = characters[*str & UCHAR_MAX]) != NULL)
107 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
Christian Heimesf051e432016-09-13 20:22:02 +0200118 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
144 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100153 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200155 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200171 const char *f;
172 const char *p;
173 Py_ssize_t prec;
174 int longflag;
175 int size_tflag;
176 /* Longest 64-bit formatted numbers:
177 - "18446744073709551615\0" (21 bytes)
178 - "-9223372036854775808\0" (21 bytes)
179 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Victor Stinner03dab782015-10-14 00:21:35 +0200181 Longest 64-bit pointer representation:
182 "0xffffffffffffffff\0" (19 bytes). */
183 char buffer[21];
184 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000185
Victor Stinner03dab782015-10-14 00:21:35 +0200186 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 s = _PyBytesWriter_Alloc(&writer, strlen(format));
189 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200191 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192
Victor Stinner03dab782015-10-14 00:21:35 +0200193#define WRITE_BYTES(str) \
194 do { \
195 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
196 if (s == NULL) \
197 goto error; \
198 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200201 if (*f != '%') {
202 *s++ = *f;
203 continue;
204 }
205
206 p = f++;
207
208 /* ignore the width (ex: 10 in "%10s") */
209 while (Py_ISDIGIT(*f))
210 f++;
211
212 /* parse the precision (ex: 10 in "%.10s") */
213 prec = 0;
214 if (*f == '.') {
215 f++;
216 for (; Py_ISDIGIT(*f); f++) {
217 prec = (prec * 10) + (*f - '0');
218 }
219 }
220
221 while (*f && *f != '%' && !Py_ISALPHA(*f))
222 f++;
223
224 /* handle the long flag ('l'), but only for %ld and %lu.
225 others can be added when necessary. */
226 longflag = 0;
227 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
228 longflag = 1;
229 ++f;
230 }
231
232 /* handle the size_t flag ('z'). */
233 size_tflag = 0;
234 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
235 size_tflag = 1;
236 ++f;
237 }
238
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700239 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200240 (ex: 2 for "%s") */
241 writer.min_size -= (f - p + 1);
242
243 switch (*f) {
244 case 'c':
245 {
246 int c = va_arg(vargs, int);
247 if (c < 0 || c > 255) {
248 PyErr_SetString(PyExc_OverflowError,
249 "PyBytes_FromFormatV(): %c format "
250 "expects an integer in range [0; 255]");
251 goto error;
252 }
253 writer.min_size++;
254 *s++ = (unsigned char)c;
255 break;
256 }
257
258 case 'd':
259 if (longflag)
260 sprintf(buffer, "%ld", va_arg(vargs, long));
261 else if (size_tflag)
262 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
263 va_arg(vargs, Py_ssize_t));
264 else
265 sprintf(buffer, "%d", va_arg(vargs, int));
266 assert(strlen(buffer) < sizeof(buffer));
267 WRITE_BYTES(buffer);
268 break;
269
270 case 'u':
271 if (longflag)
272 sprintf(buffer, "%lu",
273 va_arg(vargs, unsigned long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
276 va_arg(vargs, size_t));
277 else
278 sprintf(buffer, "%u",
279 va_arg(vargs, unsigned int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'i':
285 sprintf(buffer, "%i", va_arg(vargs, int));
286 assert(strlen(buffer) < sizeof(buffer));
287 WRITE_BYTES(buffer);
288 break;
289
290 case 'x':
291 sprintf(buffer, "%x", va_arg(vargs, int));
292 assert(strlen(buffer) < sizeof(buffer));
293 WRITE_BYTES(buffer);
294 break;
295
296 case 's':
297 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200299
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200300 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200301 if (prec <= 0) {
302 i = strlen(p);
303 }
304 else {
305 i = 0;
306 while (i < prec && p[i]) {
307 i++;
308 }
309 }
Victor Stinner03dab782015-10-14 00:21:35 +0200310 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
311 if (s == NULL)
312 goto error;
313 break;
314 }
315
316 case 'p':
317 sprintf(buffer, "%p", va_arg(vargs, void*));
318 assert(strlen(buffer) < sizeof(buffer));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (buffer[1] == 'X')
321 buffer[1] = 'x';
322 else if (buffer[1] != 'x') {
323 memmove(buffer+2, buffer, strlen(buffer)+1);
324 buffer[0] = '0';
325 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 }
Victor Stinner03dab782015-10-14 00:21:35 +0200327 WRITE_BYTES(buffer);
328 break;
329
330 case '%':
331 writer.min_size++;
332 *s++ = '%';
333 break;
334
335 default:
336 if (*f == 0) {
337 /* fix min_size if we reached the end of the format string */
338 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340
Victor Stinner03dab782015-10-14 00:21:35 +0200341 /* invalid format string: copy unformatted string and exit */
342 WRITE_BYTES(p);
343 return _PyBytesWriter_Finish(&writer, s);
344 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346
Victor Stinner03dab782015-10-14 00:21:35 +0200347#undef WRITE_BYTES
348
349 return _PyBytesWriter_Finish(&writer, s);
350
351 error:
352 _PyBytesWriter_Dealloc(&writer);
353 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354}
355
356PyObject *
357PyBytes_FromFormat(const char *format, ...)
358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 PyObject* ret;
360 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361
362#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000364#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 ret = PyBytes_FromFormatV(format, vargs);
368 va_end(vargs);
369 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000370}
371
Ethan Furmanb95b5612015-01-23 20:05:18 -0800372/* Helpers for formatstring */
373
374Py_LOCAL_INLINE(PyObject *)
375getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
376{
377 Py_ssize_t argidx = *p_argidx;
378 if (argidx < arglen) {
379 (*p_argidx)++;
380 if (arglen < 0)
381 return args;
382 else
383 return PyTuple_GetItem(args, argidx);
384 }
385 PyErr_SetString(PyExc_TypeError,
386 "not enough arguments for format string");
387 return NULL;
388}
389
390/* Format codes
391 * F_LJUST '-'
392 * F_SIGN '+'
393 * F_BLANK ' '
394 * F_ALT '#'
395 * F_ZERO '0'
396 */
397#define F_LJUST (1<<0)
398#define F_SIGN (1<<1)
399#define F_BLANK (1<<2)
400#define F_ALT (1<<3)
401#define F_ZERO (1<<4)
402
403/* Returns a new reference to a PyBytes object, or NULL on failure. */
404
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200405static char*
406formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200407 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800408{
409 char *p;
410 PyObject *result;
411 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413
414 x = PyFloat_AsDouble(v);
415 if (x == -1.0 && PyErr_Occurred()) {
416 PyErr_Format(PyExc_TypeError, "float argument required, "
417 "not %.200s", Py_TYPE(v)->tp_name);
418 return NULL;
419 }
420
421 if (prec < 0)
422 prec = 6;
423
424 p = PyOS_double_to_string(x, type, prec,
425 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
426
427 if (p == NULL)
428 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200429
430 len = strlen(p);
431 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200432 str = _PyBytesWriter_Prepare(writer, str, len);
433 if (str == NULL)
434 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200435 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200436 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200437 str += len;
438 return str;
439 }
440
441 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600444 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800445}
446
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300447static PyObject *
448formatlong(PyObject *v, int flags, int prec, int type)
449{
450 PyObject *result, *iobj;
451 if (type == 'i')
452 type = 'd';
453 if (PyLong_Check(v))
454 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455 if (PyNumber_Check(v)) {
456 /* make sure number is a type of integer for o, x, and X */
457 if (type == 'o' || type == 'x' || type == 'X')
458 iobj = PyNumber_Index(v);
459 else
460 iobj = PyNumber_Long(v);
461 if (iobj == NULL) {
462 if (!PyErr_ExceptionMatches(PyExc_TypeError))
463 return NULL;
464 }
465 else if (!PyLong_Check(iobj))
466 Py_CLEAR(iobj);
467 if (iobj != NULL) {
468 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469 Py_DECREF(iobj);
470 return result;
471 }
472 }
473 PyErr_Format(PyExc_TypeError,
474 "%%%c format: %s is required, not %.200s", type,
475 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476 : "a number",
477 Py_TYPE(v)->tp_name);
478 return NULL;
479}
480
481static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200482byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300484 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200485 *p = PyBytes_AS_STRING(arg)[0];
486 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300488 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489 *p = PyByteArray_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
492 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300493 PyObject *iobj;
494 long ival;
495 int overflow;
496 /* make sure number is a type of integer */
497 if (PyLong_Check(arg)) {
498 ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 }
500 else {
501 iobj = PyNumber_Index(arg);
502 if (iobj == NULL) {
503 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504 return 0;
505 goto onError;
506 }
507 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508 Py_DECREF(iobj);
509 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300510 if (!overflow && ival == -1 && PyErr_Occurred())
511 goto onError;
512 if (overflow || !(0 <= ival && ival <= 255)) {
513 PyErr_SetString(PyExc_OverflowError,
514 "%c arg not in range(256)");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 *p = (char)ival;
518 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300520 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyErr_SetString(PyExc_TypeError,
522 "%c requires an integer in range(256) or a single byte");
523 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524}
525
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800526static PyObject *_PyBytes_FromBuffer(PyObject *x);
527
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200529format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532 /* is it a bytes object? */
533 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 *pbuf = PyBytes_AS_STRING(v);
535 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 return v;
538 }
539 if (PyByteArray_Check(v)) {
540 *pbuf = PyByteArray_AS_STRING(v);
541 *plen = PyByteArray_GET_SIZE(v);
542 Py_INCREF(v);
543 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 }
545 /* does it support __bytes__? */
546 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
547 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100548 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 Py_DECREF(func);
550 if (result == NULL)
551 return NULL;
552 if (!PyBytes_Check(result)) {
553 PyErr_Format(PyExc_TypeError,
554 "__bytes__ returned non-bytes (type %.200s)",
555 Py_TYPE(result)->tp_name);
556 Py_DECREF(result);
557 return NULL;
558 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200559 *pbuf = PyBytes_AS_STRING(result);
560 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800561 return result;
562 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800563 /* does it support buffer protocol? */
564 if (PyObject_CheckBuffer(v)) {
565 /* maybe we can avoid making a copy of the buffer object here? */
566 result = _PyBytes_FromBuffer(v);
567 if (result == NULL)
568 return NULL;
569 *pbuf = PyBytes_AS_STRING(result);
570 *plen = PyBytes_GET_SIZE(result);
571 return result;
572 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800574 "%%b requires a bytes-like object, "
575 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_TYPE(v)->tp_name);
577 return NULL;
578}
579
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200580/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581
582PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200583_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
584 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585{
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 const char *fmt;
587 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200589 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800590 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592 _PyBytesWriter writer;
593
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800595 PyErr_BadInternalCall();
596 return NULL;
597 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200598 fmt = format;
599 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600
601 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200603
604 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
605 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200607 if (!use_bytearray)
608 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200609
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 if (PyTuple_Check(args)) {
611 arglen = PyTuple_GET_SIZE(args);
612 argidx = 0;
613 }
614 else {
615 arglen = -1;
616 argidx = -2;
617 }
618 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
619 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
620 !PyByteArray_Check(args)) {
621 dict = args;
622 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200623
Ethan Furmanb95b5612015-01-23 20:05:18 -0800624 while (--fmtcnt >= 0) {
625 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 Py_ssize_t len;
627 char *pos;
628
Xiang Zhangb76ad512017-03-06 17:17:05 +0800629 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200630 if (pos != NULL)
631 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200632 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800633 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 assert(len != 0);
635
Christian Heimesf051e432016-09-13 20:22:02 +0200636 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200637 res += len;
638 fmt += len;
639 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 }
641 else {
642 /* Got a format specifier */
643 int flags = 0;
644 Py_ssize_t width = -1;
645 int prec = -1;
646 int c = '\0';
647 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 PyObject *v = NULL;
649 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200650 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200652 Py_ssize_t len = 0;
653 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200654 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200657 if (*fmt == '%') {
658 *res++ = '%';
659 fmt++;
660 fmtcnt--;
661 continue;
662 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200664 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800665 Py_ssize_t keylen;
666 PyObject *key;
667 int pcount = 1;
668
669 if (dict == NULL) {
670 PyErr_SetString(PyExc_TypeError,
671 "format requires a mapping");
672 goto error;
673 }
674 ++fmt;
675 --fmtcnt;
676 keystart = fmt;
677 /* Skip over balanced parentheses */
678 while (pcount > 0 && --fmtcnt >= 0) {
679 if (*fmt == ')')
680 --pcount;
681 else if (*fmt == '(')
682 ++pcount;
683 fmt++;
684 }
685 keylen = fmt - keystart - 1;
686 if (fmtcnt < 0 || pcount > 0) {
687 PyErr_SetString(PyExc_ValueError,
688 "incomplete format key");
689 goto error;
690 }
691 key = PyBytes_FromStringAndSize(keystart,
692 keylen);
693 if (key == NULL)
694 goto error;
695 if (args_owned) {
696 Py_DECREF(args);
697 args_owned = 0;
698 }
699 args = PyObject_GetItem(dict, key);
700 Py_DECREF(key);
701 if (args == NULL) {
702 goto error;
703 }
704 args_owned = 1;
705 arglen = -1;
706 argidx = -2;
707 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200708
709 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800710 while (--fmtcnt >= 0) {
711 switch (c = *fmt++) {
712 case '-': flags |= F_LJUST; continue;
713 case '+': flags |= F_SIGN; continue;
714 case ' ': flags |= F_BLANK; continue;
715 case '#': flags |= F_ALT; continue;
716 case '0': flags |= F_ZERO; continue;
717 }
718 break;
719 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200720
721 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800722 if (c == '*') {
723 v = getnextarg(args, arglen, &argidx);
724 if (v == NULL)
725 goto error;
726 if (!PyLong_Check(v)) {
727 PyErr_SetString(PyExc_TypeError,
728 "* wants int");
729 goto error;
730 }
731 width = PyLong_AsSsize_t(v);
732 if (width == -1 && PyErr_Occurred())
733 goto error;
734 if (width < 0) {
735 flags |= F_LJUST;
736 width = -width;
737 }
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 width = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "width too big");
751 goto error;
752 }
753 width = width*10 + (c - '0');
754 }
755 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200756
757 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800758 if (c == '.') {
759 prec = 0;
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 if (c == '*') {
763 v = getnextarg(args, arglen, &argidx);
764 if (v == NULL)
765 goto error;
766 if (!PyLong_Check(v)) {
767 PyErr_SetString(
768 PyExc_TypeError,
769 "* wants int");
770 goto error;
771 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200772 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800773 if (prec == -1 && PyErr_Occurred())
774 goto error;
775 if (prec < 0)
776 prec = 0;
777 if (--fmtcnt >= 0)
778 c = *fmt++;
779 }
780 else if (c >= 0 && isdigit(c)) {
781 prec = c - '0';
782 while (--fmtcnt >= 0) {
783 c = Py_CHARMASK(*fmt++);
784 if (!isdigit(c))
785 break;
786 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
787 PyErr_SetString(
788 PyExc_ValueError,
789 "prec too big");
790 goto error;
791 }
792 prec = prec*10 + (c - '0');
793 }
794 }
795 } /* prec */
796 if (fmtcnt >= 0) {
797 if (c == 'h' || c == 'l' || c == 'L') {
798 if (--fmtcnt >= 0)
799 c = *fmt++;
800 }
801 }
802 if (fmtcnt < 0) {
803 PyErr_SetString(PyExc_ValueError,
804 "incomplete format");
805 goto error;
806 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200807 v = getnextarg(args, arglen, &argidx);
808 if (v == NULL)
809 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200810
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300811 if (fmtcnt == 0) {
812 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813 writer.overallocate = 0;
814 }
815
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 sign = 0;
817 fill = ' ';
818 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700819 case 'r':
820 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800821 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200822 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (temp == NULL)
824 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200825 assert(PyUnicode_IS_ASCII(temp));
826 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
827 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 if (prec >= 0 && len > prec)
829 len = prec;
830 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 's':
833 // %s is only for 2/3 code; 3 only code should use %b
834 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200835 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 if (temp == NULL)
837 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (prec >= 0 && len > prec)
839 len = prec;
840 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200841
Ethan Furmanb95b5612015-01-23 20:05:18 -0800842 case 'i':
843 case 'd':
844 case 'u':
845 case 'o':
846 case 'x':
847 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200849 && width == -1 && prec == -1
850 && !(flags & (F_SIGN | F_BLANK))
851 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200852 {
853 /* Fast path */
854 int alternate = flags & F_ALT;
855 int base;
856
857 switch(c)
858 {
859 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700860 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200861 case 'd':
862 case 'i':
863 case 'u':
864 base = 10;
865 break;
866 case 'o':
867 base = 8;
868 break;
869 case 'x':
870 case 'X':
871 base = 16;
872 break;
873 }
874
875 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200876 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200877 res = _PyLong_FormatBytesWriter(&writer, res,
878 v, base, alternate);
879 if (res == NULL)
880 goto error;
881 continue;
882 }
883
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300884 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200885 if (!temp)
886 goto error;
887 assert(PyUnicode_IS_ASCII(temp));
888 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
889 len = PyUnicode_GET_LENGTH(temp);
890 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 if (flags & F_ZERO)
892 fill = '0';
893 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200894
Ethan Furmanb95b5612015-01-23 20:05:18 -0800895 case 'e':
896 case 'E':
897 case 'f':
898 case 'F':
899 case 'g':
900 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200901 if (width == -1 && prec == -1
902 && !(flags & (F_SIGN | F_BLANK)))
903 {
904 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200905 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200906 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200907 if (res == NULL)
908 goto error;
909 continue;
910 }
911
Victor Stinnerad771582015-10-09 12:38:53 +0200912 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 goto error;
914 pbuf = PyBytes_AS_STRING(temp);
915 len = PyBytes_GET_SIZE(temp);
916 sign = 1;
917 if (flags & F_ZERO)
918 fill = '0';
919 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200920
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200922 pbuf = &onechar;
923 len = byte_converter(v, &onechar);
924 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200926 if (width == -1) {
927 /* Fast path */
928 *res++ = onechar;
929 continue;
930 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 default:
934 PyErr_Format(PyExc_ValueError,
935 "unsupported format character '%c' (0x%x) "
936 "at index %zd",
937 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200938 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 goto error;
940 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200941
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 if (sign) {
943 if (*pbuf == '-' || *pbuf == '+') {
944 sign = *pbuf++;
945 len--;
946 }
947 else if (flags & F_SIGN)
948 sign = '+';
949 else if (flags & F_BLANK)
950 sign = ' ';
951 else
952 sign = 0;
953 }
954 if (width < len)
955 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200956
957 alloc = width;
958 if (sign != 0 && len == width)
959 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200960 /* 2: size preallocated for %s */
961 if (alloc > 2) {
962 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200963 if (res == NULL)
964 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200966#ifndef NDEBUG
967 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968#endif
969
970 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800971 if (sign) {
972 if (fill != ' ')
973 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800974 if (width > len)
975 width--;
976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977
978 /* Write the numeric prefix for "x", "X" and "o" formats
979 if the alternate form is used.
980 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200981 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 assert(pbuf[0] == '0');
983 assert(pbuf[1] == c);
984 if (fill != ' ') {
985 *res++ = *pbuf++;
986 *res++ = *pbuf++;
987 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800988 width -= 2;
989 if (width < 0)
990 width = 0;
991 len -= 2;
992 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
994 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996 memset(res, fill, width - len);
997 res += (width - len);
998 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001000
1001 /* If padding with spaces: write sign if needed and/or numeric
1002 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 if (fill == ' ') {
1004 if (sign)
1005 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001006 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 assert(pbuf[0] == '0');
1008 assert(pbuf[1] == c);
1009 *res++ = *pbuf++;
1010 *res++ = *pbuf++;
1011 }
1012 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001015 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001017
1018 /* Pad right with the fill character if needed */
1019 if (width > len) {
1020 memset(res, ' ', width - len);
1021 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001024 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 PyErr_SetString(PyExc_TypeError,
1026 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
1028 goto error;
1029 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001030 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001032#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033 /* check that we computed the exact size for this write */
1034 assert((res - before) == alloc);
1035#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001037
1038 /* If overallocation was disabled, ensure that it was the last
1039 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001040 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 if (argidx < arglen && !dict) {
1044 PyErr_SetString(PyExc_TypeError,
1045 "not all arguments converted during bytes formatting");
1046 goto error;
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049 if (args_owned) {
1050 Py_DECREF(args);
1051 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053
1054 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001056 if (args_owned) {
1057 Py_DECREF(args);
1058 }
1059 return NULL;
1060}
1061
Greg Price3a4f6672019-09-12 11:12:22 -07001062/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001063PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_ssize_t len,
1065 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001066 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001069 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001071 _PyBytesWriter writer;
1072
1073 _PyBytesWriter_Init(&writer);
1074
1075 p = _PyBytesWriter_Alloc(&writer, len);
1076 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001078 writer.overallocate = 1;
1079
Eric V. Smith42454af2016-10-31 09:22:08 -04001080 *first_invalid_escape = NULL;
1081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001085 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 continue;
1087 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001090 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 PyErr_SetString(PyExc_ValueError,
1092 "Trailing \\ in string");
1093 goto failed;
1094 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 switch (*s++) {
1097 /* XXX This assumes ASCII! */
1098 case '\n': break;
1099 case '\\': *p++ = '\\'; break;
1100 case '\'': *p++ = '\''; break;
1101 case '\"': *p++ = '\"'; break;
1102 case 'b': *p++ = '\b'; break;
1103 case 'f': *p++ = '\014'; break; /* FF */
1104 case 't': *p++ = '\t'; break;
1105 case 'n': *p++ = '\n'; break;
1106 case 'r': *p++ = '\r'; break;
1107 case 'v': *p++ = '\013'; break; /* VT */
1108 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1109 case '0': case '1': case '2': case '3':
1110 case '4': case '5': case '6': case '7':
1111 c = s[-1] - '0';
1112 if (s < end && '0' <= *s && *s <= '7') {
1113 c = (c<<3) + *s++ - '0';
1114 if (s < end && '0' <= *s && *s <= '7')
1115 c = (c<<3) + *s++ - '0';
1116 }
1117 *p++ = c;
1118 break;
1119 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001120 if (s+1 < end) {
1121 int digit1, digit2;
1122 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1123 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1124 if (digit1 < 16 && digit2 < 16) {
1125 *p++ = (unsigned char)((digit1 << 4) + digit2);
1126 s += 2;
1127 break;
1128 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 /* invalid hexadecimal digits */
1131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001133 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001134 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001135 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 goto failed;
1137 }
1138 if (strcmp(errors, "replace") == 0) {
1139 *p++ = '?';
1140 } else if (strcmp(errors, "ignore") == 0)
1141 /* do nothing */;
1142 else {
1143 PyErr_Format(PyExc_ValueError,
1144 "decoding error; unknown "
1145 "error handling code: %.400s",
1146 errors);
1147 goto failed;
1148 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001149 /* skip \x */
1150 if (s < end && Py_ISXDIGIT(s[0]))
1151 s++; /* and a hexdigit */
1152 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001155 if (*first_invalid_escape == NULL) {
1156 *first_invalid_escape = s-1; /* Back up one char, since we've
1157 already incremented s. */
1158 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001160 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 }
1162 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001163
1164 return _PyBytesWriter_Finish(&writer, p);
1165
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001167 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169}
1170
Eric V. Smith42454af2016-10-31 09:22:08 -04001171PyObject *PyBytes_DecodeEscape(const char *s,
1172 Py_ssize_t len,
1173 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001174 Py_ssize_t Py_UNUSED(unicode),
1175 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001176{
1177 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001178 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001179 &first_invalid_escape);
1180 if (result == NULL)
1181 return NULL;
1182 if (first_invalid_escape != NULL) {
1183 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1184 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001185 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001186 Py_DECREF(result);
1187 return NULL;
1188 }
1189 }
1190 return result;
1191
1192}
/* -------------------------------------------------------------------- */
/* object api */
1195
1196Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001197PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (!PyBytes_Check(op)) {
1200 PyErr_Format(PyExc_TypeError,
1201 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1202 return -1;
1203 }
1204 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205}
1206
1207char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001208PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return NULL;
1214 }
1215 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216}
1217
1218int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001219PyBytes_AsStringAndSize(PyObject *obj,
1220 char **s,
1221 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 if (s == NULL) {
1224 PyErr_BadInternalCall();
1225 return -1;
1226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (!PyBytes_Check(obj)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1231 return -1;
1232 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 *s = PyBytes_AS_STRING(obj);
1235 if (len != NULL)
1236 *len = PyBytes_GET_SIZE(obj);
1237 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001238 PyErr_SetString(PyExc_ValueError,
1239 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return -1;
1241 }
1242 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
/* -------------------------------------------------------------------- */
/* Methods */
1247
Eric Smith0923d1d2009-04-16 20:16:10 +00001248#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001249
1250#include "stringlib/fastsearch.h"
1251#include "stringlib/count.h"
1252#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001253#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001254#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001255#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001256#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
Eric Smith0f78bff2009-11-30 01:01:42 +00001258#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260PyObject *
1261PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001263 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001265 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001267 unsigned char quote, *s, *p;
1268
1269 /* Compute size of output string */
1270 squotes = dquotes = 0;
1271 newsize = 3; /* b'' */
1272 s = (unsigned char*)op->ob_sval;
1273 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001274 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001276 case '\'': squotes++; break;
1277 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001279 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 default:
1281 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001282 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001284 if (newsize > PY_SSIZE_T_MAX - incr)
1285 goto overflow;
1286 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001287 }
1288 quote = '\'';
1289 if (smartquotes && squotes && !dquotes)
1290 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001291 if (squotes && quote == '\'') {
1292 if (newsize > PY_SSIZE_T_MAX - squotes)
1293 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296
1297 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (v == NULL) {
1299 return NULL;
1300 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 *p++ = 'b', *p++ = quote;
1304 for (i = 0; i < length; i++) {
1305 unsigned char c = op->ob_sval[i];
1306 if (c == quote || c == '\\')
1307 *p++ = '\\', *p++ = c;
1308 else if (c == '\t')
1309 *p++ = '\\', *p++ = 't';
1310 else if (c == '\n')
1311 *p++ = '\\', *p++ = 'n';
1312 else if (c == '\r')
1313 *p++ = '\\', *p++ = 'r';
1314 else if (c < ' ' || c >= 0x7f) {
1315 *p++ = '\\';
1316 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001317 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1318 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001320 else
1321 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001323 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001324 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001326
1327 overflow:
1328 PyErr_SetString(PyExc_OverflowError,
1329 "bytes object is too large to make repr");
1330 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001331}
1332
Neal Norwitz6968b052007-02-27 19:02:19 +00001333static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001334bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001337}
1338
Neal Norwitz6968b052007-02-27 19:02:19 +00001339static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001340bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001341{
Victor Stinner331a6a52019-05-27 16:39:22 +02001342 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001343 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001345 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001347 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 }
1349 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001350}
1351
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001352static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001353bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356}
Neal Norwitz6968b052007-02-27 19:02:19 +00001357
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358/* This is also used by PyBytes_Concat() */
1359static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001360bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 Py_buffer va, vb;
1363 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 va.len = -1;
1366 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001367 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1368 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001370 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 goto done;
1372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 /* Optimize end cases */
1375 if (va.len == 0 && PyBytes_CheckExact(b)) {
1376 result = b;
1377 Py_INCREF(result);
1378 goto done;
1379 }
1380 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1381 result = a;
1382 Py_INCREF(result);
1383 goto done;
1384 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001386 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 PyErr_NoMemory();
1388 goto done;
1389 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001391 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 if (result != NULL) {
1393 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1394 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1395 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
1397 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (va.len != -1)
1399 PyBuffer_Release(&va);
1400 if (vb.len != -1)
1401 PyBuffer_Release(&vb);
1402 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403}
Neal Norwitz6968b052007-02-27 19:02:19 +00001404
1405static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001406bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001407{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001408 Py_ssize_t i;
1409 Py_ssize_t j;
1410 Py_ssize_t size;
1411 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 size_t nbytes;
1413 if (n < 0)
1414 n = 0;
1415 /* watch out for overflows: the size can overflow int,
1416 * and the # of bytes needed can overflow size_t
1417 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001418 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 PyErr_SetString(PyExc_OverflowError,
1420 "repeated bytes are too long");
1421 return NULL;
1422 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001423 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1425 Py_INCREF(a);
1426 return (PyObject *)a;
1427 }
1428 nbytes = (size_t)size;
1429 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1430 PyErr_SetString(PyExc_OverflowError,
1431 "repeated bytes are too long");
1432 return NULL;
1433 }
1434 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1435 if (op == NULL)
1436 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001437 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 op->ob_shash = -1;
1439 op->ob_sval[size] = '\0';
1440 if (Py_SIZE(a) == 1 && n > 0) {
1441 memset(op->ob_sval, a->ob_sval[0] , n);
1442 return (PyObject *) op;
1443 }
1444 i = 0;
1445 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001446 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 i = Py_SIZE(a);
1448 }
1449 while (i < size) {
1450 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001451 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 i += j;
1453 }
1454 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001455}
1456
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001457static int
1458bytes_contains(PyObject *self, PyObject *arg)
1459{
1460 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1461}
1462
Neal Norwitz6968b052007-02-27 19:02:19 +00001463static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001464bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 if (i < 0 || i >= Py_SIZE(a)) {
1467 PyErr_SetString(PyExc_IndexError, "index out of range");
1468 return NULL;
1469 }
1470 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001471}
1472
Benjamin Peterson621b4302016-09-09 13:54:34 -07001473static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001474bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1475{
1476 int cmp;
1477 Py_ssize_t len;
1478
1479 len = Py_SIZE(a);
1480 if (Py_SIZE(b) != len)
1481 return 0;
1482
1483 if (a->ob_sval[0] != b->ob_sval[0])
1484 return 0;
1485
1486 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1487 return (cmp == 0);
1488}
1489
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001491bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001492{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 int c;
1494 Py_ssize_t len_a, len_b;
1495 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001496 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 /* Make sure both arguments are strings. */
1499 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001500 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001501 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001502 rc = PyObject_IsInstance((PyObject*)a,
1503 (PyObject*)&PyUnicode_Type);
1504 if (!rc)
1505 rc = PyObject_IsInstance((PyObject*)b,
1506 (PyObject*)&PyUnicode_Type);
1507 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001509 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001510 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001511 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001512 return NULL;
1513 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001514 else {
1515 rc = PyObject_IsInstance((PyObject*)a,
1516 (PyObject*)&PyLong_Type);
1517 if (!rc)
1518 rc = PyObject_IsInstance((PyObject*)b,
1519 (PyObject*)&PyLong_Type);
1520 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001521 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001522 if (rc) {
1523 if (PyErr_WarnEx(PyExc_BytesWarning,
1524 "Comparison between bytes and int", 1))
1525 return NULL;
1526 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001527 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 }
stratakise8b19652017-11-02 11:32:54 +01001529 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001531 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001533 case Py_EQ:
1534 case Py_LE:
1535 case Py_GE:
1536 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001537 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 case Py_NE:
1539 case Py_LT:
1540 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001541 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001542 default:
1543 PyErr_BadArgument();
1544 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
1546 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547 else if (op == Py_EQ || op == Py_NE) {
1548 int eq = bytes_compare_eq(a, b);
1549 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001550 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001551 }
1552 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001553 len_a = Py_SIZE(a);
1554 len_b = Py_SIZE(b);
1555 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001556 if (min_len > 0) {
1557 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001558 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561 else
1562 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001563 if (c != 0)
1564 Py_RETURN_RICHCOMPARE(c, 0, op);
1565 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001567}
1568
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001569static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001570bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001571{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001572 if (a->ob_shash == -1) {
1573 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001574 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001575 }
1576 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001577}
1578
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001580bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 if (PyIndex_Check(item)) {
1583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1584 if (i == -1 && PyErr_Occurred())
1585 return NULL;
1586 if (i < 0)
1587 i += PyBytes_GET_SIZE(self);
1588 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1589 PyErr_SetString(PyExc_IndexError,
1590 "index out of range");
1591 return NULL;
1592 }
1593 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1594 }
1595 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001596 Py_ssize_t start, stop, step, slicelength, i;
1597 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 char* source_buf;
1599 char* result_buf;
1600 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001601
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001602 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 return NULL;
1604 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001605 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1606 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (slicelength <= 0) {
1609 return PyBytes_FromStringAndSize("", 0);
1610 }
1611 else if (start == 0 && step == 1 &&
1612 slicelength == PyBytes_GET_SIZE(self) &&
1613 PyBytes_CheckExact(self)) {
1614 Py_INCREF(self);
1615 return (PyObject *)self;
1616 }
1617 else if (step == 1) {
1618 return PyBytes_FromStringAndSize(
1619 PyBytes_AS_STRING(self) + start,
1620 slicelength);
1621 }
1622 else {
1623 source_buf = PyBytes_AS_STRING(self);
1624 result = PyBytes_FromStringAndSize(NULL, slicelength);
1625 if (result == NULL)
1626 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 result_buf = PyBytes_AS_STRING(result);
1629 for (cur = start, i = 0; i < slicelength;
1630 cur += step, i++) {
1631 result_buf[i] = source_buf[cur];
1632 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 return result;
1635 }
1636 }
1637 else {
1638 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001639 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 Py_TYPE(item)->tp_name);
1641 return NULL;
1642 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643}
1644
1645static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001646bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1649 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001652static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 (lenfunc)bytes_length, /*sq_length*/
1654 (binaryfunc)bytes_concat, /*sq_concat*/
1655 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1656 (ssizeargfunc)bytes_item, /*sq_item*/
1657 0, /*sq_slice*/
1658 0, /*sq_ass_item*/
1659 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001660 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661};
1662
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001663static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 (lenfunc)bytes_length,
1665 (binaryfunc)bytes_subscript,
1666 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001667};
1668
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001669static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 (getbufferproc)bytes_buffer_getbuffer,
1671 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672};
1673
1674
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP   0   /* strip leading bytes only */
#define RIGHTSTRIP  1   /* strip trailing bytes only */
#define BOTHSTRIP   2   /* strip both ends */
1678
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001679/*[clinic input]
1680bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682 sep: object = None
1683 The delimiter according which to split the bytes.
1684 None (the default value) means split on ASCII whitespace characters
1685 (space, tab, return, newline, formfeed, vertical tab).
1686 maxsplit: Py_ssize_t = -1
1687 Maximum number of splits to do.
1688 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001690Return a list of the sections in the bytes, using sep as the delimiter.
1691[clinic start generated code]*/
1692
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001694bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1695/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001696{
1697 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 const char *s = PyBytes_AS_STRING(self), *sub;
1699 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 if (maxsplit < 0)
1703 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001704 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001706 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return NULL;
1708 sub = vsub.buf;
1709 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1712 PyBuffer_Release(&vsub);
1713 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001714}
1715
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716/*[clinic input]
1717bytes.partition
1718
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001719 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720 /
1721
1722Partition the bytes into three parts using the given separator.
1723
1724This will search for the separator sep in the bytes. If the separator is found,
1725returns a 3-tuple containing the part before the separator, the separator
1726itself, and the part after it.
1727
1728If the separator is not found, returns a 3-tuple containing the original bytes
1729object and two empty bytes objects.
1730[clinic start generated code]*/
1731
Neal Norwitz6968b052007-02-27 19:02:19 +00001732static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001733bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001734/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001735{
Neal Norwitz6968b052007-02-27 19:02:19 +00001736 return stringlib_partition(
1737 (PyObject*) self,
1738 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001739 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001740 );
1741}
1742
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001743/*[clinic input]
1744bytes.rpartition
1745
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001746 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001747 /
1748
1749Partition the bytes into three parts using the given separator.
1750
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001751This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752the separator is found, returns a 3-tuple containing the part before the
1753separator, the separator itself, and the part after it.
1754
1755If the separator is not found, returns a 3-tuple containing two empty bytes
1756objects and the original bytes object.
1757[clinic start generated code]*/
1758
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001759static PyObject *
1760bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001761/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 return stringlib_rpartition(
1764 (PyObject*) self,
1765 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001768}
1769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770/*[clinic input]
1771bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773Return a list of the sections in the bytes, using sep as the delimiter.
1774
1775Splitting is done starting at the end of the bytes and working to the front.
1776[clinic start generated code]*/
1777
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001779bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1780/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781{
1782 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 const char *s = PyBytes_AS_STRING(self), *sub;
1784 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if (maxsplit < 0)
1788 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001791 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return NULL;
1793 sub = vsub.buf;
1794 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1797 PyBuffer_Release(&vsub);
1798 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001799}
1800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001802/*[clinic input]
1803bytes.join
1804
1805 iterable_of_bytes: object
1806 /
1807
1808Concatenate any number of bytes objects.
1809
1810The bytes whose method is called is inserted in between each pair.
1811
1812The result is returned as a new bytes object.
1813
1814Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1815[clinic start generated code]*/
1816
Neal Norwitz6968b052007-02-27 19:02:19 +00001817static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001818bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1819/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001820{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001821 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001822}
1823
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824PyObject *
1825_PyBytes_Join(PyObject *sep, PyObject *x)
1826{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 assert(sep != NULL && PyBytes_Check(sep));
1828 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001829 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830}
1831
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001832static PyObject *
1833bytes_find(PyBytesObject *self, PyObject *args)
1834{
1835 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1836}
1837
1838static PyObject *
1839bytes_index(PyBytesObject *self, PyObject *args)
1840{
1841 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1842}
1843
1844
1845static PyObject *
1846bytes_rfind(PyBytesObject *self, PyObject *args)
1847{
1848 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1849}
1850
1851
1852static PyObject *
1853bytes_rindex(PyBytesObject *self, PyObject *args)
1854{
1855 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1856}
1857
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
1859Py_LOCAL_INLINE(PyObject *)
1860do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 Py_buffer vsep;
1863 char *s = PyBytes_AS_STRING(self);
1864 Py_ssize_t len = PyBytes_GET_SIZE(self);
1865 char *sep;
1866 Py_ssize_t seplen;
1867 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001869 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 return NULL;
1871 sep = vsep.buf;
1872 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 i = 0;
1875 if (striptype != RIGHTSTRIP) {
1876 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1877 i++;
1878 }
1879 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 j = len;
1882 if (striptype != LEFTSTRIP) {
1883 do {
1884 j--;
1885 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1886 j++;
1887 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1892 Py_INCREF(self);
1893 return (PyObject*)self;
1894 }
1895 else
1896 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001897}
1898
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
1900Py_LOCAL_INLINE(PyObject *)
1901do_strip(PyBytesObject *self, int striptype)
1902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 char *s = PyBytes_AS_STRING(self);
1904 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 i = 0;
1907 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001908 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 i++;
1910 }
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 j = len;
1914 if (striptype != LEFTSTRIP) {
1915 do {
1916 j--;
David Malcolm96960882010-11-05 17:23:41 +00001917 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 j++;
1919 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1922 Py_INCREF(self);
1923 return (PyObject*)self;
1924 }
1925 else
1926 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927}
1928
1929
1930Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001931do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001933 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001934 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 }
1936 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937}
1938
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001939/*[clinic input]
1940bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942 bytes: object = None
1943 /
1944
1945Strip leading and trailing bytes contained in the argument.
1946
1947If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1948[clinic start generated code]*/
1949
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001951bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001952/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001953{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001955}
1956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957/*[clinic input]
1958bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960 bytes: object = None
1961 /
1962
1963Strip leading bytes contained in the argument.
1964
1965If the argument is omitted or None, strip leading ASCII whitespace.
1966[clinic start generated code]*/
1967
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001968static PyObject *
1969bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001970/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971{
1972 return do_argstrip(self, LEFTSTRIP, bytes);
1973}
1974
1975/*[clinic input]
1976bytes.rstrip
1977
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001978 bytes: object = None
1979 /
1980
1981Strip trailing bytes contained in the argument.
1982
1983If the argument is omitted or None, strip trailing ASCII whitespace.
1984[clinic start generated code]*/
1985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986static PyObject *
1987bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001988/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989{
1990 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001991}
Neal Norwitz6968b052007-02-27 19:02:19 +00001992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001994static PyObject *
1995bytes_count(PyBytesObject *self, PyObject *args)
1996{
1997 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1998}
1999
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001/*[clinic input]
2002bytes.translate
2003
Victor Stinner049e5092014-08-17 22:20:00 +02002004 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002007 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008
2009Return a copy with each character mapped by the given translation table.
2010
Martin Panter1b6c6da2016-08-27 08:35:02 +00002011All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012The remaining characters are mapped through the given translation table.
2013[clinic start generated code]*/
2014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002016bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002017 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002018/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002020 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002021 Py_buffer table_view = {NULL, NULL};
2022 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002024 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 Py_ssize_t inlen, tablen, dellen = 0;
2028 PyObject *result;
2029 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031 if (PyBytes_Check(table)) {
2032 table_chars = PyBytes_AS_STRING(table);
2033 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002034 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035 else if (table == Py_None) {
2036 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 tablen = 256;
2038 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002039 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002040 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002041 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002042 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002043 tablen = table_view.len;
2044 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 if (tablen != 256) {
2047 PyErr_SetString(PyExc_ValueError,
2048 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002049 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 return NULL;
2051 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002052
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002053 if (deletechars != NULL) {
2054 if (PyBytes_Check(deletechars)) {
2055 del_table_chars = PyBytes_AS_STRING(deletechars);
2056 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002058 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002059 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002060 PyBuffer_Release(&table_view);
2061 return NULL;
2062 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002063 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002064 dellen = del_table_view.len;
2065 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 }
2067 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 dellen = 0;
2070 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 inlen = PyBytes_GET_SIZE(input_obj);
2073 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 if (result == NULL) {
2075 PyBuffer_Release(&del_table_view);
2076 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002078 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002079 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002082 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 /* If no deletions are required, use faster code */
2084 for (i = inlen; --i >= 0; ) {
2085 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 changed = 1;
2088 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002089 if (!changed && PyBytes_CheckExact(input_obj)) {
2090 Py_INCREF(input_obj);
2091 Py_DECREF(result);
2092 result = input_obj;
2093 }
2094 PyBuffer_Release(&del_table_view);
2095 PyBuffer_Release(&table_view);
2096 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 for (i = 0; i < 256; i++)
2101 trans_table[i] = Py_CHARMASK(i);
2102 } else {
2103 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002106 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002109 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 for (i = inlen; --i >= 0; ) {
2113 c = Py_CHARMASK(*input++);
2114 if (trans_table[c] != -1)
2115 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2116 continue;
2117 changed = 1;
2118 }
2119 if (!changed && PyBytes_CheckExact(input_obj)) {
2120 Py_DECREF(result);
2121 Py_INCREF(input_obj);
2122 return input_obj;
2123 }
2124 /* Fix the size of the resulting string */
2125 if (inlen > 0)
2126 _PyBytes_Resize(&result, output - output_start);
2127 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128}
2129
2130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131/*[clinic input]
2132
2133@staticmethod
2134bytes.maketrans
2135
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002136 frm: Py_buffer
2137 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002138 /
2139
2140Return a translation table useable for the bytes or bytearray translate method.
2141
2142The returned table will be one where each byte in frm is mapped to the byte at
2143the same position in to.
2144
2145The bytes objects frm and to must be of the same length.
2146[clinic start generated code]*/
2147
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002148static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002149bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002150/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002151{
2152 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002153}
2154
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002155
2156/*[clinic input]
2157bytes.replace
2158
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002159 old: Py_buffer
2160 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002161 count: Py_ssize_t = -1
2162 Maximum number of occurrences to replace.
2163 -1 (the default value) means replace all occurrences.
2164 /
2165
2166Return a copy with all occurrences of substring old replaced by new.
2167
2168If the optional argument count is given, only the first count occurrences are
2169replaced.
2170[clinic start generated code]*/
2171
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002172static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002173bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002174 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002175/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002176{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002177 return stringlib_replace((PyObject *)self,
2178 (const char *)old->buf, old->len,
2179 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180}
2181
2182/** End DALKE **/
2183
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002185static PyObject *
2186bytes_startswith(PyBytesObject *self, PyObject *args)
2187{
2188 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2189}
2190
2191static PyObject *
2192bytes_endswith(PyBytesObject *self, PyObject *args)
2193{
2194 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2195}
2196
2197
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002198/*[clinic input]
2199bytes.decode
2200
2201 encoding: str(c_default="NULL") = 'utf-8'
2202 The encoding with which to decode the bytes.
2203 errors: str(c_default="NULL") = 'strict'
2204 The error handling scheme to use for the handling of decoding errors.
2205 The default is 'strict' meaning that decoding errors raise a
2206 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2207 as well as any other name registered with codecs.register_error that
2208 can handle UnicodeDecodeErrors.
2209
2210Decode the bytes using the codec registered for encoding.
2211[clinic start generated code]*/
2212
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002214bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002215 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002216/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002218 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002219}
2220
Guido van Rossum20188312006-05-05 15:15:40 +00002221
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222/*[clinic input]
2223bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002224
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002225 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226
2227Return a list of the lines in the bytes, breaking at line boundaries.
2228
2229Line breaks are not included in the resulting list unless keepends is given and
2230true.
2231[clinic start generated code]*/
2232
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002233static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002234bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002235/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002236{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002237 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002238 (PyObject*) self, PyBytes_AS_STRING(self),
2239 PyBytes_GET_SIZE(self), keepends
2240 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002241}
2242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243/*[clinic input]
2244@classmethod
2245bytes.fromhex
2246
2247 string: unicode
2248 /
2249
2250Create a bytes object from a string of hexadecimal numbers.
2251
2252Spaces between two numbers are accepted.
2253Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2254[clinic start generated code]*/
2255
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002256static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002257bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002258/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002260 PyObject *result = _PyBytes_FromHex(string, 0);
2261 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002262 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002263 }
2264 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002265}
2266
2267PyObject*
2268_PyBytes_FromHex(PyObject *string, int use_bytearray)
2269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002271 Py_ssize_t hexlen, invalid_char;
2272 unsigned int top, bot;
2273 Py_UCS1 *str, *end;
2274 _PyBytesWriter writer;
2275
2276 _PyBytesWriter_Init(&writer);
2277 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002278
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002279 assert(PyUnicode_Check(string));
2280 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002282 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002283
Victor Stinner2bf89932015-10-14 11:25:33 +02002284 if (!PyUnicode_IS_ASCII(string)) {
2285 void *data = PyUnicode_DATA(string);
2286 unsigned int kind = PyUnicode_KIND(string);
2287 Py_ssize_t i;
2288
2289 /* search for the first non-ASCII character */
2290 for (i = 0; i < hexlen; i++) {
2291 if (PyUnicode_READ(kind, data, i) >= 128)
2292 break;
2293 }
2294 invalid_char = i;
2295 goto error;
2296 }
2297
2298 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2299 str = PyUnicode_1BYTE_DATA(string);
2300
2301 /* This overestimates if there are spaces */
2302 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2303 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002305
2306 end = str + hexlen;
2307 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002309 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002310 do {
2311 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002312 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002313 if (str >= end)
2314 break;
2315 }
2316
2317 top = _PyLong_DigitValue[*str];
2318 if (top >= 16) {
2319 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 goto error;
2321 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002322 str++;
2323
2324 bot = _PyLong_DigitValue[*str];
2325 if (bot >= 16) {
2326 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2327 goto error;
2328 }
2329 str++;
2330
2331 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002333
2334 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002335
2336 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002337 PyErr_Format(PyExc_ValueError,
2338 "non-hexadecimal number found in "
2339 "fromhex() arg at position %zd", invalid_char);
2340 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002342}
2343
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002344/*[clinic input]
2345bytes.hex
2346
2347 sep: object = NULL
2348 An optional single character or byte to separate hex bytes.
2349 bytes_per_sep: int = 1
2350 How many bytes between separators. Positive values count from the
2351 right, negative values count from the left.
2352
2353Create a str of hexadecimal numbers from a bytes object.
2354
2355Example:
2356>>> value = b'\xb9\x01\xef'
2357>>> value.hex()
2358'b901ef'
2359>>> value.hex(':')
2360'b9:01:ef'
2361>>> value.hex(':', 2)
2362'b9:01ef'
2363>>> value.hex(':', -2)
2364'b901:ef'
2365[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002366
2367static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002368bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2369/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002370{
2371 char* argbuf = PyBytes_AS_STRING(self);
2372 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002373 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002374}
2375
2376static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302377bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002378{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002380}
2381
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002382
2383static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002384bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302386 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002387 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002388 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002389 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002390 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002391 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002392 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002393 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002394 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002395 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002396 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002397 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002398 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002399 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302400 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302402 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302404 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002405 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302406 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302408 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002409 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302410 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302412 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302414 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002416 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002417 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302418 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002419 BYTES_LSTRIP_METHODDEF
2420 BYTES_MAKETRANS_METHODDEF
2421 BYTES_PARTITION_METHODDEF
2422 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002423 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2424 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002425 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002426 BYTES_RPARTITION_METHODDEF
2427 BYTES_RSPLIT_METHODDEF
2428 BYTES_RSTRIP_METHODDEF
2429 BYTES_SPLIT_METHODDEF
2430 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002431 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002432 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002433 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302434 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302436 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002437 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302438 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002439 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002441};
2442
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002443static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002444bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002445{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002446 if (!PyBytes_Check(self)) {
2447 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002448 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002449 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002450 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002451}
2452
2453static PyNumberMethods bytes_as_number = {
2454 0, /*nb_add*/
2455 0, /*nb_subtract*/
2456 0, /*nb_multiply*/
2457 bytes_mod, /*nb_remainder*/
2458};
2459
2460static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002461bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002462
2463static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002464bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 PyObject *x = NULL;
2467 const char *encoding = NULL;
2468 const char *errors = NULL;
2469 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002470 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002471 Py_ssize_t size;
2472 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002475 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2477 &encoding, &errors))
2478 return NULL;
2479 if (x == NULL) {
2480 if (encoding != NULL || errors != NULL) {
2481 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002482 encoding != NULL ?
2483 "encoding without a string argument" :
2484 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 return NULL;
2486 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002487 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002490 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002492 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002494 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002495 return NULL;
2496 }
2497 new = PyUnicode_AsEncodedString(x, encoding, errors);
2498 if (new == NULL)
2499 return NULL;
2500 assert(PyBytes_Check(new));
2501 return new;
2502 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002503
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002504 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002505 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002506 PyUnicode_Check(x) ?
2507 "string argument without an encoding" :
2508 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002509 return NULL;
2510 }
2511
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002512 /* We'd like to call PyObject_Bytes here, but we need to check for an
2513 integer argument before deferring to PyBytes_FromObject, something
2514 PyObject_Bytes doesn't do. */
2515 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2516 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002517 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002518 Py_DECREF(func);
2519 if (new == NULL)
2520 return NULL;
2521 if (!PyBytes_Check(new)) {
2522 PyErr_Format(PyExc_TypeError,
2523 "__bytes__ returned non-bytes (type %.200s)",
2524 Py_TYPE(new)->tp_name);
2525 Py_DECREF(new);
2526 return NULL;
2527 }
2528 return new;
2529 }
2530 else if (PyErr_Occurred())
2531 return NULL;
2532
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002533 if (PyUnicode_Check(x)) {
2534 PyErr_SetString(PyExc_TypeError,
2535 "string argument without an encoding");
2536 return NULL;
2537 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002539 if (PyIndex_Check(x)) {
2540 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2541 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002542 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002543 return NULL;
2544 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002545 }
INADA Naokia634e232017-01-06 17:32:01 +09002546 else {
2547 if (size < 0) {
2548 PyErr_SetString(PyExc_ValueError, "negative count");
2549 return NULL;
2550 }
2551 new = _PyBytes_FromSize(size, 1);
2552 if (new == NULL)
2553 return NULL;
2554 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002555 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002558 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002559}
2560
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002561static PyObject*
2562_PyBytes_FromBuffer(PyObject *x)
2563{
2564 PyObject *new;
2565 Py_buffer view;
2566
2567 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2568 return NULL;
2569
2570 new = PyBytes_FromStringAndSize(NULL, view.len);
2571 if (!new)
2572 goto fail;
2573 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2574 &view, view.len, 'C') < 0)
2575 goto fail;
2576 PyBuffer_Release(&view);
2577 return new;
2578
2579fail:
2580 Py_XDECREF(new);
2581 PyBuffer_Release(&view);
2582 return NULL;
2583}
2584
2585static PyObject*
2586_PyBytes_FromList(PyObject *x)
2587{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002588 Py_ssize_t i, size = PyList_GET_SIZE(x);
2589 Py_ssize_t value;
2590 char *str;
2591 PyObject *item;
2592 _PyBytesWriter writer;
2593
2594 _PyBytesWriter_Init(&writer);
2595 str = _PyBytesWriter_Alloc(&writer, size);
2596 if (str == NULL)
2597 return NULL;
2598 writer.overallocate = 1;
2599 size = writer.allocated;
2600
2601 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2602 item = PyList_GET_ITEM(x, i);
2603 Py_INCREF(item);
2604 value = PyNumber_AsSsize_t(item, NULL);
2605 Py_DECREF(item);
2606 if (value == -1 && PyErr_Occurred())
2607 goto error;
2608
2609 if (value < 0 || value >= 256) {
2610 PyErr_SetString(PyExc_ValueError,
2611 "bytes must be in range(0, 256)");
2612 goto error;
2613 }
2614
2615 if (i >= size) {
2616 str = _PyBytesWriter_Resize(&writer, str, size+1);
2617 if (str == NULL)
2618 return NULL;
2619 size = writer.allocated;
2620 }
2621 *str++ = (char) value;
2622 }
2623 return _PyBytesWriter_Finish(&writer, str);
2624
2625 error:
2626 _PyBytesWriter_Dealloc(&writer);
2627 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002628}
2629
2630static PyObject*
2631_PyBytes_FromTuple(PyObject *x)
2632{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002633 PyObject *bytes;
2634 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2635 Py_ssize_t value;
2636 char *str;
2637 PyObject *item;
2638
2639 bytes = PyBytes_FromStringAndSize(NULL, size);
2640 if (bytes == NULL)
2641 return NULL;
2642 str = ((PyBytesObject *)bytes)->ob_sval;
2643
2644 for (i = 0; i < size; i++) {
2645 item = PyTuple_GET_ITEM(x, i);
2646 value = PyNumber_AsSsize_t(item, NULL);
2647 if (value == -1 && PyErr_Occurred())
2648 goto error;
2649
2650 if (value < 0 || value >= 256) {
2651 PyErr_SetString(PyExc_ValueError,
2652 "bytes must be in range(0, 256)");
2653 goto error;
2654 }
2655 *str++ = (char) value;
2656 }
2657 return bytes;
2658
2659 error:
2660 Py_DECREF(bytes);
2661 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002662}
2663
2664static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002665_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002666{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002667 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002669 _PyBytesWriter writer;
2670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002672 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 if (size == -1 && PyErr_Occurred())
2674 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002675
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002676 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002677 str = _PyBytesWriter_Alloc(&writer, size);
2678 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002680 writer.overallocate = 1;
2681 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 /* Run the iterator to exhaustion */
2684 for (i = 0; ; i++) {
2685 PyObject *item;
2686 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 /* Get the next item */
2689 item = PyIter_Next(it);
2690 if (item == NULL) {
2691 if (PyErr_Occurred())
2692 goto error;
2693 break;
2694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002697 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 Py_DECREF(item);
2699 if (value == -1 && PyErr_Occurred())
2700 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 /* Range check */
2703 if (value < 0 || value >= 256) {
2704 PyErr_SetString(PyExc_ValueError,
2705 "bytes must be in range(0, 256)");
2706 goto error;
2707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 /* Append the byte */
2710 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002711 str = _PyBytesWriter_Resize(&writer, str, size+1);
2712 if (str == NULL)
2713 return NULL;
2714 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002716 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002718
2719 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
2721 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002722 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002723 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724}
2725
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002726PyObject *
2727PyBytes_FromObject(PyObject *x)
2728{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002729 PyObject *it, *result;
2730
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002731 if (x == NULL) {
2732 PyErr_BadInternalCall();
2733 return NULL;
2734 }
2735
2736 if (PyBytes_CheckExact(x)) {
2737 Py_INCREF(x);
2738 return x;
2739 }
2740
2741 /* Use the modern buffer interface */
2742 if (PyObject_CheckBuffer(x))
2743 return _PyBytes_FromBuffer(x);
2744
2745 if (PyList_CheckExact(x))
2746 return _PyBytes_FromList(x);
2747
2748 if (PyTuple_CheckExact(x))
2749 return _PyBytes_FromTuple(x);
2750
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002751 if (!PyUnicode_Check(x)) {
2752 it = PyObject_GetIter(x);
2753 if (it != NULL) {
2754 result = _PyBytes_FromIterator(it, x);
2755 Py_DECREF(it);
2756 return result;
2757 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002758 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2759 return NULL;
2760 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002761 }
2762
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002763 PyErr_Format(PyExc_TypeError,
2764 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002765 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002766 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002767}
2768
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002769static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002770bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002771{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002772 PyObject *tmp, *pnew;
2773 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 assert(PyType_IsSubtype(type, &PyBytes_Type));
2776 tmp = bytes_new(&PyBytes_Type, args, kwds);
2777 if (tmp == NULL)
2778 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002779 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 n = PyBytes_GET_SIZE(tmp);
2781 pnew = type->tp_alloc(type, n);
2782 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002783 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 PyBytes_AS_STRING(tmp), n+1);
2785 ((PyBytesObject *)pnew)->ob_shash =
2786 ((PyBytesObject *)tmp)->ob_shash;
2787 }
2788 Py_DECREF(tmp);
2789 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790}
2791
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002792PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002793"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002794bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002795bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002796bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2797bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002798\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002800 - an iterable yielding integers in range(256)\n\
2801 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002802 - any object implementing the buffer API.\n\
2803 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002804
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002805static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002806
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2809 "bytes",
2810 PyBytesObject_SIZE,
2811 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002812 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002813 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 0, /* tp_getattr */
2815 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002816 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002818 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 &bytes_as_sequence, /* tp_as_sequence */
2820 &bytes_as_mapping, /* tp_as_mapping */
2821 (hashfunc)bytes_hash, /* tp_hash */
2822 0, /* tp_call */
2823 bytes_str, /* tp_str */
2824 PyObject_GenericGetAttr, /* tp_getattro */
2825 0, /* tp_setattro */
2826 &bytes_as_buffer, /* tp_as_buffer */
2827 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2828 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2829 bytes_doc, /* tp_doc */
2830 0, /* tp_traverse */
2831 0, /* tp_clear */
2832 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2833 0, /* tp_weaklistoffset */
2834 bytes_iter, /* tp_iter */
2835 0, /* tp_iternext */
2836 bytes_methods, /* tp_methods */
2837 0, /* tp_members */
2838 0, /* tp_getset */
2839 &PyBaseObject_Type, /* tp_base */
2840 0, /* tp_dict */
2841 0, /* tp_descr_get */
2842 0, /* tp_descr_set */
2843 0, /* tp_dictoffset */
2844 0, /* tp_init */
2845 0, /* tp_alloc */
2846 bytes_new, /* tp_new */
2847 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002848};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002849
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002850void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002851PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 assert(pv != NULL);
2854 if (*pv == NULL)
2855 return;
2856 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002857 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 return;
2859 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002860
2861 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2862 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002863 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002864 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002865
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002866 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002867 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2868 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2869 Py_CLEAR(*pv);
2870 return;
2871 }
2872
2873 oldsize = PyBytes_GET_SIZE(*pv);
2874 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2875 PyErr_NoMemory();
2876 goto error;
2877 }
2878 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2879 goto error;
2880
2881 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2882 PyBuffer_Release(&wb);
2883 return;
2884
2885 error:
2886 PyBuffer_Release(&wb);
2887 Py_CLEAR(*pv);
2888 return;
2889 }
2890
2891 else {
2892 /* Multiple references, need to create new object */
2893 PyObject *v;
2894 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002895 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002896 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002897}
2898
2899void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002900PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 PyBytes_Concat(pv, w);
2903 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904}
2905
2906
Ethan Furmanb95b5612015-01-23 20:05:18 -08002907/* The following function breaks the notion that bytes are immutable:
2908 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002910 as creating a new bytes object and destroying the old one, only
2911 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002913 Note that if there's not enough memory to resize the bytes object, the
2914 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915 memory" exception is set, and -1 is returned. Else (on success) 0 is
2916 returned, and the value in *pv may or may not be the same as on input.
2917 As always, an extra byte is allocated for a trailing \0 byte (newsize
2918 does *not* include that), and a trailing \0 byte is stored.
2919*/
2920
2921int
2922_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2923{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002924 PyObject *v;
2925 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002927 if (!PyBytes_Check(v) || newsize < 0) {
2928 goto error;
2929 }
2930 if (Py_SIZE(v) == newsize) {
2931 /* return early if newsize equals to v->ob_size */
2932 return 0;
2933 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002934 if (Py_SIZE(v) == 0) {
2935 if (newsize == 0) {
2936 return 0;
2937 }
2938 *pv = _PyBytes_FromSize(newsize, 0);
2939 Py_DECREF(v);
2940 return (*pv == NULL) ? -1 : 0;
2941 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002942 if (Py_REFCNT(v) != 1) {
2943 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002945 if (newsize == 0) {
2946 *pv = _PyBytes_FromSize(0, 0);
2947 Py_DECREF(v);
2948 return (*pv == NULL) ? -1 : 0;
2949 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01002951#ifdef Py_REF_DEBUG
2952 _Py_RefTotal--;
2953#endif
2954#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01002956#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002958 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 if (*pv == NULL) {
2960 PyObject_Del(v);
2961 PyErr_NoMemory();
2962 return -1;
2963 }
2964 _Py_NewReference(*pv);
2965 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01002966 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 sv->ob_sval[newsize] = '\0';
2968 sv->ob_shash = -1; /* invalidate cached hash value */
2969 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002970error:
2971 *pv = 0;
2972 Py_DECREF(v);
2973 PyErr_BadInternalCall();
2974 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975}
2976
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977void
Victor Stinnerbed48172019-08-27 00:12:32 +02002978_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002979{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002981 for (i = 0; i < UCHAR_MAX + 1; i++)
2982 Py_CLEAR(characters[i]);
2983 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984}
2985
Benjamin Peterson4116f362008-05-27 00:36:20 +00002986/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002987
2988typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 PyObject_HEAD
2990 Py_ssize_t it_index;
2991 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002993
2994static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 _PyObject_GC_UNTRACK(it);
2998 Py_XDECREF(it->it_seq);
2999 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003000}
3001
3002static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 Py_VISIT(it->it_seq);
3006 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003007}
3008
3009static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 PyBytesObject *seq;
3013 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 assert(it != NULL);
3016 seq = it->it_seq;
3017 if (seq == NULL)
3018 return NULL;
3019 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3022 item = PyLong_FromLong(
3023 (unsigned char)seq->ob_sval[it->it_index]);
3024 if (item != NULL)
3025 ++it->it_index;
3026 return item;
3027 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003030 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003032}
3033
3034static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303035striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 Py_ssize_t len = 0;
3038 if (it->it_seq)
3039 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3040 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041}
3042
3043PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003045
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003046static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303047striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003048{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003049 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003050 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003051 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003052 it->it_seq, it->it_index);
3053 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003054 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003055 }
3056}
3057
3058PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3059
3060static PyObject *
3061striter_setstate(striterobject *it, PyObject *state)
3062{
3063 Py_ssize_t index = PyLong_AsSsize_t(state);
3064 if (index == -1 && PyErr_Occurred())
3065 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003066 if (it->it_seq != NULL) {
3067 if (index < 0)
3068 index = 0;
3069 else if (index > PyBytes_GET_SIZE(it->it_seq))
3070 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3071 it->it_index = index;
3072 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003073 Py_RETURN_NONE;
3074}
3075
3076PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3080 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003081 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3082 reduce_doc},
3083 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3084 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003085 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003086};
3087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003088PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3090 "bytes_iterator", /* tp_name */
3091 sizeof(striterobject), /* tp_basicsize */
3092 0, /* tp_itemsize */
3093 /* methods */
3094 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003095 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003096 0, /* tp_getattr */
3097 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003098 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 0, /* tp_repr */
3100 0, /* tp_as_number */
3101 0, /* tp_as_sequence */
3102 0, /* tp_as_mapping */
3103 0, /* tp_hash */
3104 0, /* tp_call */
3105 0, /* tp_str */
3106 PyObject_GenericGetAttr, /* tp_getattro */
3107 0, /* tp_setattro */
3108 0, /* tp_as_buffer */
3109 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3110 0, /* tp_doc */
3111 (traverseproc)striter_traverse, /* tp_traverse */
3112 0, /* tp_clear */
3113 0, /* tp_richcompare */
3114 0, /* tp_weaklistoffset */
3115 PyObject_SelfIter, /* tp_iter */
3116 (iternextfunc)striter_next, /* tp_iternext */
3117 striter_methods, /* tp_methods */
3118 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003119};
3120
3121static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003122bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 if (!PyBytes_Check(seq)) {
3127 PyErr_BadInternalCall();
3128 return NULL;
3129 }
3130 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3131 if (it == NULL)
3132 return NULL;
3133 it->it_index = 0;
3134 Py_INCREF(seq);
3135 it->it_seq = (PyBytesObject *)seq;
3136 _PyObject_GC_TRACK(it);
3137 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003138}
Victor Stinner00165072015-10-09 01:53:21 +02003139
3140
3141/* _PyBytesWriter API */
3142
/* When overallocation is enabled, a buffer of `size` bytes is grown by an
   extra size / OVERALLOCATE_FACTOR bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3150
3151void
3152_PyBytesWriter_Init(_PyBytesWriter *writer)
3153{
Victor Stinner661aacc2015-10-14 09:41:48 +02003154 /* Set all attributes before small_buffer to 0 */
3155 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003156#ifndef NDEBUG
3157 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3158 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003159#endif
3160}
3161
3162void
3163_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3164{
3165 Py_CLEAR(writer->buffer);
3166}
3167
3168Py_LOCAL_INLINE(char*)
3169_PyBytesWriter_AsString(_PyBytesWriter *writer)
3170{
Victor Stinner661aacc2015-10-14 09:41:48 +02003171 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003172 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003173 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003174 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003175 else if (writer->use_bytearray) {
3176 assert(writer->buffer != NULL);
3177 return PyByteArray_AS_STRING(writer->buffer);
3178 }
3179 else {
3180 assert(writer->buffer != NULL);
3181 return PyBytes_AS_STRING(writer->buffer);
3182 }
Victor Stinner00165072015-10-09 01:53:21 +02003183}
3184
3185Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003186_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003187{
3188 char *start = _PyBytesWriter_AsString(writer);
3189 assert(str != NULL);
3190 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003191 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003192 return str - start;
3193}
3194
Victor Stinner68762572019-10-07 18:42:01 +02003195#ifndef NDEBUG
3196Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003197_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3198{
Victor Stinner00165072015-10-09 01:53:21 +02003199 char *start, *end;
3200
Victor Stinner661aacc2015-10-14 09:41:48 +02003201 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003202 assert(writer->buffer == NULL);
3203 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003204 else {
3205 assert(writer->buffer != NULL);
3206 if (writer->use_bytearray)
3207 assert(PyByteArray_CheckExact(writer->buffer));
3208 else
3209 assert(PyBytes_CheckExact(writer->buffer));
3210 assert(Py_REFCNT(writer->buffer) == 1);
3211 }
Victor Stinner00165072015-10-09 01:53:21 +02003212
Victor Stinner661aacc2015-10-14 09:41:48 +02003213 if (writer->use_bytearray) {
3214 /* bytearray has its own overallocation algorithm,
3215 writer overallocation must be disabled */
3216 assert(!writer->overallocate);
3217 }
3218
3219 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003220 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003221 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003222 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003223 assert(start[writer->allocated] == 0);
3224
3225 end = start + writer->allocated;
3226 assert(str != NULL);
3227 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003228 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003229}
Victor Stinner68762572019-10-07 18:42:01 +02003230#endif
Victor Stinner00165072015-10-09 01:53:21 +02003231
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003232void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003233_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003234{
3235 Py_ssize_t allocated, pos;
3236
Victor Stinner68762572019-10-07 18:42:01 +02003237 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003238 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003239
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003240 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003241 if (writer->overallocate
3242 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3243 /* overallocate to limit the number of realloc() */
3244 allocated += allocated / OVERALLOCATE_FACTOR;
3245 }
3246
Victor Stinner2bf89932015-10-14 11:25:33 +02003247 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003248 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003249 if (writer->use_bytearray) {
3250 if (PyByteArray_Resize(writer->buffer, allocated))
3251 goto error;
3252 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3253 but we cannot use ob_alloc because bytes may need to be moved
3254 to use the whole buffer. bytearray uses an internal optimization
3255 to avoid moving or copying bytes when bytes are removed at the
3256 beginning (ex: del bytearray[:1]). */
3257 }
3258 else {
3259 if (_PyBytes_Resize(&writer->buffer, allocated))
3260 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003261 }
3262 }
3263 else {
3264 /* convert from stack buffer to bytes object buffer */
3265 assert(writer->buffer == NULL);
3266
Victor Stinner661aacc2015-10-14 09:41:48 +02003267 if (writer->use_bytearray)
3268 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3269 else
3270 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003271 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003272 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003273
3274 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003275 char *dest;
3276 if (writer->use_bytearray)
3277 dest = PyByteArray_AS_STRING(writer->buffer);
3278 else
3279 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003280 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003281 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003282 pos);
3283 }
3284
Victor Stinnerb3653a32015-10-09 03:38:24 +02003285 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003286#ifndef NDEBUG
3287 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3288 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003289#endif
Victor Stinner00165072015-10-09 01:53:21 +02003290 }
3291 writer->allocated = allocated;
3292
3293 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003294 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003295 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003296
3297error:
3298 _PyBytesWriter_Dealloc(writer);
3299 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003300}
3301
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003302void*
3303_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3304{
3305 Py_ssize_t new_min_size;
3306
Victor Stinner68762572019-10-07 18:42:01 +02003307 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003308 assert(size >= 0);
3309
3310 if (size == 0) {
3311 /* nothing to do */
3312 return str;
3313 }
3314
3315 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3316 PyErr_NoMemory();
3317 _PyBytesWriter_Dealloc(writer);
3318 return NULL;
3319 }
3320 new_min_size = writer->min_size + size;
3321
3322 if (new_min_size > writer->allocated)
3323 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3324
3325 writer->min_size = new_min_size;
3326 return str;
3327}
3328
Victor Stinner00165072015-10-09 01:53:21 +02003329/* Allocate the buffer to write size bytes.
3330 Return the pointer to the beginning of buffer data.
3331 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003332void*
Victor Stinner00165072015-10-09 01:53:21 +02003333_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3334{
3335 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003336 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003337 assert(size >= 0);
3338
Victor Stinnerb3653a32015-10-09 03:38:24 +02003339 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003340#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003341 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003342 /* In debug mode, don't use the full small buffer because it is less
3343 efficient than bytes and bytearray objects to detect buffer underflow
3344 and buffer overflow. Use 10 bytes of the small buffer to test also
3345 code using the smaller buffer in debug mode.
3346
3347 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3348 in debug mode to also be able to detect stack overflow when running
3349 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3350 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3351 stack overflow. */
3352 writer->allocated = Py_MIN(writer->allocated, 10);
3353 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3354 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003355 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003356#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003357 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003358#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003359 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003360}
3361
3362PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003363_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003364{
Victor Stinner2bf89932015-10-14 11:25:33 +02003365 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003366 PyObject *result;
3367
Victor Stinner68762572019-10-07 18:42:01 +02003368 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003369
Victor Stinner2bf89932015-10-14 11:25:33 +02003370 size = _PyBytesWriter_GetSize(writer, str);
3371 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003372 Py_CLEAR(writer->buffer);
3373 /* Get the empty byte string singleton */
3374 result = PyBytes_FromStringAndSize(NULL, 0);
3375 }
3376 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003377 if (writer->use_bytearray) {
3378 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3379 }
3380 else {
3381 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3382 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003383 }
3384 else {
3385 result = writer->buffer;
3386 writer->buffer = NULL;
3387
Victor Stinner2bf89932015-10-14 11:25:33 +02003388 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003389 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003390 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003391 Py_DECREF(result);
3392 return NULL;
3393 }
3394 }
3395 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003396 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003397 assert(result == NULL);
3398 return NULL;
3399 }
Victor Stinner00165072015-10-09 01:53:21 +02003400 }
3401 }
Victor Stinner00165072015-10-09 01:53:21 +02003402 }
Victor Stinner00165072015-10-09 01:53:21 +02003403 return result;
3404}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003405
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003406void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003407_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003408 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003409{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003410 char *str = (char *)ptr;
3411
Victor Stinnerce179bf2015-10-09 12:57:22 +02003412 str = _PyBytesWriter_Prepare(writer, str, size);
3413 if (str == NULL)
3414 return NULL;
3415
Christian Heimesf051e432016-09-13 20:22:02 +02003416 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003417 str += size;
3418
3419 return str;
3420}