blob: 987d98d4ed50f674d9ef9d79346bc1746b5cecdf [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnera15e2602020-04-08 02:01:56 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01009#include "pycore_pymem.h"
10#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +000011
Gregory P. Smith8cb65692015-04-25 23:22:26 +000012#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000013#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030018/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020019
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030020#include "clinic/bytesobject.c.h"
21
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Hai Shi46874c22020-01-30 17:20:25 -060025_Py_IDENTIFIER(__bytes__);
26
Mark Dickinsonfd24b322008-12-06 15:33:31 +000027/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
Victor Stinner2bf89932015-10-14 11:25:33 +020035/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
Martin Pantera90a4a92016-05-30 04:04:50 +000043 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000051 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020056 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 Py_INCREF(op);
69 return (PyObject *)op;
70 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071
Victor Stinner049e5092014-08-17 22:20:00 +020072 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 PyErr_SetString(PyExc_OverflowError,
74 "byte string is too large");
75 return NULL;
76 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020079 if (use_calloc)
80 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
81 else
82 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 if (op == NULL)
84 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010085 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 if (!use_calloc)
88 op->ob_sval[size] = '\0';
89 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 if (size == 0) {
91 nullstring = op;
92 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020093 }
94 return (PyObject *) op;
95}
96
97PyObject *
98PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
99{
100 PyBytesObject *op;
101 if (size < 0) {
102 PyErr_SetString(PyExc_SystemError,
103 "Negative size passed to PyBytes_FromStringAndSize");
104 return NULL;
105 }
106 if (size == 1 && str != NULL &&
107 (op = characters[*str & UCHAR_MAX]) != NULL)
108 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
Christian Heimesf051e432016-09-13 20:22:02 +0200119 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 Py_INCREF(op);
147 return (PyObject *)op;
148 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 /* Inline PyObject_NewVar */
151 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
152 if (op == NULL)
153 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100154 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200156 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 /* share short strings */
158 if (size == 0) {
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 characters[*str & UCHAR_MAX] = op;
163 Py_INCREF(op);
164 }
165 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000167
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168PyObject *
169PyBytes_FromFormatV(const char *format, va_list vargs)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200172 const char *f;
173 const char *p;
174 Py_ssize_t prec;
175 int longflag;
176 int size_tflag;
177 /* Longest 64-bit formatted numbers:
178 - "18446744073709551615\0" (21 bytes)
179 - "-9223372036854775808\0" (21 bytes)
180 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000181
Victor Stinner03dab782015-10-14 00:21:35 +0200182 Longest 64-bit pointer representation:
183 "0xffffffffffffffff\0" (19 bytes). */
184 char buffer[21];
185 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000186
Victor Stinner03dab782015-10-14 00:21:35 +0200187 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000188
Victor Stinner03dab782015-10-14 00:21:35 +0200189 s = _PyBytesWriter_Alloc(&writer, strlen(format));
190 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200192 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194#define WRITE_BYTES(str) \
195 do { \
196 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
197 if (s == NULL) \
198 goto error; \
199 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200202 if (*f != '%') {
203 *s++ = *f;
204 continue;
205 }
206
207 p = f++;
208
209 /* ignore the width (ex: 10 in "%10s") */
210 while (Py_ISDIGIT(*f))
211 f++;
212
213 /* parse the precision (ex: 10 in "%.10s") */
214 prec = 0;
215 if (*f == '.') {
216 f++;
217 for (; Py_ISDIGIT(*f); f++) {
218 prec = (prec * 10) + (*f - '0');
219 }
220 }
221
222 while (*f && *f != '%' && !Py_ISALPHA(*f))
223 f++;
224
225 /* handle the long flag ('l'), but only for %ld and %lu.
226 others can be added when necessary. */
227 longflag = 0;
228 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
229 longflag = 1;
230 ++f;
231 }
232
233 /* handle the size_t flag ('z'). */
234 size_tflag = 0;
235 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
236 size_tflag = 1;
237 ++f;
238 }
239
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700240 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200241 (ex: 2 for "%s") */
242 writer.min_size -= (f - p + 1);
243
244 switch (*f) {
245 case 'c':
246 {
247 int c = va_arg(vargs, int);
248 if (c < 0 || c > 255) {
249 PyErr_SetString(PyExc_OverflowError,
250 "PyBytes_FromFormatV(): %c format "
251 "expects an integer in range [0; 255]");
252 goto error;
253 }
254 writer.min_size++;
255 *s++ = (unsigned char)c;
256 break;
257 }
258
259 case 'd':
260 if (longflag)
261 sprintf(buffer, "%ld", va_arg(vargs, long));
262 else if (size_tflag)
263 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
264 va_arg(vargs, Py_ssize_t));
265 else
266 sprintf(buffer, "%d", va_arg(vargs, int));
267 assert(strlen(buffer) < sizeof(buffer));
268 WRITE_BYTES(buffer);
269 break;
270
271 case 'u':
272 if (longflag)
273 sprintf(buffer, "%lu",
274 va_arg(vargs, unsigned long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
277 va_arg(vargs, size_t));
278 else
279 sprintf(buffer, "%u",
280 va_arg(vargs, unsigned int));
281 assert(strlen(buffer) < sizeof(buffer));
282 WRITE_BYTES(buffer);
283 break;
284
285 case 'i':
286 sprintf(buffer, "%i", va_arg(vargs, int));
287 assert(strlen(buffer) < sizeof(buffer));
288 WRITE_BYTES(buffer);
289 break;
290
291 case 'x':
292 sprintf(buffer, "%x", va_arg(vargs, int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 's':
298 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200300
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200301 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200302 if (prec <= 0) {
303 i = strlen(p);
304 }
305 else {
306 i = 0;
307 while (i < prec && p[i]) {
308 i++;
309 }
310 }
Victor Stinner03dab782015-10-14 00:21:35 +0200311 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
312 if (s == NULL)
313 goto error;
314 break;
315 }
316
317 case 'p':
318 sprintf(buffer, "%p", va_arg(vargs, void*));
319 assert(strlen(buffer) < sizeof(buffer));
320 /* %p is ill-defined: ensure leading 0x. */
321 if (buffer[1] == 'X')
322 buffer[1] = 'x';
323 else if (buffer[1] != 'x') {
324 memmove(buffer+2, buffer, strlen(buffer)+1);
325 buffer[0] = '0';
326 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 }
Victor Stinner03dab782015-10-14 00:21:35 +0200328 WRITE_BYTES(buffer);
329 break;
330
331 case '%':
332 writer.min_size++;
333 *s++ = '%';
334 break;
335
336 default:
337 if (*f == 0) {
338 /* fix min_size if we reached the end of the format string */
339 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000341
Victor Stinner03dab782015-10-14 00:21:35 +0200342 /* invalid format string: copy unformatted string and exit */
343 WRITE_BYTES(p);
344 return _PyBytesWriter_Finish(&writer, s);
345 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348#undef WRITE_BYTES
349
350 return _PyBytesWriter_Finish(&writer, s);
351
352 error:
353 _PyBytesWriter_Dealloc(&writer);
354 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355}
356
357PyObject *
358PyBytes_FromFormat(const char *format, ...)
359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 PyObject* ret;
361 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362
363#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 ret = PyBytes_FromFormatV(format, vargs);
369 va_end(vargs);
370 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Ethan Furmanb95b5612015-01-23 20:05:18 -0800373/* Helpers for formatstring */
374
375Py_LOCAL_INLINE(PyObject *)
376getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
377{
378 Py_ssize_t argidx = *p_argidx;
379 if (argidx < arglen) {
380 (*p_argidx)++;
381 if (arglen < 0)
382 return args;
383 else
384 return PyTuple_GetItem(args, argidx);
385 }
386 PyErr_SetString(PyExc_TypeError,
387 "not enough arguments for format string");
388 return NULL;
389}
390
391/* Format codes
392 * F_LJUST '-'
393 * F_SIGN '+'
394 * F_BLANK ' '
395 * F_ALT '#'
396 * F_ZERO '0'
397 */
398#define F_LJUST (1<<0)
399#define F_SIGN (1<<1)
400#define F_BLANK (1<<2)
401#define F_ALT (1<<3)
402#define F_ZERO (1<<4)
403
404/* Returns a new reference to a PyBytes object, or NULL on failure. */
405
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200406static char*
407formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200408 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800409{
410 char *p;
411 PyObject *result;
412 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200413 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800414
415 x = PyFloat_AsDouble(v);
416 if (x == -1.0 && PyErr_Occurred()) {
417 PyErr_Format(PyExc_TypeError, "float argument required, "
418 "not %.200s", Py_TYPE(v)->tp_name);
419 return NULL;
420 }
421
422 if (prec < 0)
423 prec = 6;
424
425 p = PyOS_double_to_string(x, type, prec,
426 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
427
428 if (p == NULL)
429 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200430
431 len = strlen(p);
432 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200433 str = _PyBytesWriter_Prepare(writer, str, len);
434 if (str == NULL)
435 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200436 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200437 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200438 str += len;
439 return str;
440 }
441
442 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600445 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446}
447
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300448static PyObject *
449formatlong(PyObject *v, int flags, int prec, int type)
450{
451 PyObject *result, *iobj;
452 if (type == 'i')
453 type = 'd';
454 if (PyLong_Check(v))
455 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
456 if (PyNumber_Check(v)) {
457 /* make sure number is a type of integer for o, x, and X */
458 if (type == 'o' || type == 'x' || type == 'X')
459 iobj = PyNumber_Index(v);
460 else
461 iobj = PyNumber_Long(v);
462 if (iobj == NULL) {
463 if (!PyErr_ExceptionMatches(PyExc_TypeError))
464 return NULL;
465 }
466 else if (!PyLong_Check(iobj))
467 Py_CLEAR(iobj);
468 if (iobj != NULL) {
469 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
470 Py_DECREF(iobj);
471 return result;
472 }
473 }
474 PyErr_Format(PyExc_TypeError,
475 "%%%c format: %s is required, not %.200s", type,
476 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
477 : "a number",
478 Py_TYPE(v)->tp_name);
479 return NULL;
480}
481
482static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200483byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800484{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300485 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486 *p = PyBytes_AS_STRING(arg)[0];
487 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyByteArray_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
493 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300494 PyObject *iobj;
495 long ival;
496 int overflow;
497 /* make sure number is a type of integer */
498 if (PyLong_Check(arg)) {
499 ival = PyLong_AsLongAndOverflow(arg, &overflow);
500 }
501 else {
502 iobj = PyNumber_Index(arg);
503 if (iobj == NULL) {
504 if (!PyErr_ExceptionMatches(PyExc_TypeError))
505 return 0;
506 goto onError;
507 }
508 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
509 Py_DECREF(iobj);
510 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300511 if (!overflow && ival == -1 && PyErr_Occurred())
512 goto onError;
513 if (overflow || !(0 <= ival && ival <= 255)) {
514 PyErr_SetString(PyExc_OverflowError,
515 "%c arg not in range(256)");
516 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800517 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300518 *p = (char)ival;
519 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300521 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200522 PyErr_SetString(PyExc_TypeError,
523 "%c requires an integer in range(256) or a single byte");
524 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525}
526
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800527static PyObject *_PyBytes_FromBuffer(PyObject *x);
528
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200530format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 /* is it a bytes object? */
534 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 *pbuf = PyBytes_AS_STRING(v);
536 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 return v;
539 }
540 if (PyByteArray_Check(v)) {
541 *pbuf = PyByteArray_AS_STRING(v);
542 *plen = PyByteArray_GET_SIZE(v);
543 Py_INCREF(v);
544 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800545 }
546 /* does it support __bytes__? */
547 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
548 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100549 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800550 Py_DECREF(func);
551 if (result == NULL)
552 return NULL;
553 if (!PyBytes_Check(result)) {
554 PyErr_Format(PyExc_TypeError,
555 "__bytes__ returned non-bytes (type %.200s)",
556 Py_TYPE(result)->tp_name);
557 Py_DECREF(result);
558 return NULL;
559 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200560 *pbuf = PyBytes_AS_STRING(result);
561 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800562 return result;
563 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800564 /* does it support buffer protocol? */
565 if (PyObject_CheckBuffer(v)) {
566 /* maybe we can avoid making a copy of the buffer object here? */
567 result = _PyBytes_FromBuffer(v);
568 if (result == NULL)
569 return NULL;
570 *pbuf = PyBytes_AS_STRING(result);
571 *plen = PyBytes_GET_SIZE(result);
572 return result;
573 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800575 "%%b requires a bytes-like object, "
576 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 Py_TYPE(v)->tp_name);
578 return NULL;
579}
580
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582
583PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200584_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
585 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586{
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 const char *fmt;
588 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200590 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593 _PyBytesWriter writer;
594
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 PyErr_BadInternalCall();
597 return NULL;
598 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 fmt = format;
600 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
602 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200603 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604
605 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
606 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800607 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200608 if (!use_bytearray)
609 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200610
Ethan Furmanb95b5612015-01-23 20:05:18 -0800611 if (PyTuple_Check(args)) {
612 arglen = PyTuple_GET_SIZE(args);
613 argidx = 0;
614 }
615 else {
616 arglen = -1;
617 argidx = -2;
618 }
619 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
620 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
621 !PyByteArray_Check(args)) {
622 dict = args;
623 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 while (--fmtcnt >= 0) {
626 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 Py_ssize_t len;
628 char *pos;
629
Xiang Zhangb76ad512017-03-06 17:17:05 +0800630 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631 if (pos != NULL)
632 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200633 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800634 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200635 assert(len != 0);
636
Christian Heimesf051e432016-09-13 20:22:02 +0200637 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 res += len;
639 fmt += len;
640 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 }
642 else {
643 /* Got a format specifier */
644 int flags = 0;
645 Py_ssize_t width = -1;
646 int prec = -1;
647 int c = '\0';
648 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800649 PyObject *v = NULL;
650 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200651 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200653 Py_ssize_t len = 0;
654 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200655 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200658 if (*fmt == '%') {
659 *res++ = '%';
660 fmt++;
661 fmtcnt--;
662 continue;
663 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800664 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200665 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 Py_ssize_t keylen;
667 PyObject *key;
668 int pcount = 1;
669
670 if (dict == NULL) {
671 PyErr_SetString(PyExc_TypeError,
672 "format requires a mapping");
673 goto error;
674 }
675 ++fmt;
676 --fmtcnt;
677 keystart = fmt;
678 /* Skip over balanced parentheses */
679 while (pcount > 0 && --fmtcnt >= 0) {
680 if (*fmt == ')')
681 --pcount;
682 else if (*fmt == '(')
683 ++pcount;
684 fmt++;
685 }
686 keylen = fmt - keystart - 1;
687 if (fmtcnt < 0 || pcount > 0) {
688 PyErr_SetString(PyExc_ValueError,
689 "incomplete format key");
690 goto error;
691 }
692 key = PyBytes_FromStringAndSize(keystart,
693 keylen);
694 if (key == NULL)
695 goto error;
696 if (args_owned) {
697 Py_DECREF(args);
698 args_owned = 0;
699 }
700 args = PyObject_GetItem(dict, key);
701 Py_DECREF(key);
702 if (args == NULL) {
703 goto error;
704 }
705 args_owned = 1;
706 arglen = -1;
707 argidx = -2;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 while (--fmtcnt >= 0) {
712 switch (c = *fmt++) {
713 case '-': flags |= F_LJUST; continue;
714 case '+': flags |= F_SIGN; continue;
715 case ' ': flags |= F_BLANK; continue;
716 case '#': flags |= F_ALT; continue;
717 case '0': flags |= F_ZERO; continue;
718 }
719 break;
720 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200721
722 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800723 if (c == '*') {
724 v = getnextarg(args, arglen, &argidx);
725 if (v == NULL)
726 goto error;
727 if (!PyLong_Check(v)) {
728 PyErr_SetString(PyExc_TypeError,
729 "* wants int");
730 goto error;
731 }
732 width = PyLong_AsSsize_t(v);
733 if (width == -1 && PyErr_Occurred())
734 goto error;
735 if (width < 0) {
736 flags |= F_LJUST;
737 width = -width;
738 }
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 width = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "width too big");
752 goto error;
753 }
754 width = width*10 + (c - '0');
755 }
756 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200757
758 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800759 if (c == '.') {
760 prec = 0;
761 if (--fmtcnt >= 0)
762 c = *fmt++;
763 if (c == '*') {
764 v = getnextarg(args, arglen, &argidx);
765 if (v == NULL)
766 goto error;
767 if (!PyLong_Check(v)) {
768 PyErr_SetString(
769 PyExc_TypeError,
770 "* wants int");
771 goto error;
772 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200773 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800774 if (prec == -1 && PyErr_Occurred())
775 goto error;
776 if (prec < 0)
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 }
781 else if (c >= 0 && isdigit(c)) {
782 prec = c - '0';
783 while (--fmtcnt >= 0) {
784 c = Py_CHARMASK(*fmt++);
785 if (!isdigit(c))
786 break;
787 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
788 PyErr_SetString(
789 PyExc_ValueError,
790 "prec too big");
791 goto error;
792 }
793 prec = prec*10 + (c - '0');
794 }
795 }
796 } /* prec */
797 if (fmtcnt >= 0) {
798 if (c == 'h' || c == 'l' || c == 'L') {
799 if (--fmtcnt >= 0)
800 c = *fmt++;
801 }
802 }
803 if (fmtcnt < 0) {
804 PyErr_SetString(PyExc_ValueError,
805 "incomplete format");
806 goto error;
807 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200808 v = getnextarg(args, arglen, &argidx);
809 if (v == NULL)
810 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200811
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300812 if (fmtcnt == 0) {
813 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814 writer.overallocate = 0;
815 }
816
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 sign = 0;
818 fill = ' ';
819 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700820 case 'r':
821 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800822 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200823 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (temp == NULL)
825 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 assert(PyUnicode_IS_ASCII(temp));
827 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
828 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 if (prec >= 0 && len > prec)
830 len = prec;
831 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200832
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 case 's':
834 // %s is only for 2/3 code; 3 only code should use %b
835 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 if (temp == NULL)
838 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 'i':
844 case 'd':
845 case 'u':
846 case 'o':
847 case 'x':
848 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200849 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200850 && width == -1 && prec == -1
851 && !(flags & (F_SIGN | F_BLANK))
852 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200853 {
854 /* Fast path */
855 int alternate = flags & F_ALT;
856 int base;
857
858 switch(c)
859 {
860 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700861 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200862 case 'd':
863 case 'i':
864 case 'u':
865 base = 10;
866 break;
867 case 'o':
868 base = 8;
869 break;
870 case 'x':
871 case 'X':
872 base = 16;
873 break;
874 }
875
876 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200877 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200878 res = _PyLong_FormatBytesWriter(&writer, res,
879 v, base, alternate);
880 if (res == NULL)
881 goto error;
882 continue;
883 }
884
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300885 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200886 if (!temp)
887 goto error;
888 assert(PyUnicode_IS_ASCII(temp));
889 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
890 len = PyUnicode_GET_LENGTH(temp);
891 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800892 if (flags & F_ZERO)
893 fill = '0';
894 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200895
Ethan Furmanb95b5612015-01-23 20:05:18 -0800896 case 'e':
897 case 'E':
898 case 'f':
899 case 'F':
900 case 'g':
901 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (width == -1 && prec == -1
903 && !(flags & (F_SIGN | F_BLANK)))
904 {
905 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200906 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200907 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200908 if (res == NULL)
909 goto error;
910 continue;
911 }
912
Victor Stinnerad771582015-10-09 12:38:53 +0200913 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800914 goto error;
915 pbuf = PyBytes_AS_STRING(temp);
916 len = PyBytes_GET_SIZE(temp);
917 sign = 1;
918 if (flags & F_ZERO)
919 fill = '0';
920 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200923 pbuf = &onechar;
924 len = byte_converter(v, &onechar);
925 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200927 if (width == -1) {
928 /* Fast path */
929 *res++ = onechar;
930 continue;
931 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200933
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 default:
935 PyErr_Format(PyExc_ValueError,
936 "unsupported format character '%c' (0x%x) "
937 "at index %zd",
938 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200939 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
941 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200942
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 if (sign) {
944 if (*pbuf == '-' || *pbuf == '+') {
945 sign = *pbuf++;
946 len--;
947 }
948 else if (flags & F_SIGN)
949 sign = '+';
950 else if (flags & F_BLANK)
951 sign = ' ';
952 else
953 sign = 0;
954 }
955 if (width < len)
956 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957
958 alloc = width;
959 if (sign != 0 && len == width)
960 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200961 /* 2: size preallocated for %s */
962 if (alloc > 2) {
963 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200964 if (res == NULL)
965 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200967#ifndef NDEBUG
968 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200969#endif
970
971 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 if (sign) {
973 if (fill != ' ')
974 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800975 if (width > len)
976 width--;
977 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978
979 /* Write the numeric prefix for "x", "X" and "o" formats
980 if the alternate form is used.
981 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200982 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 assert(pbuf[0] == '0');
984 assert(pbuf[1] == c);
985 if (fill != ' ') {
986 *res++ = *pbuf++;
987 *res++ = *pbuf++;
988 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 width -= 2;
990 if (width < 0)
991 width = 0;
992 len -= 2;
993 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200994
995 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800996 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200997 memset(res, fill, width - len);
998 res += (width - len);
999 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001001
1002 /* If padding with spaces: write sign if needed and/or numeric
1003 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 if (fill == ' ') {
1005 if (sign)
1006 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001007 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001008 assert(pbuf[0] == '0');
1009 assert(pbuf[1] == c);
1010 *res++ = *pbuf++;
1011 *res++ = *pbuf++;
1012 }
1013 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001016 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* Pad right with the fill character if needed */
1020 if (width > len) {
1021 memset(res, ' ', width - len);
1022 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001025 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 PyErr_SetString(PyExc_TypeError,
1027 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 Py_XDECREF(temp);
1029 goto error;
1030 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001033#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034 /* check that we computed the exact size for this write */
1035 assert((res - before) == alloc);
1036#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
1039 /* If overallocation was disabled, ensure that it was the last
1040 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001041 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001043
Ethan Furmanb95b5612015-01-23 20:05:18 -08001044 if (argidx < arglen && !dict) {
1045 PyErr_SetString(PyExc_TypeError,
1046 "not all arguments converted during bytes formatting");
1047 goto error;
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050 if (args_owned) {
1051 Py_DECREF(args);
1052 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054
1055 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001056 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 if (args_owned) {
1058 Py_DECREF(args);
1059 }
1060 return NULL;
1061}
1062
Greg Price3a4f6672019-09-12 11:12:22 -07001063/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001064PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_ssize_t len,
1066 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001067 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001070 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001072 _PyBytesWriter writer;
1073
1074 _PyBytesWriter_Init(&writer);
1075
1076 p = _PyBytesWriter_Alloc(&writer, len);
1077 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001079 writer.overallocate = 1;
1080
Eric V. Smith42454af2016-10-31 09:22:08 -04001081 *first_invalid_escape = NULL;
1082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 end = s + len;
1084 while (s < end) {
1085 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001086 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 continue;
1088 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001091 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 PyErr_SetString(PyExc_ValueError,
1093 "Trailing \\ in string");
1094 goto failed;
1095 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 switch (*s++) {
1098 /* XXX This assumes ASCII! */
1099 case '\n': break;
1100 case '\\': *p++ = '\\'; break;
1101 case '\'': *p++ = '\''; break;
1102 case '\"': *p++ = '\"'; break;
1103 case 'b': *p++ = '\b'; break;
1104 case 'f': *p++ = '\014'; break; /* FF */
1105 case 't': *p++ = '\t'; break;
1106 case 'n': *p++ = '\n'; break;
1107 case 'r': *p++ = '\r'; break;
1108 case 'v': *p++ = '\013'; break; /* VT */
1109 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1110 case '0': case '1': case '2': case '3':
1111 case '4': case '5': case '6': case '7':
1112 c = s[-1] - '0';
1113 if (s < end && '0' <= *s && *s <= '7') {
1114 c = (c<<3) + *s++ - '0';
1115 if (s < end && '0' <= *s && *s <= '7')
1116 c = (c<<3) + *s++ - '0';
1117 }
1118 *p++ = c;
1119 break;
1120 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001121 if (s+1 < end) {
1122 int digit1, digit2;
1123 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1124 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1125 if (digit1 < 16 && digit2 < 16) {
1126 *p++ = (unsigned char)((digit1 << 4) + digit2);
1127 s += 2;
1128 break;
1129 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 /* invalid hexadecimal digits */
1132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001134 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001135 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001136 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 goto failed;
1138 }
1139 if (strcmp(errors, "replace") == 0) {
1140 *p++ = '?';
1141 } else if (strcmp(errors, "ignore") == 0)
1142 /* do nothing */;
1143 else {
1144 PyErr_Format(PyExc_ValueError,
1145 "decoding error; unknown "
1146 "error handling code: %.400s",
1147 errors);
1148 goto failed;
1149 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001150 /* skip \x */
1151 if (s < end && Py_ISXDIGIT(s[0]))
1152 s++; /* and a hexdigit */
1153 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001156 if (*first_invalid_escape == NULL) {
1157 *first_invalid_escape = s-1; /* Back up one char, since we've
1158 already incremented s. */
1159 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001161 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 }
1163 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001164
1165 return _PyBytesWriter_Finish(&writer, p);
1166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001168 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001170}
1171
Eric V. Smith42454af2016-10-31 09:22:08 -04001172PyObject *PyBytes_DecodeEscape(const char *s,
1173 Py_ssize_t len,
1174 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001175 Py_ssize_t Py_UNUSED(unicode),
1176 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001177{
1178 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001179 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001180 &first_invalid_escape);
1181 if (result == NULL)
1182 return NULL;
1183 if (first_invalid_escape != NULL) {
1184 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1185 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001186 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001187 Py_DECREF(result);
1188 return NULL;
1189 }
1190 }
1191 return result;
1192
1193}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001194/* -------------------------------------------------------------------- */
1195/* object api */
1196
1197Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001198PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (!PyBytes_Check(op)) {
1201 PyErr_Format(PyExc_TypeError,
1202 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1203 return -1;
1204 }
1205 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206}
1207
1208char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001209PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 if (!PyBytes_Check(op)) {
1212 PyErr_Format(PyExc_TypeError,
1213 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1214 return NULL;
1215 }
1216 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217}
1218
1219int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001220PyBytes_AsStringAndSize(PyObject *obj,
1221 char **s,
1222 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 if (s == NULL) {
1225 PyErr_BadInternalCall();
1226 return -1;
1227 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(obj)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1232 return -1;
1233 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 *s = PyBytes_AS_STRING(obj);
1236 if (len != NULL)
1237 *len = PyBytes_GET_SIZE(obj);
1238 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001239 PyErr_SetString(PyExc_ValueError,
1240 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return -1;
1242 }
1243 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244}
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246/* -------------------------------------------------------------------- */
1247/* Methods */
1248
Eric Smith0923d1d2009-04-16 20:16:10 +00001249#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001250
1251#include "stringlib/fastsearch.h"
1252#include "stringlib/count.h"
1253#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001254#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001255#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001256#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001257#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Eric Smith0f78bff2009-11-30 01:01:42 +00001259#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261PyObject *
1262PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001263{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001264 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001266 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 PyObject *v;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001268 unsigned char quote;
1269 const unsigned char *s;
1270 Py_UCS1 *p;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001271
1272 /* Compute size of output string */
1273 squotes = dquotes = 0;
1274 newsize = 3; /* b'' */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001275 s = (const unsigned char*)op->ob_sval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001277 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001279 case '\'': squotes++; break;
1280 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001282 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 default:
1284 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001287 if (newsize > PY_SSIZE_T_MAX - incr)
1288 goto overflow;
1289 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001290 }
1291 quote = '\'';
1292 if (smartquotes && squotes && !dquotes)
1293 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001294 if (squotes && quote == '\'') {
1295 if (newsize > PY_SSIZE_T_MAX - squotes)
1296 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299
1300 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (v == NULL) {
1302 return NULL;
1303 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 *p++ = 'b', *p++ = quote;
1307 for (i = 0; i < length; i++) {
1308 unsigned char c = op->ob_sval[i];
1309 if (c == quote || c == '\\')
1310 *p++ = '\\', *p++ = c;
1311 else if (c == '\t')
1312 *p++ = '\\', *p++ = 't';
1313 else if (c == '\n')
1314 *p++ = '\\', *p++ = 'n';
1315 else if (c == '\r')
1316 *p++ = '\\', *p++ = 'r';
1317 else if (c < ' ' || c >= 0x7f) {
1318 *p++ = '\\';
1319 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001320 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1321 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001323 else
1324 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001327 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001329
1330 overflow:
1331 PyErr_SetString(PyExc_OverflowError,
1332 "bytes object is too large to make repr");
1333 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001334}
1335
Neal Norwitz6968b052007-02-27 19:02:19 +00001336static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001337bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001340}
1341
Neal Norwitz6968b052007-02-27 19:02:19 +00001342static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001343bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001344{
Victor Stinner331a6a52019-05-27 16:39:22 +02001345 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001346 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001350 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 }
1352 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001353}
1354
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001356bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359}
Neal Norwitz6968b052007-02-27 19:02:19 +00001360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361/* This is also used by PyBytes_Concat() */
1362static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001363bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 Py_buffer va, vb;
1366 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 va.len = -1;
1369 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001370 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1371 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001373 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 goto done;
1375 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 /* Optimize end cases */
1378 if (va.len == 0 && PyBytes_CheckExact(b)) {
1379 result = b;
1380 Py_INCREF(result);
1381 goto done;
1382 }
1383 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1384 result = a;
1385 Py_INCREF(result);
1386 goto done;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001389 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyErr_NoMemory();
1391 goto done;
1392 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001394 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (result != NULL) {
1396 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1397 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
1400 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (va.len != -1)
1402 PyBuffer_Release(&va);
1403 if (vb.len != -1)
1404 PyBuffer_Release(&vb);
1405 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406}
Neal Norwitz6968b052007-02-27 19:02:19 +00001407
1408static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001411 Py_ssize_t i;
1412 Py_ssize_t j;
1413 Py_ssize_t size;
1414 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size_t nbytes;
1416 if (n < 0)
1417 n = 0;
1418 /* watch out for overflows: the size can overflow int,
1419 * and the # of bytes needed can overflow size_t
1420 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001421 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyErr_SetString(PyExc_OverflowError,
1423 "repeated bytes are too long");
1424 return NULL;
1425 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001426 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1428 Py_INCREF(a);
1429 return (PyObject *)a;
1430 }
1431 nbytes = (size_t)size;
1432 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1433 PyErr_SetString(PyExc_OverflowError,
1434 "repeated bytes are too long");
1435 return NULL;
1436 }
1437 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1438 if (op == NULL)
1439 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001440 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 op->ob_shash = -1;
1442 op->ob_sval[size] = '\0';
1443 if (Py_SIZE(a) == 1 && n > 0) {
1444 memset(op->ob_sval, a->ob_sval[0] , n);
1445 return (PyObject *) op;
1446 }
1447 i = 0;
1448 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001449 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 i = Py_SIZE(a);
1451 }
1452 while (i < size) {
1453 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001454 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 i += j;
1456 }
1457 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001458}
1459
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001460static int
1461bytes_contains(PyObject *self, PyObject *arg)
1462{
1463 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1464}
1465
Neal Norwitz6968b052007-02-27 19:02:19 +00001466static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001467bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (i < 0 || i >= Py_SIZE(a)) {
1470 PyErr_SetString(PyExc_IndexError, "index out of range");
1471 return NULL;
1472 }
1473 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001474}
1475
Benjamin Peterson621b4302016-09-09 13:54:34 -07001476static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001477bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1478{
1479 int cmp;
1480 Py_ssize_t len;
1481
1482 len = Py_SIZE(a);
1483 if (Py_SIZE(b) != len)
1484 return 0;
1485
1486 if (a->ob_sval[0] != b->ob_sval[0])
1487 return 0;
1488
1489 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1490 return (cmp == 0);
1491}
1492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001494bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 int c;
1497 Py_ssize_t len_a, len_b;
1498 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001499 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 /* Make sure both arguments are strings. */
1502 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001503 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001504 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001505 rc = PyObject_IsInstance((PyObject*)a,
1506 (PyObject*)&PyUnicode_Type);
1507 if (!rc)
1508 rc = PyObject_IsInstance((PyObject*)b,
1509 (PyObject*)&PyUnicode_Type);
1510 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001512 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001513 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001514 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001515 return NULL;
1516 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001517 else {
1518 rc = PyObject_IsInstance((PyObject*)a,
1519 (PyObject*)&PyLong_Type);
1520 if (!rc)
1521 rc = PyObject_IsInstance((PyObject*)b,
1522 (PyObject*)&PyLong_Type);
1523 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001524 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001525 if (rc) {
1526 if (PyErr_WarnEx(PyExc_BytesWarning,
1527 "Comparison between bytes and int", 1))
1528 return NULL;
1529 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001530 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 }
stratakise8b19652017-11-02 11:32:54 +01001532 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001534 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001536 case Py_EQ:
1537 case Py_LE:
1538 case Py_GE:
1539 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001540 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001541 case Py_NE:
1542 case Py_LT:
1543 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001544 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001545 default:
1546 PyErr_BadArgument();
1547 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 }
1549 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001550 else if (op == Py_EQ || op == Py_NE) {
1551 int eq = bytes_compare_eq(a, b);
1552 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001553 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001554 }
1555 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001556 len_a = Py_SIZE(a);
1557 len_b = Py_SIZE(b);
1558 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 if (min_len > 0) {
1560 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001561 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001564 else
1565 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001566 if (c != 0)
1567 Py_RETURN_RICHCOMPARE(c, 0, op);
1568 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001570}
1571
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001572static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001573bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001574{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001575 if (a->ob_shash == -1) {
1576 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001577 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001578 }
1579 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001580}
1581
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001583bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584{
Victor Stinnera15e2602020-04-08 02:01:56 +02001585 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1587 if (i == -1 && PyErr_Occurred())
1588 return NULL;
1589 if (i < 0)
1590 i += PyBytes_GET_SIZE(self);
1591 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1592 PyErr_SetString(PyExc_IndexError,
1593 "index out of range");
1594 return NULL;
1595 }
1596 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1597 }
1598 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001599 Py_ssize_t start, stop, step, slicelength, i;
1600 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 char* source_buf;
1602 char* result_buf;
1603 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001604
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001605 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 return NULL;
1607 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001608 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1609 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 if (slicelength <= 0) {
1612 return PyBytes_FromStringAndSize("", 0);
1613 }
1614 else if (start == 0 && step == 1 &&
1615 slicelength == PyBytes_GET_SIZE(self) &&
1616 PyBytes_CheckExact(self)) {
1617 Py_INCREF(self);
1618 return (PyObject *)self;
1619 }
1620 else if (step == 1) {
1621 return PyBytes_FromStringAndSize(
1622 PyBytes_AS_STRING(self) + start,
1623 slicelength);
1624 }
1625 else {
1626 source_buf = PyBytes_AS_STRING(self);
1627 result = PyBytes_FromStringAndSize(NULL, slicelength);
1628 if (result == NULL)
1629 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 result_buf = PyBytes_AS_STRING(result);
1632 for (cur = start, i = 0; i < slicelength;
1633 cur += step, i++) {
1634 result_buf[i] = source_buf[cur];
1635 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 return result;
1638 }
1639 }
1640 else {
1641 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001642 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 Py_TYPE(item)->tp_name);
1644 return NULL;
1645 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646}
1647
1648static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001649bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1652 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653}
1654
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001655static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 (lenfunc)bytes_length, /*sq_length*/
1657 (binaryfunc)bytes_concat, /*sq_concat*/
1658 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1659 (ssizeargfunc)bytes_item, /*sq_item*/
1660 0, /*sq_slice*/
1661 0, /*sq_ass_item*/
1662 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001663 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664};
1665
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001666static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 (lenfunc)bytes_length,
1668 (binaryfunc)bytes_subscript,
1669 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670};
1671
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001672static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 (getbufferproc)bytes_buffer_getbuffer,
1674 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675};
1676
1677
/* Values for the `striptype` argument of do_xstrip(), do_strip() and
   do_argstrip(): they select which end(s) of the bytes get stripped. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682/*[clinic input]
1683bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001685 sep: object = None
1686 The delimiter according which to split the bytes.
1687 None (the default value) means split on ASCII whitespace characters
1688 (space, tab, return, newline, formfeed, vertical tab).
1689 maxsplit: Py_ssize_t = -1
1690 Maximum number of splits to do.
1691 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693Return a list of the sections in the bytes, using sep as the delimiter.
1694[clinic start generated code]*/
1695
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001696static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001697bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1698/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001699{
1700 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 const char *s = PyBytes_AS_STRING(self), *sub;
1702 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001703 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 if (maxsplit < 0)
1706 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001709 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 return NULL;
1711 sub = vsub.buf;
1712 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1715 PyBuffer_Release(&vsub);
1716 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001717}
1718
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001719/*[clinic input]
1720bytes.partition
1721
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001722 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723 /
1724
1725Partition the bytes into three parts using the given separator.
1726
1727This will search for the separator sep in the bytes. If the separator is found,
1728returns a 3-tuple containing the part before the separator, the separator
1729itself, and the part after it.
1730
1731If the separator is not found, returns a 3-tuple containing the original bytes
1732object and two empty bytes objects.
1733[clinic start generated code]*/
1734
Neal Norwitz6968b052007-02-27 19:02:19 +00001735static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001736bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001737/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001738{
Neal Norwitz6968b052007-02-27 19:02:19 +00001739 return stringlib_partition(
1740 (PyObject*) self,
1741 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001742 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001743 );
1744}
1745
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001746/*[clinic input]
1747bytes.rpartition
1748
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001749 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750 /
1751
1752Partition the bytes into three parts using the given separator.
1753
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001754This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001755the separator is found, returns a 3-tuple containing the part before the
1756separator, the separator itself, and the part after it.
1757
1758If the separator is not found, returns a 3-tuple containing two empty bytes
1759objects and the original bytes object.
1760[clinic start generated code]*/
1761
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762static PyObject *
1763bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001764/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001765{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 return stringlib_rpartition(
1767 (PyObject*) self,
1768 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001769 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001771}
1772
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773/*[clinic input]
1774bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776Return a list of the sections in the bytes, using sep as the delimiter.
1777
1778Splitting is done starting at the end of the bytes and working to the front.
1779[clinic start generated code]*/
1780
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001782bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1783/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784{
1785 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 const char *s = PyBytes_AS_STRING(self), *sub;
1787 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 if (maxsplit < 0)
1791 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001792 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 return NULL;
1796 sub = vsub.buf;
1797 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1800 PyBuffer_Release(&vsub);
1801 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001802}
1803
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001805/*[clinic input]
1806bytes.join
1807
1808 iterable_of_bytes: object
1809 /
1810
1811Concatenate any number of bytes objects.
1812
1813The bytes whose method is called is inserted in between each pair.
1814
1815The result is returned as a new bytes object.
1816
1817Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1818[clinic start generated code]*/
1819
Neal Norwitz6968b052007-02-27 19:02:19 +00001820static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001821bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1822/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001823{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001824 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001825}
1826
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827PyObject *
1828_PyBytes_Join(PyObject *sep, PyObject *x)
1829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 assert(sep != NULL && PyBytes_Check(sep));
1831 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001832 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833}
1834
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001835static PyObject *
1836bytes_find(PyBytesObject *self, PyObject *args)
1837{
1838 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1839}
1840
1841static PyObject *
1842bytes_index(PyBytesObject *self, PyObject *args)
1843{
1844 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1845}
1846
1847
1848static PyObject *
1849bytes_rfind(PyBytesObject *self, PyObject *args)
1850{
1851 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1852}
1853
1854
1855static PyObject *
1856bytes_rindex(PyBytesObject *self, PyObject *args)
1857{
1858 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1859}
1860
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
1862Py_LOCAL_INLINE(PyObject *)
1863do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 Py_buffer vsep;
1866 char *s = PyBytes_AS_STRING(self);
1867 Py_ssize_t len = PyBytes_GET_SIZE(self);
1868 char *sep;
1869 Py_ssize_t seplen;
1870 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001872 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 return NULL;
1874 sep = vsep.buf;
1875 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 i = 0;
1878 if (striptype != RIGHTSTRIP) {
1879 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1880 i++;
1881 }
1882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 j = len;
1885 if (striptype != LEFTSTRIP) {
1886 do {
1887 j--;
1888 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1889 j++;
1890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1895 Py_INCREF(self);
1896 return (PyObject*)self;
1897 }
1898 else
1899 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001900}
1901
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
1903Py_LOCAL_INLINE(PyObject *)
1904do_strip(PyBytesObject *self, int striptype)
1905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 char *s = PyBytes_AS_STRING(self);
1907 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 i = 0;
1910 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001911 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 i++;
1913 }
1914 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 j = len;
1917 if (striptype != LEFTSTRIP) {
1918 do {
1919 j--;
David Malcolm96960882010-11-05 17:23:41 +00001920 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 j++;
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1925 Py_INCREF(self);
1926 return (PyObject*)self;
1927 }
1928 else
1929 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930}
1931
1932
1933Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001934do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001936 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001937 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 }
1939 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940}
1941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942/*[clinic input]
1943bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001945 bytes: object = None
1946 /
1947
1948Strip leading and trailing bytes contained in the argument.
1949
1950If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1951[clinic start generated code]*/
1952
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001953static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001955/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001956{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001958}
1959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960/*[clinic input]
1961bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001963 bytes: object = None
1964 /
1965
1966Strip leading bytes contained in the argument.
1967
1968If the argument is omitted or None, strip leading ASCII whitespace.
1969[clinic start generated code]*/
1970
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001971static PyObject *
1972bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001973/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001974{
1975 return do_argstrip(self, LEFTSTRIP, bytes);
1976}
1977
1978/*[clinic input]
1979bytes.rstrip
1980
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001981 bytes: object = None
1982 /
1983
1984Strip trailing bytes contained in the argument.
1985
1986If the argument is omitted or None, strip trailing ASCII whitespace.
1987[clinic start generated code]*/
1988
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989static PyObject *
1990bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001991/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992{
1993 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001994}
Neal Norwitz6968b052007-02-27 19:02:19 +00001995
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001997static PyObject *
1998bytes_count(PyBytesObject *self, PyObject *args)
1999{
2000 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2001}
2002
2003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004/*[clinic input]
2005bytes.translate
2006
Victor Stinner049e5092014-08-17 22:20:00 +02002007 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002009 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002010 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011
2012Return a copy with each character mapped by the given translation table.
2013
Martin Panter1b6c6da2016-08-27 08:35:02 +00002014All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015The remaining characters are mapped through the given translation table.
2016[clinic start generated code]*/
2017
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002018static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002019bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002020 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002021/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002023 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002024 Py_buffer table_view = {NULL, NULL};
2025 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002027 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002029 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 Py_ssize_t inlen, tablen, dellen = 0;
2031 PyObject *result;
2032 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002034 if (PyBytes_Check(table)) {
2035 table_chars = PyBytes_AS_STRING(table);
2036 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 else if (table == Py_None) {
2039 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 tablen = 256;
2041 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002042 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002043 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002044 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002045 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002046 tablen = table_view.len;
2047 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 if (tablen != 256) {
2050 PyErr_SetString(PyExc_ValueError,
2051 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002052 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 return NULL;
2054 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002056 if (deletechars != NULL) {
2057 if (PyBytes_Check(deletechars)) {
2058 del_table_chars = PyBytes_AS_STRING(deletechars);
2059 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002062 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002063 PyBuffer_Release(&table_view);
2064 return NULL;
2065 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002066 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002067 dellen = del_table_view.len;
2068 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 }
2070 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002071 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 dellen = 0;
2073 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 inlen = PyBytes_GET_SIZE(input_obj);
2076 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002077 if (result == NULL) {
2078 PyBuffer_Release(&del_table_view);
2079 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002081 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002082 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 /* If no deletions are required, use faster code */
2087 for (i = inlen; --i >= 0; ) {
2088 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 changed = 1;
2091 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 if (!changed && PyBytes_CheckExact(input_obj)) {
2093 Py_INCREF(input_obj);
2094 Py_DECREF(result);
2095 result = input_obj;
2096 }
2097 PyBuffer_Release(&del_table_view);
2098 PyBuffer_Release(&table_view);
2099 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 for (i = 0; i < 256; i++)
2104 trans_table[i] = Py_CHARMASK(i);
2105 } else {
2106 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002109 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002112 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002113 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 for (i = inlen; --i >= 0; ) {
2116 c = Py_CHARMASK(*input++);
2117 if (trans_table[c] != -1)
2118 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2119 continue;
2120 changed = 1;
2121 }
2122 if (!changed && PyBytes_CheckExact(input_obj)) {
2123 Py_DECREF(result);
2124 Py_INCREF(input_obj);
2125 return input_obj;
2126 }
2127 /* Fix the size of the resulting string */
2128 if (inlen > 0)
2129 _PyBytes_Resize(&result, output - output_start);
2130 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131}
2132
2133
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002134/*[clinic input]
2135
2136@staticmethod
2137bytes.maketrans
2138
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002139 frm: Py_buffer
2140 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141 /
2142
2143Return a translation table useable for the bytes or bytearray translate method.
2144
2145The returned table will be one where each byte in frm is mapped to the byte at
2146the same position in to.
2147
2148The bytes objects frm and to must be of the same length.
2149[clinic start generated code]*/
2150
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002151static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002152bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002153/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154{
2155 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002156}
2157
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002158
2159/*[clinic input]
2160bytes.replace
2161
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002162 old: Py_buffer
2163 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164 count: Py_ssize_t = -1
2165 Maximum number of occurrences to replace.
2166 -1 (the default value) means replace all occurrences.
2167 /
2168
2169Return a copy with all occurrences of substring old replaced by new.
2170
2171If the optional argument count is given, only the first count occurrences are
2172replaced.
2173[clinic start generated code]*/
2174
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002176bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002177 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002178/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002180 return stringlib_replace((PyObject *)self,
2181 (const char *)old->buf, old->len,
2182 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183}
2184
2185/** End DALKE **/
2186
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002188static PyObject *
2189bytes_startswith(PyBytesObject *self, PyObject *args)
2190{
2191 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2192}
2193
2194static PyObject *
2195bytes_endswith(PyBytesObject *self, PyObject *args)
2196{
2197 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2198}
2199
2200
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002201/*[clinic input]
2202bytes.decode
2203
2204 encoding: str(c_default="NULL") = 'utf-8'
2205 The encoding with which to decode the bytes.
2206 errors: str(c_default="NULL") = 'strict'
2207 The error handling scheme to use for the handling of decoding errors.
2208 The default is 'strict' meaning that decoding errors raise a
2209 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2210 as well as any other name registered with codecs.register_error that
2211 can handle UnicodeDecodeErrors.
2212
2213Decode the bytes using the codec registered for encoding.
2214[clinic start generated code]*/
2215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002217bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002218 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002219/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002220{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002221 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002222}
2223
Guido van Rossum20188312006-05-05 15:15:40 +00002224
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002225/*[clinic input]
2226bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002227
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002228 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002229
2230Return a list of the lines in the bytes, breaking at line boundaries.
2231
2232Line breaks are not included in the resulting list unless keepends is given and
2233true.
2234[clinic start generated code]*/
2235
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002236static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002237bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002238/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002240 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002241 (PyObject*) self, PyBytes_AS_STRING(self),
2242 PyBytes_GET_SIZE(self), keepends
2243 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002244}
2245
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002246/*[clinic input]
2247@classmethod
2248bytes.fromhex
2249
2250 string: unicode
2251 /
2252
2253Create a bytes object from a string of hexadecimal numbers.
2254
2255Spaces between two numbers are accepted.
2256Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2257[clinic start generated code]*/
2258
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002260bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002261/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002263 PyObject *result = _PyBytes_FromHex(string, 0);
2264 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002265 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002266 }
2267 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002268}
2269
2270PyObject*
2271_PyBytes_FromHex(PyObject *string, int use_bytearray)
2272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002274 Py_ssize_t hexlen, invalid_char;
2275 unsigned int top, bot;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002276 const Py_UCS1 *str, *end;
Victor Stinner2bf89932015-10-14 11:25:33 +02002277 _PyBytesWriter writer;
2278
2279 _PyBytesWriter_Init(&writer);
2280 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002281
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002282 assert(PyUnicode_Check(string));
2283 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002285 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002286
Victor Stinner2bf89932015-10-14 11:25:33 +02002287 if (!PyUnicode_IS_ASCII(string)) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002288 const void *data = PyUnicode_DATA(string);
Victor Stinner2bf89932015-10-14 11:25:33 +02002289 unsigned int kind = PyUnicode_KIND(string);
2290 Py_ssize_t i;
2291
2292 /* search for the first non-ASCII character */
2293 for (i = 0; i < hexlen; i++) {
2294 if (PyUnicode_READ(kind, data, i) >= 128)
2295 break;
2296 }
2297 invalid_char = i;
2298 goto error;
2299 }
2300
2301 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2302 str = PyUnicode_1BYTE_DATA(string);
2303
2304 /* This overestimates if there are spaces */
2305 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2306 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002307 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002308
2309 end = str + hexlen;
2310 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002311 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002312 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002313 do {
2314 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002315 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002316 if (str >= end)
2317 break;
2318 }
2319
2320 top = _PyLong_DigitValue[*str];
2321 if (top >= 16) {
2322 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 goto error;
2324 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002325 str++;
2326
2327 bot = _PyLong_DigitValue[*str];
2328 if (bot >= 16) {
2329 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2330 goto error;
2331 }
2332 str++;
2333
2334 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002336
2337 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002338
2339 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002340 PyErr_Format(PyExc_ValueError,
2341 "non-hexadecimal number found in "
2342 "fromhex() arg at position %zd", invalid_char);
2343 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345}
2346
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002347/*[clinic input]
2348bytes.hex
2349
2350 sep: object = NULL
2351 An optional single character or byte to separate hex bytes.
2352 bytes_per_sep: int = 1
2353 How many bytes between separators. Positive values count from the
2354 right, negative values count from the left.
2355
2356Create a str of hexadecimal numbers from a bytes object.
2357
2358Example:
2359>>> value = b'\xb9\x01\xef'
2360>>> value.hex()
2361'b901ef'
2362>>> value.hex(':')
2363'b9:01:ef'
2364>>> value.hex(':', 2)
2365'b9:01ef'
2366>>> value.hex(':', -2)
2367'b901:ef'
2368[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002369
2370static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002371bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2372/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002373{
2374 char* argbuf = PyBytes_AS_STRING(self);
2375 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002376 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002377}
2378
2379static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302380bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002383}
2384
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002385
2386static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002387bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302389 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002391 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002392 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002393 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002394 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002395 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002396 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002397 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002398 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002399 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002400 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002401 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002402 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302403 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302405 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302407 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002408 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302409 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302411 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302413 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302415 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302417 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002419 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002420 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302421 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002422 BYTES_LSTRIP_METHODDEF
2423 BYTES_MAKETRANS_METHODDEF
2424 BYTES_PARTITION_METHODDEF
2425 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002426 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2427 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002428 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002429 BYTES_RPARTITION_METHODDEF
2430 BYTES_RSPLIT_METHODDEF
2431 BYTES_RSTRIP_METHODDEF
2432 BYTES_SPLIT_METHODDEF
2433 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002434 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002435 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002436 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302437 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302439 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002440 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302441 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002442 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002444};
2445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002446static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002447bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002448{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002449 if (!PyBytes_Check(self)) {
2450 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002451 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002452 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002453 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002454}
2455
2456static PyNumberMethods bytes_as_number = {
2457 0, /*nb_add*/
2458 0, /*nb_subtract*/
2459 0, /*nb_multiply*/
2460 bytes_mod, /*nb_remainder*/
2461};
2462
2463static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002464bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465
2466static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002467bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 PyObject *x = NULL;
2470 const char *encoding = NULL;
2471 const char *errors = NULL;
2472 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002473 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 Py_ssize_t size;
2475 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002478 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2480 &encoding, &errors))
2481 return NULL;
2482 if (x == NULL) {
2483 if (encoding != NULL || errors != NULL) {
2484 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002485 encoding != NULL ?
2486 "encoding without a string argument" :
2487 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 return NULL;
2489 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002490 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002493 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002495 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002497 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 return NULL;
2499 }
2500 new = PyUnicode_AsEncodedString(x, encoding, errors);
2501 if (new == NULL)
2502 return NULL;
2503 assert(PyBytes_Check(new));
2504 return new;
2505 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002506
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002507 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002508 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002509 PyUnicode_Check(x) ?
2510 "string argument without an encoding" :
2511 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002512 return NULL;
2513 }
2514
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002515 /* We'd like to call PyObject_Bytes here, but we need to check for an
2516 integer argument before deferring to PyBytes_FromObject, something
2517 PyObject_Bytes doesn't do. */
2518 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2519 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002520 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002521 Py_DECREF(func);
2522 if (new == NULL)
2523 return NULL;
2524 if (!PyBytes_Check(new)) {
2525 PyErr_Format(PyExc_TypeError,
2526 "__bytes__ returned non-bytes (type %.200s)",
2527 Py_TYPE(new)->tp_name);
2528 Py_DECREF(new);
2529 return NULL;
2530 }
2531 return new;
2532 }
2533 else if (PyErr_Occurred())
2534 return NULL;
2535
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002536 if (PyUnicode_Check(x)) {
2537 PyErr_SetString(PyExc_TypeError,
2538 "string argument without an encoding");
2539 return NULL;
2540 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002542 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002543 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2544 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002545 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002546 return NULL;
2547 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002548 }
INADA Naokia634e232017-01-06 17:32:01 +09002549 else {
2550 if (size < 0) {
2551 PyErr_SetString(PyExc_ValueError, "negative count");
2552 return NULL;
2553 }
2554 new = _PyBytes_FromSize(size, 1);
2555 if (new == NULL)
2556 return NULL;
2557 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002558 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002561 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002562}
2563
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002564static PyObject*
2565_PyBytes_FromBuffer(PyObject *x)
2566{
2567 PyObject *new;
2568 Py_buffer view;
2569
2570 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2571 return NULL;
2572
2573 new = PyBytes_FromStringAndSize(NULL, view.len);
2574 if (!new)
2575 goto fail;
2576 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2577 &view, view.len, 'C') < 0)
2578 goto fail;
2579 PyBuffer_Release(&view);
2580 return new;
2581
2582fail:
2583 Py_XDECREF(new);
2584 PyBuffer_Release(&view);
2585 return NULL;
2586}
2587
2588static PyObject*
2589_PyBytes_FromList(PyObject *x)
2590{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002591 Py_ssize_t i, size = PyList_GET_SIZE(x);
2592 Py_ssize_t value;
2593 char *str;
2594 PyObject *item;
2595 _PyBytesWriter writer;
2596
2597 _PyBytesWriter_Init(&writer);
2598 str = _PyBytesWriter_Alloc(&writer, size);
2599 if (str == NULL)
2600 return NULL;
2601 writer.overallocate = 1;
2602 size = writer.allocated;
2603
2604 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2605 item = PyList_GET_ITEM(x, i);
2606 Py_INCREF(item);
2607 value = PyNumber_AsSsize_t(item, NULL);
2608 Py_DECREF(item);
2609 if (value == -1 && PyErr_Occurred())
2610 goto error;
2611
2612 if (value < 0 || value >= 256) {
2613 PyErr_SetString(PyExc_ValueError,
2614 "bytes must be in range(0, 256)");
2615 goto error;
2616 }
2617
2618 if (i >= size) {
2619 str = _PyBytesWriter_Resize(&writer, str, size+1);
2620 if (str == NULL)
2621 return NULL;
2622 size = writer.allocated;
2623 }
2624 *str++ = (char) value;
2625 }
2626 return _PyBytesWriter_Finish(&writer, str);
2627
2628 error:
2629 _PyBytesWriter_Dealloc(&writer);
2630 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002631}
2632
2633static PyObject*
2634_PyBytes_FromTuple(PyObject *x)
2635{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002636 PyObject *bytes;
2637 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2638 Py_ssize_t value;
2639 char *str;
2640 PyObject *item;
2641
2642 bytes = PyBytes_FromStringAndSize(NULL, size);
2643 if (bytes == NULL)
2644 return NULL;
2645 str = ((PyBytesObject *)bytes)->ob_sval;
2646
2647 for (i = 0; i < size; i++) {
2648 item = PyTuple_GET_ITEM(x, i);
2649 value = PyNumber_AsSsize_t(item, NULL);
2650 if (value == -1 && PyErr_Occurred())
2651 goto error;
2652
2653 if (value < 0 || value >= 256) {
2654 PyErr_SetString(PyExc_ValueError,
2655 "bytes must be in range(0, 256)");
2656 goto error;
2657 }
2658 *str++ = (char) value;
2659 }
2660 return bytes;
2661
2662 error:
2663 Py_DECREF(bytes);
2664 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002665}
2666
2667static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002668_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002669{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002670 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002672 _PyBytesWriter writer;
2673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002675 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 if (size == -1 && PyErr_Occurred())
2677 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002678
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002679 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002680 str = _PyBytesWriter_Alloc(&writer, size);
2681 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002683 writer.overallocate = 1;
2684 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* Run the iterator to exhaustion */
2687 for (i = 0; ; i++) {
2688 PyObject *item;
2689 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Get the next item */
2692 item = PyIter_Next(it);
2693 if (item == NULL) {
2694 if (PyErr_Occurred())
2695 goto error;
2696 break;
2697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002700 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 Py_DECREF(item);
2702 if (value == -1 && PyErr_Occurred())
2703 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 /* Range check */
2706 if (value < 0 || value >= 256) {
2707 PyErr_SetString(PyExc_ValueError,
2708 "bytes must be in range(0, 256)");
2709 goto error;
2710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Append the byte */
2713 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002714 str = _PyBytesWriter_Resize(&writer, str, size+1);
2715 if (str == NULL)
2716 return NULL;
2717 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002719 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002721
2722 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
2724 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002725 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727}
2728
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002729PyObject *
2730PyBytes_FromObject(PyObject *x)
2731{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002732 PyObject *it, *result;
2733
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002734 if (x == NULL) {
2735 PyErr_BadInternalCall();
2736 return NULL;
2737 }
2738
2739 if (PyBytes_CheckExact(x)) {
2740 Py_INCREF(x);
2741 return x;
2742 }
2743
2744 /* Use the modern buffer interface */
2745 if (PyObject_CheckBuffer(x))
2746 return _PyBytes_FromBuffer(x);
2747
2748 if (PyList_CheckExact(x))
2749 return _PyBytes_FromList(x);
2750
2751 if (PyTuple_CheckExact(x))
2752 return _PyBytes_FromTuple(x);
2753
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002754 if (!PyUnicode_Check(x)) {
2755 it = PyObject_GetIter(x);
2756 if (it != NULL) {
2757 result = _PyBytes_FromIterator(it, x);
2758 Py_DECREF(it);
2759 return result;
2760 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002761 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2762 return NULL;
2763 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002764 }
2765
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002766 PyErr_Format(PyExc_TypeError,
2767 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002768 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002769 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002770}
2771
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002773bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 PyObject *tmp, *pnew;
2776 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002778 assert(PyType_IsSubtype(type, &PyBytes_Type));
2779 tmp = bytes_new(&PyBytes_Type, args, kwds);
2780 if (tmp == NULL)
2781 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002782 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002783 n = PyBytes_GET_SIZE(tmp);
2784 pnew = type->tp_alloc(type, n);
2785 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002786 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 PyBytes_AS_STRING(tmp), n+1);
2788 ((PyBytesObject *)pnew)->ob_shash =
2789 ((PyBytesObject *)tmp)->ob_shash;
2790 }
2791 Py_DECREF(tmp);
2792 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793}
2794
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002795PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002796"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002798bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002799bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2800bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002801\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002803 - an iterable yielding integers in range(256)\n\
2804 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002805 - any object implementing the buffer API.\n\
2806 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002807
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002808static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002809
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2812 "bytes",
2813 PyBytesObject_SIZE,
2814 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002815 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002816 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 0, /* tp_getattr */
2818 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002819 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002821 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002822 &bytes_as_sequence, /* tp_as_sequence */
2823 &bytes_as_mapping, /* tp_as_mapping */
2824 (hashfunc)bytes_hash, /* tp_hash */
2825 0, /* tp_call */
2826 bytes_str, /* tp_str */
2827 PyObject_GenericGetAttr, /* tp_getattro */
2828 0, /* tp_setattro */
2829 &bytes_as_buffer, /* tp_as_buffer */
2830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2831 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2832 bytes_doc, /* tp_doc */
2833 0, /* tp_traverse */
2834 0, /* tp_clear */
2835 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2836 0, /* tp_weaklistoffset */
2837 bytes_iter, /* tp_iter */
2838 0, /* tp_iternext */
2839 bytes_methods, /* tp_methods */
2840 0, /* tp_members */
2841 0, /* tp_getset */
2842 &PyBaseObject_Type, /* tp_base */
2843 0, /* tp_dict */
2844 0, /* tp_descr_get */
2845 0, /* tp_descr_set */
2846 0, /* tp_dictoffset */
2847 0, /* tp_init */
2848 0, /* tp_alloc */
2849 bytes_new, /* tp_new */
2850 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002851};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002852
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002854PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 assert(pv != NULL);
2857 if (*pv == NULL)
2858 return;
2859 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002860 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002861 return;
2862 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002863
2864 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2865 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002866 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002867 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002868
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002869 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002870 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2871 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2872 Py_CLEAR(*pv);
2873 return;
2874 }
2875
2876 oldsize = PyBytes_GET_SIZE(*pv);
2877 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2878 PyErr_NoMemory();
2879 goto error;
2880 }
2881 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2882 goto error;
2883
2884 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2885 PyBuffer_Release(&wb);
2886 return;
2887
2888 error:
2889 PyBuffer_Release(&wb);
2890 Py_CLEAR(*pv);
2891 return;
2892 }
2893
2894 else {
2895 /* Multiple references, need to create new object */
2896 PyObject *v;
2897 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002898 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002899 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900}
2901
2902void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002903PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 PyBytes_Concat(pv, w);
2906 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907}
2908
2909
Ethan Furmanb95b5612015-01-23 20:05:18 -08002910/* The following function breaks the notion that bytes are immutable:
2911 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002913 as creating a new bytes object and destroying the old one, only
2914 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002916 Note that if there's not enough memory to resize the bytes object, the
2917 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002918 memory" exception is set, and -1 is returned. Else (on success) 0 is
2919 returned, and the value in *pv may or may not be the same as on input.
2920 As always, an extra byte is allocated for a trailing \0 byte (newsize
2921 does *not* include that), and a trailing \0 byte is stored.
2922*/
2923
2924int
2925_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2926{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002927 PyObject *v;
2928 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002930 if (!PyBytes_Check(v) || newsize < 0) {
2931 goto error;
2932 }
2933 if (Py_SIZE(v) == newsize) {
2934 /* return early if newsize equals to v->ob_size */
2935 return 0;
2936 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002937 if (Py_SIZE(v) == 0) {
2938 if (newsize == 0) {
2939 return 0;
2940 }
2941 *pv = _PyBytes_FromSize(newsize, 0);
2942 Py_DECREF(v);
2943 return (*pv == NULL) ? -1 : 0;
2944 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002945 if (Py_REFCNT(v) != 1) {
2946 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002947 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002948 if (newsize == 0) {
2949 *pv = _PyBytes_FromSize(0, 0);
2950 Py_DECREF(v);
2951 return (*pv == NULL) ? -1 : 0;
2952 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01002954#ifdef Py_REF_DEBUG
2955 _Py_RefTotal--;
2956#endif
2957#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01002959#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002961 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 if (*pv == NULL) {
2963 PyObject_Del(v);
2964 PyErr_NoMemory();
2965 return -1;
2966 }
2967 _Py_NewReference(*pv);
2968 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01002969 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002970 sv->ob_sval[newsize] = '\0';
2971 sv->ob_shash = -1; /* invalidate cached hash value */
2972 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002973error:
2974 *pv = 0;
2975 Py_DECREF(v);
2976 PyErr_BadInternalCall();
2977 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978}
2979
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980void
Victor Stinnerbed48172019-08-27 00:12:32 +02002981_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002982{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002983 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002984 for (i = 0; i < UCHAR_MAX + 1; i++)
2985 Py_CLEAR(characters[i]);
2986 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987}
2988
Benjamin Peterson4116f362008-05-27 00:36:20 +00002989/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002990
2991typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 PyObject_HEAD
2993 Py_ssize_t it_index;
2994 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002996
2997static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002999{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003000 _PyObject_GC_UNTRACK(it);
3001 Py_XDECREF(it->it_seq);
3002 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003003}
3004
3005static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003006striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 Py_VISIT(it->it_seq);
3009 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003010}
3011
3012static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003014{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 PyBytesObject *seq;
3016 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 assert(it != NULL);
3019 seq = it->it_seq;
3020 if (seq == NULL)
3021 return NULL;
3022 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3025 item = PyLong_FromLong(
3026 (unsigned char)seq->ob_sval[it->it_index]);
3027 if (item != NULL)
3028 ++it->it_index;
3029 return item;
3030 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003033 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003035}
3036
3037static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303038striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 Py_ssize_t len = 0;
3041 if (it->it_seq)
3042 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3043 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044}
3045
3046PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003049static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303050striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003051{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003052 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003053 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003054 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003055 it->it_seq, it->it_index);
3056 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003057 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003058 }
3059}
3060
3061PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3062
3063static PyObject *
3064striter_setstate(striterobject *it, PyObject *state)
3065{
3066 Py_ssize_t index = PyLong_AsSsize_t(state);
3067 if (index == -1 && PyErr_Occurred())
3068 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003069 if (it->it_seq != NULL) {
3070 if (index < 0)
3071 index = 0;
3072 else if (index > PyBytes_GET_SIZE(it->it_seq))
3073 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3074 it->it_index = index;
3075 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003076 Py_RETURN_NONE;
3077}
3078
3079PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003082 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3083 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003084 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3085 reduce_doc},
3086 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3087 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003088 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003089};
3090
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3093 "bytes_iterator", /* tp_name */
3094 sizeof(striterobject), /* tp_basicsize */
3095 0, /* tp_itemsize */
3096 /* methods */
3097 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003098 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 0, /* tp_getattr */
3100 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003101 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 0, /* tp_repr */
3103 0, /* tp_as_number */
3104 0, /* tp_as_sequence */
3105 0, /* tp_as_mapping */
3106 0, /* tp_hash */
3107 0, /* tp_call */
3108 0, /* tp_str */
3109 PyObject_GenericGetAttr, /* tp_getattro */
3110 0, /* tp_setattro */
3111 0, /* tp_as_buffer */
3112 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3113 0, /* tp_doc */
3114 (traverseproc)striter_traverse, /* tp_traverse */
3115 0, /* tp_clear */
3116 0, /* tp_richcompare */
3117 0, /* tp_weaklistoffset */
3118 PyObject_SelfIter, /* tp_iter */
3119 (iternextfunc)striter_next, /* tp_iternext */
3120 striter_methods, /* tp_methods */
3121 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003122};
3123
3124static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003125bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003129 if (!PyBytes_Check(seq)) {
3130 PyErr_BadInternalCall();
3131 return NULL;
3132 }
3133 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3134 if (it == NULL)
3135 return NULL;
3136 it->it_index = 0;
3137 Py_INCREF(seq);
3138 it->it_seq = (PyBytesObject *)seq;
3139 _PyObject_GC_TRACK(it);
3140 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003141}
Victor Stinner00165072015-10-09 01:53:21 +02003142
3143
3144/* _PyBytesWriter API */
3145
/* Divisor used when overallocating: a requested size is grown by
   size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3153
3154void
3155_PyBytesWriter_Init(_PyBytesWriter *writer)
3156{
Victor Stinner661aacc2015-10-14 09:41:48 +02003157 /* Set all attributes before small_buffer to 0 */
3158 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003159#ifndef NDEBUG
3160 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3161 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003162#endif
3163}
3164
3165void
3166_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3167{
3168 Py_CLEAR(writer->buffer);
3169}
3170
3171Py_LOCAL_INLINE(char*)
3172_PyBytesWriter_AsString(_PyBytesWriter *writer)
3173{
Victor Stinner661aacc2015-10-14 09:41:48 +02003174 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003175 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003176 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003177 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003178 else if (writer->use_bytearray) {
3179 assert(writer->buffer != NULL);
3180 return PyByteArray_AS_STRING(writer->buffer);
3181 }
3182 else {
3183 assert(writer->buffer != NULL);
3184 return PyBytes_AS_STRING(writer->buffer);
3185 }
Victor Stinner00165072015-10-09 01:53:21 +02003186}
3187
3188Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003189_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003190{
3191 char *start = _PyBytesWriter_AsString(writer);
3192 assert(str != NULL);
3193 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003194 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003195 return str - start;
3196}
3197
Victor Stinner68762572019-10-07 18:42:01 +02003198#ifndef NDEBUG
3199Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003200_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3201{
Victor Stinner00165072015-10-09 01:53:21 +02003202 char *start, *end;
3203
Victor Stinner661aacc2015-10-14 09:41:48 +02003204 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003205 assert(writer->buffer == NULL);
3206 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003207 else {
3208 assert(writer->buffer != NULL);
3209 if (writer->use_bytearray)
3210 assert(PyByteArray_CheckExact(writer->buffer));
3211 else
3212 assert(PyBytes_CheckExact(writer->buffer));
3213 assert(Py_REFCNT(writer->buffer) == 1);
3214 }
Victor Stinner00165072015-10-09 01:53:21 +02003215
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 if (writer->use_bytearray) {
3217 /* bytearray has its own overallocation algorithm,
3218 writer overallocation must be disabled */
3219 assert(!writer->overallocate);
3220 }
3221
3222 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003223 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003224 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003225 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003226 assert(start[writer->allocated] == 0);
3227
3228 end = start + writer->allocated;
3229 assert(str != NULL);
3230 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003231 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003232}
Victor Stinner68762572019-10-07 18:42:01 +02003233#endif
Victor Stinner00165072015-10-09 01:53:21 +02003234
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003235void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003236_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003237{
3238 Py_ssize_t allocated, pos;
3239
Victor Stinner68762572019-10-07 18:42:01 +02003240 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003241 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003242
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003243 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003244 if (writer->overallocate
3245 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3246 /* overallocate to limit the number of realloc() */
3247 allocated += allocated / OVERALLOCATE_FACTOR;
3248 }
3249
Victor Stinner2bf89932015-10-14 11:25:33 +02003250 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003251 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003252 if (writer->use_bytearray) {
3253 if (PyByteArray_Resize(writer->buffer, allocated))
3254 goto error;
3255 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3256 but we cannot use ob_alloc because bytes may need to be moved
3257 to use the whole buffer. bytearray uses an internal optimization
3258 to avoid moving or copying bytes when bytes are removed at the
3259 beginning (ex: del bytearray[:1]). */
3260 }
3261 else {
3262 if (_PyBytes_Resize(&writer->buffer, allocated))
3263 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003264 }
3265 }
3266 else {
3267 /* convert from stack buffer to bytes object buffer */
3268 assert(writer->buffer == NULL);
3269
Victor Stinner661aacc2015-10-14 09:41:48 +02003270 if (writer->use_bytearray)
3271 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3272 else
3273 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003274 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003275 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003276
3277 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003278 char *dest;
3279 if (writer->use_bytearray)
3280 dest = PyByteArray_AS_STRING(writer->buffer);
3281 else
3282 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003283 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003284 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003285 pos);
3286 }
3287
Victor Stinnerb3653a32015-10-09 03:38:24 +02003288 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003289#ifndef NDEBUG
3290 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3291 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003292#endif
Victor Stinner00165072015-10-09 01:53:21 +02003293 }
3294 writer->allocated = allocated;
3295
3296 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003297 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003298 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003299
3300error:
3301 _PyBytesWriter_Dealloc(writer);
3302 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003303}
3304
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003305void*
3306_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3307{
3308 Py_ssize_t new_min_size;
3309
Victor Stinner68762572019-10-07 18:42:01 +02003310 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003311 assert(size >= 0);
3312
3313 if (size == 0) {
3314 /* nothing to do */
3315 return str;
3316 }
3317
3318 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3319 PyErr_NoMemory();
3320 _PyBytesWriter_Dealloc(writer);
3321 return NULL;
3322 }
3323 new_min_size = writer->min_size + size;
3324
3325 if (new_min_size > writer->allocated)
3326 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3327
3328 writer->min_size = new_min_size;
3329 return str;
3330}
3331
Victor Stinner00165072015-10-09 01:53:21 +02003332/* Allocate the buffer to write size bytes.
3333 Return the pointer to the beginning of buffer data.
3334 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003335void*
Victor Stinner00165072015-10-09 01:53:21 +02003336_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3337{
3338 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003339 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003340 assert(size >= 0);
3341
Victor Stinnerb3653a32015-10-09 03:38:24 +02003342 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003343#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003344 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003345 /* In debug mode, don't use the full small buffer because it is less
3346 efficient than bytes and bytearray objects to detect buffer underflow
3347 and buffer overflow. Use 10 bytes of the small buffer to test also
3348 code using the smaller buffer in debug mode.
3349
3350 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3351 in debug mode to also be able to detect stack overflow when running
3352 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3353 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3354 stack overflow. */
3355 writer->allocated = Py_MIN(writer->allocated, 10);
3356 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3357 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003358 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003359#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003360 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003361#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003362 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003363}
3364
3365PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003366_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003367{
Victor Stinner2bf89932015-10-14 11:25:33 +02003368 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003369 PyObject *result;
3370
Victor Stinner68762572019-10-07 18:42:01 +02003371 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003372
Victor Stinner2bf89932015-10-14 11:25:33 +02003373 size = _PyBytesWriter_GetSize(writer, str);
3374 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003375 Py_CLEAR(writer->buffer);
3376 /* Get the empty byte string singleton */
3377 result = PyBytes_FromStringAndSize(NULL, 0);
3378 }
3379 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003380 if (writer->use_bytearray) {
3381 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3382 }
3383 else {
3384 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3385 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003386 }
3387 else {
3388 result = writer->buffer;
3389 writer->buffer = NULL;
3390
Victor Stinner2bf89932015-10-14 11:25:33 +02003391 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003392 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003393 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003394 Py_DECREF(result);
3395 return NULL;
3396 }
3397 }
3398 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003399 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003400 assert(result == NULL);
3401 return NULL;
3402 }
Victor Stinner00165072015-10-09 01:53:21 +02003403 }
3404 }
Victor Stinner00165072015-10-09 01:53:21 +02003405 }
Victor Stinner00165072015-10-09 01:53:21 +02003406 return result;
3407}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003408
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003409void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003410_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003411 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003412{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003413 char *str = (char *)ptr;
3414
Victor Stinnerce179bf2015-10-09 12:57:22 +02003415 str = _PyBytesWriter_Prepare(writer, str, size);
3416 if (str == NULL)
3417 return NULL;
3418
Christian Heimesf051e432016-09-13 20:22:02 +02003419 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003420 str += size;
3421
3422 return str;
3423}