blob: 03cd7ddd27901325e93ef06e633fe643a6284329 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnera15e2602020-04-08 02:01:56 +02006#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01007#include "pycore_bytes_methods.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01008#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01009#include "pycore_pymem.h"
10#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +000011
Gregory P. Smith8cb65692015-04-25 23:22:26 +000012#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000013#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030018/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020019
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030020#include "clinic/bytesobject.c.h"
21
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Hai Shi46874c22020-01-30 17:20:25 -060025_Py_IDENTIFIER(__bytes__);
26
Mark Dickinsonfd24b322008-12-06 15:33:31 +000027/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
Victor Stinner2bf89932015-10-14 11:25:33 +020035/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
Martin Pantera90a4a92016-05-30 04:04:50 +000043 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000051 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020056 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 Py_INCREF(op);
69 return (PyObject *)op;
70 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071
Victor Stinner049e5092014-08-17 22:20:00 +020072 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 PyErr_SetString(PyExc_OverflowError,
74 "byte string is too large");
75 return NULL;
76 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020079 if (use_calloc)
80 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
81 else
82 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 if (op == NULL)
84 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010085 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 if (!use_calloc)
88 op->ob_sval[size] = '\0';
89 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 if (size == 0) {
91 nullstring = op;
92 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020093 }
94 return (PyObject *) op;
95}
96
97PyObject *
98PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
99{
100 PyBytesObject *op;
101 if (size < 0) {
102 PyErr_SetString(PyExc_SystemError,
103 "Negative size passed to PyBytes_FromStringAndSize");
104 return NULL;
105 }
106 if (size == 1 && str != NULL &&
107 (op = characters[*str & UCHAR_MAX]) != NULL)
108 {
Victor Stinnerdb067af2014-05-02 22:31:14 +0200109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
Christian Heimesf051e432016-09-13 20:22:02 +0200119 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 Py_INCREF(op);
147 return (PyObject *)op;
148 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 /* Inline PyObject_NewVar */
151 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
152 if (op == NULL)
153 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100154 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200156 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 /* share short strings */
158 if (size == 0) {
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 characters[*str & UCHAR_MAX] = op;
163 Py_INCREF(op);
164 }
165 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000167
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168PyObject *
169PyBytes_FromFormatV(const char *format, va_list vargs)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200172 const char *f;
173 const char *p;
174 Py_ssize_t prec;
175 int longflag;
176 int size_tflag;
177 /* Longest 64-bit formatted numbers:
178 - "18446744073709551615\0" (21 bytes)
179 - "-9223372036854775808\0" (21 bytes)
180 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000181
Victor Stinner03dab782015-10-14 00:21:35 +0200182 Longest 64-bit pointer representation:
183 "0xffffffffffffffff\0" (19 bytes). */
184 char buffer[21];
185 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000186
Victor Stinner03dab782015-10-14 00:21:35 +0200187 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000188
Victor Stinner03dab782015-10-14 00:21:35 +0200189 s = _PyBytesWriter_Alloc(&writer, strlen(format));
190 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200192 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194#define WRITE_BYTES(str) \
195 do { \
196 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
197 if (s == NULL) \
198 goto error; \
199 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200202 if (*f != '%') {
203 *s++ = *f;
204 continue;
205 }
206
207 p = f++;
208
209 /* ignore the width (ex: 10 in "%10s") */
210 while (Py_ISDIGIT(*f))
211 f++;
212
213 /* parse the precision (ex: 10 in "%.10s") */
214 prec = 0;
215 if (*f == '.') {
216 f++;
217 for (; Py_ISDIGIT(*f); f++) {
218 prec = (prec * 10) + (*f - '0');
219 }
220 }
221
222 while (*f && *f != '%' && !Py_ISALPHA(*f))
223 f++;
224
225 /* handle the long flag ('l'), but only for %ld and %lu.
226 others can be added when necessary. */
227 longflag = 0;
228 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
229 longflag = 1;
230 ++f;
231 }
232
233 /* handle the size_t flag ('z'). */
234 size_tflag = 0;
235 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
236 size_tflag = 1;
237 ++f;
238 }
239
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700240 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200241 (ex: 2 for "%s") */
242 writer.min_size -= (f - p + 1);
243
244 switch (*f) {
245 case 'c':
246 {
247 int c = va_arg(vargs, int);
248 if (c < 0 || c > 255) {
249 PyErr_SetString(PyExc_OverflowError,
250 "PyBytes_FromFormatV(): %c format "
251 "expects an integer in range [0; 255]");
252 goto error;
253 }
254 writer.min_size++;
255 *s++ = (unsigned char)c;
256 break;
257 }
258
259 case 'd':
260 if (longflag)
261 sprintf(buffer, "%ld", va_arg(vargs, long));
262 else if (size_tflag)
263 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
264 va_arg(vargs, Py_ssize_t));
265 else
266 sprintf(buffer, "%d", va_arg(vargs, int));
267 assert(strlen(buffer) < sizeof(buffer));
268 WRITE_BYTES(buffer);
269 break;
270
271 case 'u':
272 if (longflag)
273 sprintf(buffer, "%lu",
274 va_arg(vargs, unsigned long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
277 va_arg(vargs, size_t));
278 else
279 sprintf(buffer, "%u",
280 va_arg(vargs, unsigned int));
281 assert(strlen(buffer) < sizeof(buffer));
282 WRITE_BYTES(buffer);
283 break;
284
285 case 'i':
286 sprintf(buffer, "%i", va_arg(vargs, int));
287 assert(strlen(buffer) < sizeof(buffer));
288 WRITE_BYTES(buffer);
289 break;
290
291 case 'x':
292 sprintf(buffer, "%x", va_arg(vargs, int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 's':
298 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200300
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200301 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200302 if (prec <= 0) {
303 i = strlen(p);
304 }
305 else {
306 i = 0;
307 while (i < prec && p[i]) {
308 i++;
309 }
310 }
Victor Stinner03dab782015-10-14 00:21:35 +0200311 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
312 if (s == NULL)
313 goto error;
314 break;
315 }
316
317 case 'p':
318 sprintf(buffer, "%p", va_arg(vargs, void*));
319 assert(strlen(buffer) < sizeof(buffer));
320 /* %p is ill-defined: ensure leading 0x. */
321 if (buffer[1] == 'X')
322 buffer[1] = 'x';
323 else if (buffer[1] != 'x') {
324 memmove(buffer+2, buffer, strlen(buffer)+1);
325 buffer[0] = '0';
326 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 }
Victor Stinner03dab782015-10-14 00:21:35 +0200328 WRITE_BYTES(buffer);
329 break;
330
331 case '%':
332 writer.min_size++;
333 *s++ = '%';
334 break;
335
336 default:
337 if (*f == 0) {
338 /* fix min_size if we reached the end of the format string */
339 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000341
Victor Stinner03dab782015-10-14 00:21:35 +0200342 /* invalid format string: copy unformatted string and exit */
343 WRITE_BYTES(p);
344 return _PyBytesWriter_Finish(&writer, s);
345 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348#undef WRITE_BYTES
349
350 return _PyBytesWriter_Finish(&writer, s);
351
352 error:
353 _PyBytesWriter_Dealloc(&writer);
354 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355}
356
357PyObject *
358PyBytes_FromFormat(const char *format, ...)
359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 PyObject* ret;
361 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362
363#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 ret = PyBytes_FromFormatV(format, vargs);
369 va_end(vargs);
370 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Ethan Furmanb95b5612015-01-23 20:05:18 -0800373/* Helpers for formatstring */
374
375Py_LOCAL_INLINE(PyObject *)
376getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
377{
378 Py_ssize_t argidx = *p_argidx;
379 if (argidx < arglen) {
380 (*p_argidx)++;
381 if (arglen < 0)
382 return args;
383 else
384 return PyTuple_GetItem(args, argidx);
385 }
386 PyErr_SetString(PyExc_TypeError,
387 "not enough arguments for format string");
388 return NULL;
389}
390
/* Format codes: one bit per printf-style flag character.  The character
   that selects each flag is shown next to it. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
403
404/* Returns a new reference to a PyBytes object, or NULL on failure. */
405
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200406static char*
407formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200408 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800409{
410 char *p;
411 PyObject *result;
412 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200413 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800414
415 x = PyFloat_AsDouble(v);
416 if (x == -1.0 && PyErr_Occurred()) {
417 PyErr_Format(PyExc_TypeError, "float argument required, "
418 "not %.200s", Py_TYPE(v)->tp_name);
419 return NULL;
420 }
421
422 if (prec < 0)
423 prec = 6;
424
425 p = PyOS_double_to_string(x, type, prec,
426 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
427
428 if (p == NULL)
429 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200430
431 len = strlen(p);
432 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200433 str = _PyBytesWriter_Prepare(writer, str, len);
434 if (str == NULL)
435 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200436 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200437 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200438 str += len;
439 return str;
440 }
441
442 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600445 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446}
447
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300448static PyObject *
449formatlong(PyObject *v, int flags, int prec, int type)
450{
451 PyObject *result, *iobj;
452 if (type == 'i')
453 type = 'd';
454 if (PyLong_Check(v))
455 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
456 if (PyNumber_Check(v)) {
457 /* make sure number is a type of integer for o, x, and X */
458 if (type == 'o' || type == 'x' || type == 'X')
459 iobj = PyNumber_Index(v);
460 else
461 iobj = PyNumber_Long(v);
462 if (iobj == NULL) {
463 if (!PyErr_ExceptionMatches(PyExc_TypeError))
464 return NULL;
465 }
466 else if (!PyLong_Check(iobj))
467 Py_CLEAR(iobj);
468 if (iobj != NULL) {
469 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
470 Py_DECREF(iobj);
471 return result;
472 }
473 }
474 PyErr_Format(PyExc_TypeError,
475 "%%%c format: %s is required, not %.200s", type,
476 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
477 : "a number",
478 Py_TYPE(v)->tp_name);
479 return NULL;
480}
481
482static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200483byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800484{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300485 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486 *p = PyBytes_AS_STRING(arg)[0];
487 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyByteArray_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
493 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300494 PyObject *iobj;
495 long ival;
496 int overflow;
497 /* make sure number is a type of integer */
498 if (PyLong_Check(arg)) {
499 ival = PyLong_AsLongAndOverflow(arg, &overflow);
500 }
501 else {
502 iobj = PyNumber_Index(arg);
503 if (iobj == NULL) {
504 if (!PyErr_ExceptionMatches(PyExc_TypeError))
505 return 0;
506 goto onError;
507 }
508 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
509 Py_DECREF(iobj);
510 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300511 if (!overflow && ival == -1 && PyErr_Occurred())
512 goto onError;
513 if (overflow || !(0 <= ival && ival <= 255)) {
514 PyErr_SetString(PyExc_OverflowError,
515 "%c arg not in range(256)");
516 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800517 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300518 *p = (char)ival;
519 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300521 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200522 PyErr_SetString(PyExc_TypeError,
523 "%c requires an integer in range(256) or a single byte");
524 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525}
526
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800527static PyObject *_PyBytes_FromBuffer(PyObject *x);
528
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200530format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 /* is it a bytes object? */
534 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 *pbuf = PyBytes_AS_STRING(v);
536 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 return v;
539 }
540 if (PyByteArray_Check(v)) {
541 *pbuf = PyByteArray_AS_STRING(v);
542 *plen = PyByteArray_GET_SIZE(v);
543 Py_INCREF(v);
544 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800545 }
546 /* does it support __bytes__? */
547 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
548 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100549 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800550 Py_DECREF(func);
551 if (result == NULL)
552 return NULL;
553 if (!PyBytes_Check(result)) {
554 PyErr_Format(PyExc_TypeError,
555 "__bytes__ returned non-bytes (type %.200s)",
556 Py_TYPE(result)->tp_name);
557 Py_DECREF(result);
558 return NULL;
559 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200560 *pbuf = PyBytes_AS_STRING(result);
561 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800562 return result;
563 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800564 /* does it support buffer protocol? */
565 if (PyObject_CheckBuffer(v)) {
566 /* maybe we can avoid making a copy of the buffer object here? */
567 result = _PyBytes_FromBuffer(v);
568 if (result == NULL)
569 return NULL;
570 *pbuf = PyBytes_AS_STRING(result);
571 *plen = PyBytes_GET_SIZE(result);
572 return result;
573 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800575 "%%b requires a bytes-like object, "
576 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 Py_TYPE(v)->tp_name);
578 return NULL;
579}
580
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582
583PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200584_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
585 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586{
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 const char *fmt;
588 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200590 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800592 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593 _PyBytesWriter writer;
594
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 PyErr_BadInternalCall();
597 return NULL;
598 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 fmt = format;
600 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
602 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200603 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604
605 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
606 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800607 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200608 if (!use_bytearray)
609 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200610
Ethan Furmanb95b5612015-01-23 20:05:18 -0800611 if (PyTuple_Check(args)) {
612 arglen = PyTuple_GET_SIZE(args);
613 argidx = 0;
614 }
615 else {
616 arglen = -1;
617 argidx = -2;
618 }
619 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
620 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
621 !PyByteArray_Check(args)) {
622 dict = args;
623 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 while (--fmtcnt >= 0) {
626 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 Py_ssize_t len;
628 char *pos;
629
Xiang Zhangb76ad512017-03-06 17:17:05 +0800630 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631 if (pos != NULL)
632 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200633 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800634 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200635 assert(len != 0);
636
Christian Heimesf051e432016-09-13 20:22:02 +0200637 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 res += len;
639 fmt += len;
640 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 }
642 else {
643 /* Got a format specifier */
644 int flags = 0;
645 Py_ssize_t width = -1;
646 int prec = -1;
647 int c = '\0';
648 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800649 PyObject *v = NULL;
650 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200651 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200653 Py_ssize_t len = 0;
654 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200655 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200658 if (*fmt == '%') {
659 *res++ = '%';
660 fmt++;
661 fmtcnt--;
662 continue;
663 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800664 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200665 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 Py_ssize_t keylen;
667 PyObject *key;
668 int pcount = 1;
669
670 if (dict == NULL) {
671 PyErr_SetString(PyExc_TypeError,
672 "format requires a mapping");
673 goto error;
674 }
675 ++fmt;
676 --fmtcnt;
677 keystart = fmt;
678 /* Skip over balanced parentheses */
679 while (pcount > 0 && --fmtcnt >= 0) {
680 if (*fmt == ')')
681 --pcount;
682 else if (*fmt == '(')
683 ++pcount;
684 fmt++;
685 }
686 keylen = fmt - keystart - 1;
687 if (fmtcnt < 0 || pcount > 0) {
688 PyErr_SetString(PyExc_ValueError,
689 "incomplete format key");
690 goto error;
691 }
692 key = PyBytes_FromStringAndSize(keystart,
693 keylen);
694 if (key == NULL)
695 goto error;
696 if (args_owned) {
697 Py_DECREF(args);
698 args_owned = 0;
699 }
700 args = PyObject_GetItem(dict, key);
701 Py_DECREF(key);
702 if (args == NULL) {
703 goto error;
704 }
705 args_owned = 1;
706 arglen = -1;
707 argidx = -2;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 while (--fmtcnt >= 0) {
712 switch (c = *fmt++) {
713 case '-': flags |= F_LJUST; continue;
714 case '+': flags |= F_SIGN; continue;
715 case ' ': flags |= F_BLANK; continue;
716 case '#': flags |= F_ALT; continue;
717 case '0': flags |= F_ZERO; continue;
718 }
719 break;
720 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200721
722 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800723 if (c == '*') {
724 v = getnextarg(args, arglen, &argidx);
725 if (v == NULL)
726 goto error;
727 if (!PyLong_Check(v)) {
728 PyErr_SetString(PyExc_TypeError,
729 "* wants int");
730 goto error;
731 }
732 width = PyLong_AsSsize_t(v);
733 if (width == -1 && PyErr_Occurred())
734 goto error;
735 if (width < 0) {
736 flags |= F_LJUST;
737 width = -width;
738 }
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 width = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "width too big");
752 goto error;
753 }
754 width = width*10 + (c - '0');
755 }
756 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200757
758 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800759 if (c == '.') {
760 prec = 0;
761 if (--fmtcnt >= 0)
762 c = *fmt++;
763 if (c == '*') {
764 v = getnextarg(args, arglen, &argidx);
765 if (v == NULL)
766 goto error;
767 if (!PyLong_Check(v)) {
768 PyErr_SetString(
769 PyExc_TypeError,
770 "* wants int");
771 goto error;
772 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200773 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800774 if (prec == -1 && PyErr_Occurred())
775 goto error;
776 if (prec < 0)
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 }
781 else if (c >= 0 && isdigit(c)) {
782 prec = c - '0';
783 while (--fmtcnt >= 0) {
784 c = Py_CHARMASK(*fmt++);
785 if (!isdigit(c))
786 break;
787 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
788 PyErr_SetString(
789 PyExc_ValueError,
790 "prec too big");
791 goto error;
792 }
793 prec = prec*10 + (c - '0');
794 }
795 }
796 } /* prec */
797 if (fmtcnt >= 0) {
798 if (c == 'h' || c == 'l' || c == 'L') {
799 if (--fmtcnt >= 0)
800 c = *fmt++;
801 }
802 }
803 if (fmtcnt < 0) {
804 PyErr_SetString(PyExc_ValueError,
805 "incomplete format");
806 goto error;
807 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200808 v = getnextarg(args, arglen, &argidx);
809 if (v == NULL)
810 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200811
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300812 if (fmtcnt == 0) {
813 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814 writer.overallocate = 0;
815 }
816
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 sign = 0;
818 fill = ' ';
819 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700820 case 'r':
821 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800822 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200823 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (temp == NULL)
825 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 assert(PyUnicode_IS_ASCII(temp));
827 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
828 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 if (prec >= 0 && len > prec)
830 len = prec;
831 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200832
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 case 's':
834 // %s is only for 2/3 code; 3 only code should use %b
835 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 if (temp == NULL)
838 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 'i':
844 case 'd':
845 case 'u':
846 case 'o':
847 case 'x':
848 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200849 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200850 && width == -1 && prec == -1
851 && !(flags & (F_SIGN | F_BLANK))
852 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200853 {
854 /* Fast path */
855 int alternate = flags & F_ALT;
856 int base;
857
858 switch(c)
859 {
860 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700861 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200862 case 'd':
863 case 'i':
864 case 'u':
865 base = 10;
866 break;
867 case 'o':
868 base = 8;
869 break;
870 case 'x':
871 case 'X':
872 base = 16;
873 break;
874 }
875
876 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200877 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200878 res = _PyLong_FormatBytesWriter(&writer, res,
879 v, base, alternate);
880 if (res == NULL)
881 goto error;
882 continue;
883 }
884
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300885 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200886 if (!temp)
887 goto error;
888 assert(PyUnicode_IS_ASCII(temp));
889 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
890 len = PyUnicode_GET_LENGTH(temp);
891 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800892 if (flags & F_ZERO)
893 fill = '0';
894 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200895
Ethan Furmanb95b5612015-01-23 20:05:18 -0800896 case 'e':
897 case 'E':
898 case 'f':
899 case 'F':
900 case 'g':
901 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (width == -1 && prec == -1
903 && !(flags & (F_SIGN | F_BLANK)))
904 {
905 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200906 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200907 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200908 if (res == NULL)
909 goto error;
910 continue;
911 }
912
Victor Stinnerad771582015-10-09 12:38:53 +0200913 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800914 goto error;
915 pbuf = PyBytes_AS_STRING(temp);
916 len = PyBytes_GET_SIZE(temp);
917 sign = 1;
918 if (flags & F_ZERO)
919 fill = '0';
920 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200923 pbuf = &onechar;
924 len = byte_converter(v, &onechar);
925 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200927 if (width == -1) {
928 /* Fast path */
929 *res++ = onechar;
930 continue;
931 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200933
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 default:
935 PyErr_Format(PyExc_ValueError,
936 "unsupported format character '%c' (0x%x) "
937 "at index %zd",
938 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200939 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
941 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200942
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 if (sign) {
944 if (*pbuf == '-' || *pbuf == '+') {
945 sign = *pbuf++;
946 len--;
947 }
948 else if (flags & F_SIGN)
949 sign = '+';
950 else if (flags & F_BLANK)
951 sign = ' ';
952 else
953 sign = 0;
954 }
955 if (width < len)
956 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957
958 alloc = width;
959 if (sign != 0 && len == width)
960 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200961 /* 2: size preallocated for %s */
962 if (alloc > 2) {
963 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200964 if (res == NULL)
965 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200967#ifndef NDEBUG
968 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200969#endif
970
971 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 if (sign) {
973 if (fill != ' ')
974 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800975 if (width > len)
976 width--;
977 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978
979 /* Write the numeric prefix for "x", "X" and "o" formats
980 if the alternate form is used.
981 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200982 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 assert(pbuf[0] == '0');
984 assert(pbuf[1] == c);
985 if (fill != ' ') {
986 *res++ = *pbuf++;
987 *res++ = *pbuf++;
988 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 width -= 2;
990 if (width < 0)
991 width = 0;
992 len -= 2;
993 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200994
995 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800996 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200997 memset(res, fill, width - len);
998 res += (width - len);
999 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001001
1002 /* If padding with spaces: write sign if needed and/or numeric
1003 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 if (fill == ' ') {
1005 if (sign)
1006 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001007 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001008 assert(pbuf[0] == '0');
1009 assert(pbuf[1] == c);
1010 *res++ = *pbuf++;
1011 *res++ = *pbuf++;
1012 }
1013 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001016 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* Pad right with the fill character if needed */
1020 if (width > len) {
1021 memset(res, ' ', width - len);
1022 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001025 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 PyErr_SetString(PyExc_TypeError,
1027 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 Py_XDECREF(temp);
1029 goto error;
1030 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001033#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034 /* check that we computed the exact size for this write */
1035 assert((res - before) == alloc);
1036#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
1039 /* If overallocation was disabled, ensure that it was the last
1040 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001041 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001043
Ethan Furmanb95b5612015-01-23 20:05:18 -08001044 if (argidx < arglen && !dict) {
1045 PyErr_SetString(PyExc_TypeError,
1046 "not all arguments converted during bytes formatting");
1047 goto error;
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050 if (args_owned) {
1051 Py_DECREF(args);
1052 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054
1055 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001056 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001057 if (args_owned) {
1058 Py_DECREF(args);
1059 }
1060 return NULL;
1061}
1062
Greg Price3a4f6672019-09-12 11:12:22 -07001063/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001064PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_ssize_t len,
1066 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001067 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001070 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001072 _PyBytesWriter writer;
1073
1074 _PyBytesWriter_Init(&writer);
1075
1076 p = _PyBytesWriter_Alloc(&writer, len);
1077 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001079 writer.overallocate = 1;
1080
Eric V. Smith42454af2016-10-31 09:22:08 -04001081 *first_invalid_escape = NULL;
1082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 end = s + len;
1084 while (s < end) {
1085 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001086 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 continue;
1088 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001091 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 PyErr_SetString(PyExc_ValueError,
1093 "Trailing \\ in string");
1094 goto failed;
1095 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 switch (*s++) {
1098 /* XXX This assumes ASCII! */
1099 case '\n': break;
1100 case '\\': *p++ = '\\'; break;
1101 case '\'': *p++ = '\''; break;
1102 case '\"': *p++ = '\"'; break;
1103 case 'b': *p++ = '\b'; break;
1104 case 'f': *p++ = '\014'; break; /* FF */
1105 case 't': *p++ = '\t'; break;
1106 case 'n': *p++ = '\n'; break;
1107 case 'r': *p++ = '\r'; break;
1108 case 'v': *p++ = '\013'; break; /* VT */
1109 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1110 case '0': case '1': case '2': case '3':
1111 case '4': case '5': case '6': case '7':
1112 c = s[-1] - '0';
1113 if (s < end && '0' <= *s && *s <= '7') {
1114 c = (c<<3) + *s++ - '0';
1115 if (s < end && '0' <= *s && *s <= '7')
1116 c = (c<<3) + *s++ - '0';
1117 }
1118 *p++ = c;
1119 break;
1120 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001121 if (s+1 < end) {
1122 int digit1, digit2;
1123 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1124 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1125 if (digit1 < 16 && digit2 < 16) {
1126 *p++ = (unsigned char)((digit1 << 4) + digit2);
1127 s += 2;
1128 break;
1129 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 /* invalid hexadecimal digits */
1132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001134 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001135 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001136 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 goto failed;
1138 }
1139 if (strcmp(errors, "replace") == 0) {
1140 *p++ = '?';
1141 } else if (strcmp(errors, "ignore") == 0)
1142 /* do nothing */;
1143 else {
1144 PyErr_Format(PyExc_ValueError,
1145 "decoding error; unknown "
1146 "error handling code: %.400s",
1147 errors);
1148 goto failed;
1149 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001150 /* skip \x */
1151 if (s < end && Py_ISXDIGIT(s[0]))
1152 s++; /* and a hexdigit */
1153 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001156 if (*first_invalid_escape == NULL) {
1157 *first_invalid_escape = s-1; /* Back up one char, since we've
1158 already incremented s. */
1159 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001161 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 }
1163 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001164
1165 return _PyBytesWriter_Finish(&writer, p);
1166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001168 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001170}
1171
Eric V. Smith42454af2016-10-31 09:22:08 -04001172PyObject *PyBytes_DecodeEscape(const char *s,
1173 Py_ssize_t len,
1174 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001175 Py_ssize_t Py_UNUSED(unicode),
1176 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001177{
1178 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001179 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001180 &first_invalid_escape);
1181 if (result == NULL)
1182 return NULL;
1183 if (first_invalid_escape != NULL) {
1184 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1185 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001186 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001187 Py_DECREF(result);
1188 return NULL;
1189 }
1190 }
1191 return result;
1192
1193}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001194/* -------------------------------------------------------------------- */
1195/* object api */
1196
1197Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001198PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (!PyBytes_Check(op)) {
1201 PyErr_Format(PyExc_TypeError,
1202 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1203 return -1;
1204 }
1205 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206}
1207
1208char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001209PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 if (!PyBytes_Check(op)) {
1212 PyErr_Format(PyExc_TypeError,
1213 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1214 return NULL;
1215 }
1216 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217}
1218
1219int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001220PyBytes_AsStringAndSize(PyObject *obj,
1221 char **s,
1222 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 if (s == NULL) {
1225 PyErr_BadInternalCall();
1226 return -1;
1227 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(obj)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1232 return -1;
1233 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 *s = PyBytes_AS_STRING(obj);
1236 if (len != NULL)
1237 *len = PyBytes_GET_SIZE(obj);
1238 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001239 PyErr_SetString(PyExc_ValueError,
1240 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return -1;
1242 }
1243 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244}
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246/* -------------------------------------------------------------------- */
1247/* Methods */
1248
Eric Smith0923d1d2009-04-16 20:16:10 +00001249#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001250
1251#include "stringlib/fastsearch.h"
1252#include "stringlib/count.h"
1253#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001254#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001255#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001256#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001257#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Eric Smith0f78bff2009-11-30 01:01:42 +00001259#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261PyObject *
1262PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001263{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001264 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001266 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001268 unsigned char quote, *s, *p;
1269
1270 /* Compute size of output string */
1271 squotes = dquotes = 0;
1272 newsize = 3; /* b'' */
1273 s = (unsigned char*)op->ob_sval;
1274 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001275 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001277 case '\'': squotes++; break;
1278 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001280 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 default:
1282 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001283 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 if (newsize > PY_SSIZE_T_MAX - incr)
1286 goto overflow;
1287 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 }
1289 quote = '\'';
1290 if (smartquotes && squotes && !dquotes)
1291 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001292 if (squotes && quote == '\'') {
1293 if (newsize > PY_SSIZE_T_MAX - squotes)
1294 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297
1298 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 if (v == NULL) {
1300 return NULL;
1301 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 *p++ = 'b', *p++ = quote;
1305 for (i = 0; i < length; i++) {
1306 unsigned char c = op->ob_sval[i];
1307 if (c == quote || c == '\\')
1308 *p++ = '\\', *p++ = c;
1309 else if (c == '\t')
1310 *p++ = '\\', *p++ = 't';
1311 else if (c == '\n')
1312 *p++ = '\\', *p++ = 'n';
1313 else if (c == '\r')
1314 *p++ = '\\', *p++ = 'r';
1315 else if (c < ' ' || c >= 0x7f) {
1316 *p++ = '\\';
1317 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001318 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1319 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001321 else
1322 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001325 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001327
1328 overflow:
1329 PyErr_SetString(PyExc_OverflowError,
1330 "bytes object is too large to make repr");
1331 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001332}
1333
Neal Norwitz6968b052007-02-27 19:02:19 +00001334static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001335bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001338}
1339
Neal Norwitz6968b052007-02-27 19:02:19 +00001340static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001341bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001342{
Victor Stinner331a6a52019-05-27 16:39:22 +02001343 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001344 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001346 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001348 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
1350 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001351}
1352
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001354bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357}
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359/* This is also used by PyBytes_Concat() */
1360static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001361bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 Py_buffer va, vb;
1364 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 va.len = -1;
1367 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001368 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1369 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001371 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 goto done;
1373 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 /* Optimize end cases */
1376 if (va.len == 0 && PyBytes_CheckExact(b)) {
1377 result = b;
1378 Py_INCREF(result);
1379 goto done;
1380 }
1381 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1382 result = a;
1383 Py_INCREF(result);
1384 goto done;
1385 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001387 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 PyErr_NoMemory();
1389 goto done;
1390 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001392 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 if (result != NULL) {
1394 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1395 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
1398 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (va.len != -1)
1400 PyBuffer_Release(&va);
1401 if (vb.len != -1)
1402 PyBuffer_Release(&vb);
1403 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404}
Neal Norwitz6968b052007-02-27 19:02:19 +00001405
1406static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001407bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001408{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001409 Py_ssize_t i;
1410 Py_ssize_t j;
1411 Py_ssize_t size;
1412 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 size_t nbytes;
1414 if (n < 0)
1415 n = 0;
1416 /* watch out for overflows: the size can overflow int,
1417 * and the # of bytes needed can overflow size_t
1418 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001419 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyErr_SetString(PyExc_OverflowError,
1421 "repeated bytes are too long");
1422 return NULL;
1423 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001424 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1426 Py_INCREF(a);
1427 return (PyObject *)a;
1428 }
1429 nbytes = (size_t)size;
1430 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1431 PyErr_SetString(PyExc_OverflowError,
1432 "repeated bytes are too long");
1433 return NULL;
1434 }
1435 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1436 if (op == NULL)
1437 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001438 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 op->ob_shash = -1;
1440 op->ob_sval[size] = '\0';
1441 if (Py_SIZE(a) == 1 && n > 0) {
1442 memset(op->ob_sval, a->ob_sval[0] , n);
1443 return (PyObject *) op;
1444 }
1445 i = 0;
1446 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001447 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 i = Py_SIZE(a);
1449 }
1450 while (i < size) {
1451 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001452 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 i += j;
1454 }
1455 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001456}
1457
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001458static int
1459bytes_contains(PyObject *self, PyObject *arg)
1460{
1461 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1462}
1463
Neal Norwitz6968b052007-02-27 19:02:19 +00001464static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001465bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (i < 0 || i >= Py_SIZE(a)) {
1468 PyErr_SetString(PyExc_IndexError, "index out of range");
1469 return NULL;
1470 }
1471 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001472}
1473
Benjamin Peterson621b4302016-09-09 13:54:34 -07001474static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001475bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1476{
1477 int cmp;
1478 Py_ssize_t len;
1479
1480 len = Py_SIZE(a);
1481 if (Py_SIZE(b) != len)
1482 return 0;
1483
1484 if (a->ob_sval[0] != b->ob_sval[0])
1485 return 0;
1486
1487 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1488 return (cmp == 0);
1489}
1490
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001492bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001493{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 int c;
1495 Py_ssize_t len_a, len_b;
1496 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001497 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 /* Make sure both arguments are strings. */
1500 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001501 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001502 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001503 rc = PyObject_IsInstance((PyObject*)a,
1504 (PyObject*)&PyUnicode_Type);
1505 if (!rc)
1506 rc = PyObject_IsInstance((PyObject*)b,
1507 (PyObject*)&PyUnicode_Type);
1508 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001510 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001511 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001512 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001513 return NULL;
1514 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001515 else {
1516 rc = PyObject_IsInstance((PyObject*)a,
1517 (PyObject*)&PyLong_Type);
1518 if (!rc)
1519 rc = PyObject_IsInstance((PyObject*)b,
1520 (PyObject*)&PyLong_Type);
1521 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001522 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001523 if (rc) {
1524 if (PyErr_WarnEx(PyExc_BytesWarning,
1525 "Comparison between bytes and int", 1))
1526 return NULL;
1527 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001528 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 }
stratakise8b19652017-11-02 11:32:54 +01001530 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001532 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001534 case Py_EQ:
1535 case Py_LE:
1536 case Py_GE:
1537 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001538 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001539 case Py_NE:
1540 case Py_LT:
1541 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001542 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001543 default:
1544 PyErr_BadArgument();
1545 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 }
1547 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001548 else if (op == Py_EQ || op == Py_NE) {
1549 int eq = bytes_compare_eq(a, b);
1550 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001551 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001552 }
1553 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001554 len_a = Py_SIZE(a);
1555 len_b = Py_SIZE(b);
1556 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001557 if (min_len > 0) {
1558 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001559 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001560 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 else
1563 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001564 if (c != 0)
1565 Py_RETURN_RICHCOMPARE(c, 0, op);
1566 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001568}
1569
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001570static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001571bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001572{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001573 if (a->ob_shash == -1) {
1574 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001575 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001576 }
1577 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001578}
1579
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001580static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001581bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582{
Victor Stinnera15e2602020-04-08 02:01:56 +02001583 if (_PyIndex_Check(item)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1585 if (i == -1 && PyErr_Occurred())
1586 return NULL;
1587 if (i < 0)
1588 i += PyBytes_GET_SIZE(self);
1589 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1590 PyErr_SetString(PyExc_IndexError,
1591 "index out of range");
1592 return NULL;
1593 }
1594 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1595 }
1596 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001597 Py_ssize_t start, stop, step, slicelength, i;
1598 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 char* source_buf;
1600 char* result_buf;
1601 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001602
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001603 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 return NULL;
1605 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001606 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1607 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 if (slicelength <= 0) {
1610 return PyBytes_FromStringAndSize("", 0);
1611 }
1612 else if (start == 0 && step == 1 &&
1613 slicelength == PyBytes_GET_SIZE(self) &&
1614 PyBytes_CheckExact(self)) {
1615 Py_INCREF(self);
1616 return (PyObject *)self;
1617 }
1618 else if (step == 1) {
1619 return PyBytes_FromStringAndSize(
1620 PyBytes_AS_STRING(self) + start,
1621 slicelength);
1622 }
1623 else {
1624 source_buf = PyBytes_AS_STRING(self);
1625 result = PyBytes_FromStringAndSize(NULL, slicelength);
1626 if (result == NULL)
1627 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 result_buf = PyBytes_AS_STRING(result);
1630 for (cur = start, i = 0; i < slicelength;
1631 cur += step, i++) {
1632 result_buf[i] = source_buf[cur];
1633 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 return result;
1636 }
1637 }
1638 else {
1639 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001640 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 Py_TYPE(item)->tp_name);
1642 return NULL;
1643 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001644}
1645
1646static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001647bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1650 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651}
1652
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001653static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 (lenfunc)bytes_length, /*sq_length*/
1655 (binaryfunc)bytes_concat, /*sq_concat*/
1656 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1657 (ssizeargfunc)bytes_item, /*sq_item*/
1658 0, /*sq_slice*/
1659 0, /*sq_ass_item*/
1660 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001661 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662};
1663
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001664static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 (lenfunc)bytes_length,
1666 (binaryfunc)bytes_subscript,
1667 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668};
1669
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001670static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 (getbufferproc)bytes_buffer_getbuffer,
1672 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673};
1674
1675
/* Values for the `striptype` argument of the strip helpers below. */
#define LEFTSTRIP   0   /* strip the leading end only */
#define RIGHTSTRIP  1   /* strip the trailing end only */
#define BOTHSTRIP   2   /* strip both ends */
1679
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001680/*[clinic input]
1681bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001683 sep: object = None
1684 The delimiter according which to split the bytes.
1685 None (the default value) means split on ASCII whitespace characters
1686 (space, tab, return, newline, formfeed, vertical tab).
1687 maxsplit: Py_ssize_t = -1
1688 Maximum number of splits to do.
1689 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001691Return a list of the sections in the bytes, using sep as the delimiter.
1692[clinic start generated code]*/
1693
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001694static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001695bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1696/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001697{
1698 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 const char *s = PyBytes_AS_STRING(self), *sub;
1700 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 if (maxsplit < 0)
1704 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001707 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return NULL;
1709 sub = vsub.buf;
1710 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1713 PyBuffer_Release(&vsub);
1714 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001715}
1716
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001717/*[clinic input]
1718bytes.partition
1719
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001720 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721 /
1722
1723Partition the bytes into three parts using the given separator.
1724
1725This will search for the separator sep in the bytes. If the separator is found,
1726returns a 3-tuple containing the part before the separator, the separator
1727itself, and the part after it.
1728
1729If the separator is not found, returns a 3-tuple containing the original bytes
1730object and two empty bytes objects.
1731[clinic start generated code]*/
1732
Neal Norwitz6968b052007-02-27 19:02:19 +00001733static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001734bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001735/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001736{
Neal Norwitz6968b052007-02-27 19:02:19 +00001737 return stringlib_partition(
1738 (PyObject*) self,
1739 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001740 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001741 );
1742}
1743
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001744/*[clinic input]
1745bytes.rpartition
1746
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001747 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001748 /
1749
1750Partition the bytes into three parts using the given separator.
1751
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001752This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753the separator is found, returns a 3-tuple containing the part before the
1754separator, the separator itself, and the part after it.
1755
1756If the separator is not found, returns a 3-tuple containing two empty bytes
1757objects and the original bytes object.
1758[clinic start generated code]*/
1759
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001760static PyObject *
1761bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001762/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001763{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 return stringlib_rpartition(
1765 (PyObject*) self,
1766 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001767 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001769}
1770
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771/*[clinic input]
1772bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001773
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001774Return a list of the sections in the bytes, using sep as the delimiter.
1775
1776Splitting is done starting at the end of the bytes and working to the front.
1777[clinic start generated code]*/
1778
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001780bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1781/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001782{
1783 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 const char *s = PyBytes_AS_STRING(self), *sub;
1785 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001786 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 if (maxsplit < 0)
1789 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001792 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 return NULL;
1794 sub = vsub.buf;
1795 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1798 PyBuffer_Release(&vsub);
1799 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001800}
1801
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001803/*[clinic input]
1804bytes.join
1805
1806 iterable_of_bytes: object
1807 /
1808
1809Concatenate any number of bytes objects.
1810
1811The bytes whose method is called is inserted in between each pair.
1812
1813The result is returned as a new bytes object.
1814
1815Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1816[clinic start generated code]*/
1817
Neal Norwitz6968b052007-02-27 19:02:19 +00001818static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001819bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1820/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001821{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001822 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001823}
1824
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825PyObject *
1826_PyBytes_Join(PyObject *sep, PyObject *x)
1827{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 assert(sep != NULL && PyBytes_Check(sep));
1829 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001830 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831}
1832
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001833static PyObject *
1834bytes_find(PyBytesObject *self, PyObject *args)
1835{
1836 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1837}
1838
1839static PyObject *
1840bytes_index(PyBytesObject *self, PyObject *args)
1841{
1842 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1843}
1844
1845
1846static PyObject *
1847bytes_rfind(PyBytesObject *self, PyObject *args)
1848{
1849 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1850}
1851
1852
1853static PyObject *
1854bytes_rindex(PyBytesObject *self, PyObject *args)
1855{
1856 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857}
1858
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001859
1860Py_LOCAL_INLINE(PyObject *)
1861do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 Py_buffer vsep;
1864 char *s = PyBytes_AS_STRING(self);
1865 Py_ssize_t len = PyBytes_GET_SIZE(self);
1866 char *sep;
1867 Py_ssize_t seplen;
1868 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001870 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 return NULL;
1872 sep = vsep.buf;
1873 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 i = 0;
1876 if (striptype != RIGHTSTRIP) {
1877 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1878 i++;
1879 }
1880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 j = len;
1883 if (striptype != LEFTSTRIP) {
1884 do {
1885 j--;
1886 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1887 j++;
1888 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1893 Py_INCREF(self);
1894 return (PyObject*)self;
1895 }
1896 else
1897 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001898}
1899
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
1901Py_LOCAL_INLINE(PyObject *)
1902do_strip(PyBytesObject *self, int striptype)
1903{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 char *s = PyBytes_AS_STRING(self);
1905 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 i = 0;
1908 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001909 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 i++;
1911 }
1912 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 j = len;
1915 if (striptype != LEFTSTRIP) {
1916 do {
1917 j--;
David Malcolm96960882010-11-05 17:23:41 +00001918 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 j++;
1920 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1923 Py_INCREF(self);
1924 return (PyObject*)self;
1925 }
1926 else
1927 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928}
1929
1930
1931Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001932do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001934 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001935 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 }
1937 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938}
1939
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001940/*[clinic input]
1941bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001943 bytes: object = None
1944 /
1945
1946Strip leading and trailing bytes contained in the argument.
1947
1948If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1949[clinic start generated code]*/
1950
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001951static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001952bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001953/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001954{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001955 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001956}
1957
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001958/*[clinic input]
1959bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001961 bytes: object = None
1962 /
1963
1964Strip leading bytes contained in the argument.
1965
1966If the argument is omitted or None, strip leading ASCII whitespace.
1967[clinic start generated code]*/
1968
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001969static PyObject *
1970bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001971/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001972{
1973 return do_argstrip(self, LEFTSTRIP, bytes);
1974}
1975
1976/*[clinic input]
1977bytes.rstrip
1978
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001979 bytes: object = None
1980 /
1981
1982Strip trailing bytes contained in the argument.
1983
1984If the argument is omitted or None, strip trailing ASCII whitespace.
1985[clinic start generated code]*/
1986
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001987static PyObject *
1988bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001989/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001990{
1991 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001992}
Neal Norwitz6968b052007-02-27 19:02:19 +00001993
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001995static PyObject *
1996bytes_count(PyBytesObject *self, PyObject *args)
1997{
1998 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1999}
2000
2001
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002/*[clinic input]
2003bytes.translate
2004
Victor Stinner049e5092014-08-17 22:20:00 +02002005 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002008 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002009
2010Return a copy with each character mapped by the given translation table.
2011
Martin Panter1b6c6da2016-08-27 08:35:02 +00002012All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013The remaining characters are mapped through the given translation table.
2014[clinic start generated code]*/
2015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002016static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002017bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002018 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002019/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002021 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002022 Py_buffer table_view = {NULL, NULL};
2023 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002025 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 Py_ssize_t inlen, tablen, dellen = 0;
2029 PyObject *result;
2030 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002032 if (PyBytes_Check(table)) {
2033 table_chars = PyBytes_AS_STRING(table);
2034 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002036 else if (table == Py_None) {
2037 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 tablen = 256;
2039 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002040 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002041 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002042 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002043 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002044 tablen = table_view.len;
2045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (tablen != 256) {
2048 PyErr_SetString(PyExc_ValueError,
2049 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002050 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 return NULL;
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002054 if (deletechars != NULL) {
2055 if (PyBytes_Check(deletechars)) {
2056 del_table_chars = PyBytes_AS_STRING(deletechars);
2057 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002060 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 PyBuffer_Release(&table_view);
2062 return NULL;
2063 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002064 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002065 dellen = del_table_view.len;
2066 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 }
2068 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 dellen = 0;
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 inlen = PyBytes_GET_SIZE(input_obj);
2074 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002075 if (result == NULL) {
2076 PyBuffer_Release(&del_table_view);
2077 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002079 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002080 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 /* If no deletions are required, use faster code */
2085 for (i = inlen; --i >= 0; ) {
2086 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 changed = 1;
2089 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002090 if (!changed && PyBytes_CheckExact(input_obj)) {
2091 Py_INCREF(input_obj);
2092 Py_DECREF(result);
2093 result = input_obj;
2094 }
2095 PyBuffer_Release(&del_table_view);
2096 PyBuffer_Release(&table_view);
2097 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 for (i = 0; i < 256; i++)
2102 trans_table[i] = Py_CHARMASK(i);
2103 } else {
2104 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002105 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002111 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 for (i = inlen; --i >= 0; ) {
2114 c = Py_CHARMASK(*input++);
2115 if (trans_table[c] != -1)
2116 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2117 continue;
2118 changed = 1;
2119 }
2120 if (!changed && PyBytes_CheckExact(input_obj)) {
2121 Py_DECREF(result);
2122 Py_INCREF(input_obj);
2123 return input_obj;
2124 }
2125 /* Fix the size of the resulting string */
2126 if (inlen > 0)
2127 _PyBytes_Resize(&result, output - output_start);
2128 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132/*[clinic input]
2133
2134@staticmethod
2135bytes.maketrans
2136
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002137 frm: Py_buffer
2138 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 /
2140
2141Return a translation table useable for the bytes or bytearray translate method.
2142
2143The returned table will be one where each byte in frm is mapped to the byte at
2144the same position in to.
2145
2146The bytes objects frm and to must be of the same length.
2147[clinic start generated code]*/
2148
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002150bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002151/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152{
2153 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002154}
2155
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156
2157/*[clinic input]
2158bytes.replace
2159
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002160 old: Py_buffer
2161 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162 count: Py_ssize_t = -1
2163 Maximum number of occurrences to replace.
2164 -1 (the default value) means replace all occurrences.
2165 /
2166
2167Return a copy with all occurrences of substring old replaced by new.
2168
2169If the optional argument count is given, only the first count occurrences are
2170replaced.
2171[clinic start generated code]*/
2172
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002174bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002175 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002176/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002178 return stringlib_replace((PyObject *)self,
2179 (const char *)old->buf, old->len,
2180 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181}
2182
2183/** End DALKE **/
2184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002186static PyObject *
2187bytes_startswith(PyBytesObject *self, PyObject *args)
2188{
2189 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2190}
2191
2192static PyObject *
2193bytes_endswith(PyBytesObject *self, PyObject *args)
2194{
2195 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2196}
2197
2198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199/*[clinic input]
2200bytes.decode
2201
2202 encoding: str(c_default="NULL") = 'utf-8'
2203 The encoding with which to decode the bytes.
2204 errors: str(c_default="NULL") = 'strict'
2205 The error handling scheme to use for the handling of decoding errors.
2206 The default is 'strict' meaning that decoding errors raise a
2207 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2208 as well as any other name registered with codecs.register_error that
2209 can handle UnicodeDecodeErrors.
2210
2211Decode the bytes using the codec registered for encoding.
2212[clinic start generated code]*/
2213
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002215bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002216 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002217/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002218{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002219 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002220}
2221
Guido van Rossum20188312006-05-05 15:15:40 +00002222
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002223/*[clinic input]
2224bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002225
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002226 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002227
2228Return a list of the lines in the bytes, breaking at line boundaries.
2229
2230Line breaks are not included in the resulting list unless keepends is given and
2231true.
2232[clinic start generated code]*/
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002235bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002236/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002238 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002239 (PyObject*) self, PyBytes_AS_STRING(self),
2240 PyBytes_GET_SIZE(self), keepends
2241 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002242}
2243
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244/*[clinic input]
2245@classmethod
2246bytes.fromhex
2247
2248 string: unicode
2249 /
2250
2251Create a bytes object from a string of hexadecimal numbers.
2252
2253Spaces between two numbers are accepted.
2254Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2255[clinic start generated code]*/
2256
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002257static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002258bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002259/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002261 PyObject *result = _PyBytes_FromHex(string, 0);
2262 if (type != &PyBytes_Type && result != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002263 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002264 }
2265 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002266}
2267
2268PyObject*
2269_PyBytes_FromHex(PyObject *string, int use_bytearray)
2270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002272 Py_ssize_t hexlen, invalid_char;
2273 unsigned int top, bot;
2274 Py_UCS1 *str, *end;
2275 _PyBytesWriter writer;
2276
2277 _PyBytesWriter_Init(&writer);
2278 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002279
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002280 assert(PyUnicode_Check(string));
2281 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002283 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002284
Victor Stinner2bf89932015-10-14 11:25:33 +02002285 if (!PyUnicode_IS_ASCII(string)) {
2286 void *data = PyUnicode_DATA(string);
2287 unsigned int kind = PyUnicode_KIND(string);
2288 Py_ssize_t i;
2289
2290 /* search for the first non-ASCII character */
2291 for (i = 0; i < hexlen; i++) {
2292 if (PyUnicode_READ(kind, data, i) >= 128)
2293 break;
2294 }
2295 invalid_char = i;
2296 goto error;
2297 }
2298
2299 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2300 str = PyUnicode_1BYTE_DATA(string);
2301
2302 /* This overestimates if there are spaces */
2303 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2304 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002306
2307 end = str + hexlen;
2308 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002310 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002311 do {
2312 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002313 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002314 if (str >= end)
2315 break;
2316 }
2317
2318 top = _PyLong_DigitValue[*str];
2319 if (top >= 16) {
2320 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 goto error;
2322 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002323 str++;
2324
2325 bot = _PyLong_DigitValue[*str];
2326 if (bot >= 16) {
2327 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2328 goto error;
2329 }
2330 str++;
2331
2332 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002334
2335 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002336
2337 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002338 PyErr_Format(PyExc_ValueError,
2339 "non-hexadecimal number found in "
2340 "fromhex() arg at position %zd", invalid_char);
2341 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002343}
2344
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002345/*[clinic input]
2346bytes.hex
2347
2348 sep: object = NULL
2349 An optional single character or byte to separate hex bytes.
2350 bytes_per_sep: int = 1
2351 How many bytes between separators. Positive values count from the
2352 right, negative values count from the left.
2353
2354Create a str of hexadecimal numbers from a bytes object.
2355
2356Example:
2357>>> value = b'\xb9\x01\xef'
2358>>> value.hex()
2359'b901ef'
2360>>> value.hex(':')
2361'b9:01:ef'
2362>>> value.hex(':', 2)
2363'b9:01ef'
2364>>> value.hex(':', -2)
2365'b901:ef'
2366[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002367
2368static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002369bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2370/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002371{
2372 char* argbuf = PyBytes_AS_STRING(self);
2373 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002374 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002375}
2376
2377static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302378bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002381}
2382
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002383
2384static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002385bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302387 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002389 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002390 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002391 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002392 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002393 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002394 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002395 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002396 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002397 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002398 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002399 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002400 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302401 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302403 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302405 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002406 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302407 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302409 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302411 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302413 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302415 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002417 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002418 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302419 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002420 BYTES_LSTRIP_METHODDEF
2421 BYTES_MAKETRANS_METHODDEF
2422 BYTES_PARTITION_METHODDEF
2423 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002424 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2425 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002426 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002427 BYTES_RPARTITION_METHODDEF
2428 BYTES_RSPLIT_METHODDEF
2429 BYTES_RSTRIP_METHODDEF
2430 BYTES_SPLIT_METHODDEF
2431 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002432 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002433 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002434 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302435 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302437 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002438 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302439 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002440 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002442};
2443
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002444static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002445bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002446{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002447 if (!PyBytes_Check(self)) {
2448 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002449 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002450 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002451 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002452}
2453
2454static PyNumberMethods bytes_as_number = {
2455 0, /*nb_add*/
2456 0, /*nb_subtract*/
2457 0, /*nb_multiply*/
2458 bytes_mod, /*nb_remainder*/
2459};
2460
2461static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002462bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463
2464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002465bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 PyObject *x = NULL;
2468 const char *encoding = NULL;
2469 const char *errors = NULL;
2470 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002471 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 Py_ssize_t size;
2473 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002476 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2478 &encoding, &errors))
2479 return NULL;
2480 if (x == NULL) {
2481 if (encoding != NULL || errors != NULL) {
2482 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002483 encoding != NULL ?
2484 "encoding without a string argument" :
2485 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 return NULL;
2487 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002488 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002491 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002493 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002495 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 return NULL;
2497 }
2498 new = PyUnicode_AsEncodedString(x, encoding, errors);
2499 if (new == NULL)
2500 return NULL;
2501 assert(PyBytes_Check(new));
2502 return new;
2503 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002504
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002505 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002506 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002507 PyUnicode_Check(x) ?
2508 "string argument without an encoding" :
2509 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002510 return NULL;
2511 }
2512
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002513 /* We'd like to call PyObject_Bytes here, but we need to check for an
2514 integer argument before deferring to PyBytes_FromObject, something
2515 PyObject_Bytes doesn't do. */
2516 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2517 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002518 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002519 Py_DECREF(func);
2520 if (new == NULL)
2521 return NULL;
2522 if (!PyBytes_Check(new)) {
2523 PyErr_Format(PyExc_TypeError,
2524 "__bytes__ returned non-bytes (type %.200s)",
2525 Py_TYPE(new)->tp_name);
2526 Py_DECREF(new);
2527 return NULL;
2528 }
2529 return new;
2530 }
2531 else if (PyErr_Occurred())
2532 return NULL;
2533
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002534 if (PyUnicode_Check(x)) {
2535 PyErr_SetString(PyExc_TypeError,
2536 "string argument without an encoding");
2537 return NULL;
2538 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 /* Is it an integer? */
Victor Stinnera15e2602020-04-08 02:01:56 +02002540 if (_PyIndex_Check(x)) {
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002541 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2542 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002543 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002544 return NULL;
2545 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002546 }
INADA Naokia634e232017-01-06 17:32:01 +09002547 else {
2548 if (size < 0) {
2549 PyErr_SetString(PyExc_ValueError, "negative count");
2550 return NULL;
2551 }
2552 new = _PyBytes_FromSize(size, 1);
2553 if (new == NULL)
2554 return NULL;
2555 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002559 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002560}
2561
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002562static PyObject*
2563_PyBytes_FromBuffer(PyObject *x)
2564{
2565 PyObject *new;
2566 Py_buffer view;
2567
2568 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2569 return NULL;
2570
2571 new = PyBytes_FromStringAndSize(NULL, view.len);
2572 if (!new)
2573 goto fail;
2574 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2575 &view, view.len, 'C') < 0)
2576 goto fail;
2577 PyBuffer_Release(&view);
2578 return new;
2579
2580fail:
2581 Py_XDECREF(new);
2582 PyBuffer_Release(&view);
2583 return NULL;
2584}
2585
2586static PyObject*
2587_PyBytes_FromList(PyObject *x)
2588{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002589 Py_ssize_t i, size = PyList_GET_SIZE(x);
2590 Py_ssize_t value;
2591 char *str;
2592 PyObject *item;
2593 _PyBytesWriter writer;
2594
2595 _PyBytesWriter_Init(&writer);
2596 str = _PyBytesWriter_Alloc(&writer, size);
2597 if (str == NULL)
2598 return NULL;
2599 writer.overallocate = 1;
2600 size = writer.allocated;
2601
2602 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2603 item = PyList_GET_ITEM(x, i);
2604 Py_INCREF(item);
2605 value = PyNumber_AsSsize_t(item, NULL);
2606 Py_DECREF(item);
2607 if (value == -1 && PyErr_Occurred())
2608 goto error;
2609
2610 if (value < 0 || value >= 256) {
2611 PyErr_SetString(PyExc_ValueError,
2612 "bytes must be in range(0, 256)");
2613 goto error;
2614 }
2615
2616 if (i >= size) {
2617 str = _PyBytesWriter_Resize(&writer, str, size+1);
2618 if (str == NULL)
2619 return NULL;
2620 size = writer.allocated;
2621 }
2622 *str++ = (char) value;
2623 }
2624 return _PyBytesWriter_Finish(&writer, str);
2625
2626 error:
2627 _PyBytesWriter_Dealloc(&writer);
2628 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002629}
2630
2631static PyObject*
2632_PyBytes_FromTuple(PyObject *x)
2633{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002634 PyObject *bytes;
2635 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2636 Py_ssize_t value;
2637 char *str;
2638 PyObject *item;
2639
2640 bytes = PyBytes_FromStringAndSize(NULL, size);
2641 if (bytes == NULL)
2642 return NULL;
2643 str = ((PyBytesObject *)bytes)->ob_sval;
2644
2645 for (i = 0; i < size; i++) {
2646 item = PyTuple_GET_ITEM(x, i);
2647 value = PyNumber_AsSsize_t(item, NULL);
2648 if (value == -1 && PyErr_Occurred())
2649 goto error;
2650
2651 if (value < 0 || value >= 256) {
2652 PyErr_SetString(PyExc_ValueError,
2653 "bytes must be in range(0, 256)");
2654 goto error;
2655 }
2656 *str++ = (char) value;
2657 }
2658 return bytes;
2659
2660 error:
2661 Py_DECREF(bytes);
2662 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002663}
2664
2665static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002666_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002667{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002668 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002670 _PyBytesWriter writer;
2671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002673 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 if (size == -1 && PyErr_Occurred())
2675 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002676
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002677 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002678 str = _PyBytesWriter_Alloc(&writer, size);
2679 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002681 writer.overallocate = 1;
2682 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Run the iterator to exhaustion */
2685 for (i = 0; ; i++) {
2686 PyObject *item;
2687 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 /* Get the next item */
2690 item = PyIter_Next(it);
2691 if (item == NULL) {
2692 if (PyErr_Occurred())
2693 goto error;
2694 break;
2695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002698 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 Py_DECREF(item);
2700 if (value == -1 && PyErr_Occurred())
2701 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* Range check */
2704 if (value < 0 || value >= 256) {
2705 PyErr_SetString(PyExc_ValueError,
2706 "bytes must be in range(0, 256)");
2707 goto error;
2708 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 /* Append the byte */
2711 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002712 str = _PyBytesWriter_Resize(&writer, str, size+1);
2713 if (str == NULL)
2714 return NULL;
2715 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002717 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002719
2720 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
2722 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002723 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725}
2726
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002727PyObject *
2728PyBytes_FromObject(PyObject *x)
2729{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002730 PyObject *it, *result;
2731
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002732 if (x == NULL) {
2733 PyErr_BadInternalCall();
2734 return NULL;
2735 }
2736
2737 if (PyBytes_CheckExact(x)) {
2738 Py_INCREF(x);
2739 return x;
2740 }
2741
2742 /* Use the modern buffer interface */
2743 if (PyObject_CheckBuffer(x))
2744 return _PyBytes_FromBuffer(x);
2745
2746 if (PyList_CheckExact(x))
2747 return _PyBytes_FromList(x);
2748
2749 if (PyTuple_CheckExact(x))
2750 return _PyBytes_FromTuple(x);
2751
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002752 if (!PyUnicode_Check(x)) {
2753 it = PyObject_GetIter(x);
2754 if (it != NULL) {
2755 result = _PyBytes_FromIterator(it, x);
2756 Py_DECREF(it);
2757 return result;
2758 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002759 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2760 return NULL;
2761 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002762 }
2763
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002764 PyErr_Format(PyExc_TypeError,
2765 "cannot convert '%.200s' object to bytes",
Victor Stinner58ac7002020-02-07 03:04:21 +01002766 Py_TYPE(x)->tp_name);
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002768}
2769
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002771bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002773 PyObject *tmp, *pnew;
2774 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 assert(PyType_IsSubtype(type, &PyBytes_Type));
2777 tmp = bytes_new(&PyBytes_Type, args, kwds);
2778 if (tmp == NULL)
2779 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002780 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 n = PyBytes_GET_SIZE(tmp);
2782 pnew = type->tp_alloc(type, n);
2783 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002784 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 PyBytes_AS_STRING(tmp), n+1);
2786 ((PyBytesObject *)pnew)->ob_shash =
2787 ((PyBytesObject *)tmp)->ob_shash;
2788 }
2789 Py_DECREF(tmp);
2790 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791}
2792
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002793PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002794"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002796bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002797bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2798bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002799\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002801 - an iterable yielding integers in range(256)\n\
2802 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002803 - any object implementing the buffer API.\n\
2804 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002805
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002806static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002807
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002808PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002809 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2810 "bytes",
2811 PyBytesObject_SIZE,
2812 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002813 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002814 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 0, /* tp_getattr */
2816 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002817 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002819 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 &bytes_as_sequence, /* tp_as_sequence */
2821 &bytes_as_mapping, /* tp_as_mapping */
2822 (hashfunc)bytes_hash, /* tp_hash */
2823 0, /* tp_call */
2824 bytes_str, /* tp_str */
2825 PyObject_GenericGetAttr, /* tp_getattro */
2826 0, /* tp_setattro */
2827 &bytes_as_buffer, /* tp_as_buffer */
2828 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2829 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2830 bytes_doc, /* tp_doc */
2831 0, /* tp_traverse */
2832 0, /* tp_clear */
2833 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2834 0, /* tp_weaklistoffset */
2835 bytes_iter, /* tp_iter */
2836 0, /* tp_iternext */
2837 bytes_methods, /* tp_methods */
2838 0, /* tp_members */
2839 0, /* tp_getset */
2840 &PyBaseObject_Type, /* tp_base */
2841 0, /* tp_dict */
2842 0, /* tp_descr_get */
2843 0, /* tp_descr_set */
2844 0, /* tp_dictoffset */
2845 0, /* tp_init */
2846 0, /* tp_alloc */
2847 bytes_new, /* tp_new */
2848 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002849};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002850
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002852PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002854 assert(pv != NULL);
2855 if (*pv == NULL)
2856 return;
2857 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002858 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 return;
2860 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002861
2862 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2863 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002864 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002865 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002866
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002867 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002868 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2869 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2870 Py_CLEAR(*pv);
2871 return;
2872 }
2873
2874 oldsize = PyBytes_GET_SIZE(*pv);
2875 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2876 PyErr_NoMemory();
2877 goto error;
2878 }
2879 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2880 goto error;
2881
2882 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2883 PyBuffer_Release(&wb);
2884 return;
2885
2886 error:
2887 PyBuffer_Release(&wb);
2888 Py_CLEAR(*pv);
2889 return;
2890 }
2891
2892 else {
2893 /* Multiple references, need to create new object */
2894 PyObject *v;
2895 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002896 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898}
2899
2900void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002901PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 PyBytes_Concat(pv, w);
2904 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002905}
2906
2907
Ethan Furmanb95b5612015-01-23 20:05:18 -08002908/* The following function breaks the notion that bytes are immutable:
2909 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002911 as creating a new bytes object and destroying the old one, only
2912 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002914 Note that if there's not enough memory to resize the bytes object, the
2915 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916 memory" exception is set, and -1 is returned. Else (on success) 0 is
2917 returned, and the value in *pv may or may not be the same as on input.
2918 As always, an extra byte is allocated for a trailing \0 byte (newsize
2919 does *not* include that), and a trailing \0 byte is stored.
2920*/
2921
2922int
2923_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2924{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002925 PyObject *v;
2926 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002928 if (!PyBytes_Check(v) || newsize < 0) {
2929 goto error;
2930 }
2931 if (Py_SIZE(v) == newsize) {
2932 /* return early if newsize equals to v->ob_size */
2933 return 0;
2934 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002935 if (Py_SIZE(v) == 0) {
2936 if (newsize == 0) {
2937 return 0;
2938 }
2939 *pv = _PyBytes_FromSize(newsize, 0);
2940 Py_DECREF(v);
2941 return (*pv == NULL) ? -1 : 0;
2942 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002943 if (Py_REFCNT(v) != 1) {
2944 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002946 if (newsize == 0) {
2947 *pv = _PyBytes_FromSize(0, 0);
2948 Py_DECREF(v);
2949 return (*pv == NULL) ? -1 : 0;
2950 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 /* XXX UNREF/NEWREF interface should be more symmetrical */
Victor Stinner49932fe2020-02-03 17:55:05 +01002952#ifdef Py_REF_DEBUG
2953 _Py_RefTotal--;
2954#endif
2955#ifdef Py_TRACE_REFS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002956 _Py_ForgetReference(v);
Victor Stinner49932fe2020-02-03 17:55:05 +01002957#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002959 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 if (*pv == NULL) {
2961 PyObject_Del(v);
2962 PyErr_NoMemory();
2963 return -1;
2964 }
2965 _Py_NewReference(*pv);
2966 sv = (PyBytesObject *) *pv;
Victor Stinner60ac6ed2020-02-07 23:18:08 +01002967 Py_SET_SIZE(sv, newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002968 sv->ob_sval[newsize] = '\0';
2969 sv->ob_shash = -1; /* invalidate cached hash value */
2970 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002971error:
2972 *pv = 0;
2973 Py_DECREF(v);
2974 PyErr_BadInternalCall();
2975 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976}
2977
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978void
Victor Stinnerbed48172019-08-27 00:12:32 +02002979_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002982 for (i = 0; i < UCHAR_MAX + 1; i++)
2983 Py_CLEAR(characters[i]);
2984 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985}
2986
Benjamin Peterson4116f362008-05-27 00:36:20 +00002987/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002988
2989typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 PyObject_HEAD
2991 Py_ssize_t it_index;
2992 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002994
2995static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 _PyObject_GC_UNTRACK(it);
2999 Py_XDECREF(it->it_seq);
3000 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001}
3002
3003static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 Py_VISIT(it->it_seq);
3007 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008}
3009
3010static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 PyBytesObject *seq;
3014 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 assert(it != NULL);
3017 seq = it->it_seq;
3018 if (seq == NULL)
3019 return NULL;
3020 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023 item = PyLong_FromLong(
3024 (unsigned char)seq->ob_sval[it->it_index]);
3025 if (item != NULL)
3026 ++it->it_index;
3027 return item;
3028 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003031 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033}
3034
3035static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303036striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 Py_ssize_t len = 0;
3039 if (it->it_seq)
3040 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003042}
3043
3044PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003047static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303048striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003049{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003050 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003051 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003052 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003053 it->it_seq, it->it_index);
3054 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003055 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003056 }
3057}
3058
3059PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3060
3061static PyObject *
3062striter_setstate(striterobject *it, PyObject *state)
3063{
3064 Py_ssize_t index = PyLong_AsSsize_t(state);
3065 if (index == -1 && PyErr_Occurred())
3066 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003067 if (it->it_seq != NULL) {
3068 if (index < 0)
3069 index = 0;
3070 else if (index > PyBytes_GET_SIZE(it->it_seq))
3071 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3072 it->it_index = index;
3073 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003074 Py_RETURN_NONE;
3075}
3076
3077PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3078
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003079static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3081 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003082 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3083 reduce_doc},
3084 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3085 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003086 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087};
3088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3091 "bytes_iterator", /* tp_name */
3092 sizeof(striterobject), /* tp_basicsize */
3093 0, /* tp_itemsize */
3094 /* methods */
3095 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003096 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 0, /* tp_getattr */
3098 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003099 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 0, /* tp_repr */
3101 0, /* tp_as_number */
3102 0, /* tp_as_sequence */
3103 0, /* tp_as_mapping */
3104 0, /* tp_hash */
3105 0, /* tp_call */
3106 0, /* tp_str */
3107 PyObject_GenericGetAttr, /* tp_getattro */
3108 0, /* tp_setattro */
3109 0, /* tp_as_buffer */
3110 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3111 0, /* tp_doc */
3112 (traverseproc)striter_traverse, /* tp_traverse */
3113 0, /* tp_clear */
3114 0, /* tp_richcompare */
3115 0, /* tp_weaklistoffset */
3116 PyObject_SelfIter, /* tp_iter */
3117 (iternextfunc)striter_next, /* tp_iternext */
3118 striter_methods, /* tp_methods */
3119 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003120};
3121
3122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003123bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 if (!PyBytes_Check(seq)) {
3128 PyErr_BadInternalCall();
3129 return NULL;
3130 }
3131 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3132 if (it == NULL)
3133 return NULL;
3134 it->it_index = 0;
3135 Py_INCREF(seq);
3136 it->it_seq = (PyBytesObject *)seq;
3137 _PyObject_GC_TRACK(it);
3138 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003139}
Victor Stinner00165072015-10-09 01:53:21 +02003140
3141
3142/* _PyBytesWriter API */
3143
/* Divisor applied to the requested size to compute the extra room added
   when overallocating: extra = size / OVERALLOCATE_FACTOR. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor; the same value is
      used on every other non-Windows platform */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3151
3152void
3153_PyBytesWriter_Init(_PyBytesWriter *writer)
3154{
Victor Stinner661aacc2015-10-14 09:41:48 +02003155 /* Set all attributes before small_buffer to 0 */
3156 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003157#ifndef NDEBUG
3158 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3159 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003160#endif
3161}
3162
3163void
3164_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3165{
3166 Py_CLEAR(writer->buffer);
3167}
3168
3169Py_LOCAL_INLINE(char*)
3170_PyBytesWriter_AsString(_PyBytesWriter *writer)
3171{
Victor Stinner661aacc2015-10-14 09:41:48 +02003172 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003173 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003174 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003175 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003176 else if (writer->use_bytearray) {
3177 assert(writer->buffer != NULL);
3178 return PyByteArray_AS_STRING(writer->buffer);
3179 }
3180 else {
3181 assert(writer->buffer != NULL);
3182 return PyBytes_AS_STRING(writer->buffer);
3183 }
Victor Stinner00165072015-10-09 01:53:21 +02003184}
3185
3186Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003187_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003188{
3189 char *start = _PyBytesWriter_AsString(writer);
3190 assert(str != NULL);
3191 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003192 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003193 return str - start;
3194}
3195
Victor Stinner68762572019-10-07 18:42:01 +02003196#ifndef NDEBUG
3197Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003198_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3199{
Victor Stinner00165072015-10-09 01:53:21 +02003200 char *start, *end;
3201
Victor Stinner661aacc2015-10-14 09:41:48 +02003202 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003203 assert(writer->buffer == NULL);
3204 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003205 else {
3206 assert(writer->buffer != NULL);
3207 if (writer->use_bytearray)
3208 assert(PyByteArray_CheckExact(writer->buffer));
3209 else
3210 assert(PyBytes_CheckExact(writer->buffer));
3211 assert(Py_REFCNT(writer->buffer) == 1);
3212 }
Victor Stinner00165072015-10-09 01:53:21 +02003213
Victor Stinner661aacc2015-10-14 09:41:48 +02003214 if (writer->use_bytearray) {
3215 /* bytearray has its own overallocation algorithm,
3216 writer overallocation must be disabled */
3217 assert(!writer->overallocate);
3218 }
3219
3220 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003221 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003222 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003223 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003224 assert(start[writer->allocated] == 0);
3225
3226 end = start + writer->allocated;
3227 assert(str != NULL);
3228 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003229 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003230}
Victor Stinner68762572019-10-07 18:42:01 +02003231#endif
Victor Stinner00165072015-10-09 01:53:21 +02003232
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003233void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003234_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003235{
3236 Py_ssize_t allocated, pos;
3237
Victor Stinner68762572019-10-07 18:42:01 +02003238 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003239 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003240
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003241 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003242 if (writer->overallocate
3243 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3244 /* overallocate to limit the number of realloc() */
3245 allocated += allocated / OVERALLOCATE_FACTOR;
3246 }
3247
Victor Stinner2bf89932015-10-14 11:25:33 +02003248 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003249 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003250 if (writer->use_bytearray) {
3251 if (PyByteArray_Resize(writer->buffer, allocated))
3252 goto error;
3253 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3254 but we cannot use ob_alloc because bytes may need to be moved
3255 to use the whole buffer. bytearray uses an internal optimization
3256 to avoid moving or copying bytes when bytes are removed at the
3257 beginning (ex: del bytearray[:1]). */
3258 }
3259 else {
3260 if (_PyBytes_Resize(&writer->buffer, allocated))
3261 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003262 }
3263 }
3264 else {
3265 /* convert from stack buffer to bytes object buffer */
3266 assert(writer->buffer == NULL);
3267
Victor Stinner661aacc2015-10-14 09:41:48 +02003268 if (writer->use_bytearray)
3269 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3270 else
3271 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003272 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003273 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003274
3275 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003276 char *dest;
3277 if (writer->use_bytearray)
3278 dest = PyByteArray_AS_STRING(writer->buffer);
3279 else
3280 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003281 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003282 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003283 pos);
3284 }
3285
Victor Stinnerb3653a32015-10-09 03:38:24 +02003286 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003287#ifndef NDEBUG
3288 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3289 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003290#endif
Victor Stinner00165072015-10-09 01:53:21 +02003291 }
3292 writer->allocated = allocated;
3293
3294 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003295 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003296 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003297
3298error:
3299 _PyBytesWriter_Dealloc(writer);
3300 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003301}
3302
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003303void*
3304_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3305{
3306 Py_ssize_t new_min_size;
3307
Victor Stinner68762572019-10-07 18:42:01 +02003308 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003309 assert(size >= 0);
3310
3311 if (size == 0) {
3312 /* nothing to do */
3313 return str;
3314 }
3315
3316 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3317 PyErr_NoMemory();
3318 _PyBytesWriter_Dealloc(writer);
3319 return NULL;
3320 }
3321 new_min_size = writer->min_size + size;
3322
3323 if (new_min_size > writer->allocated)
3324 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3325
3326 writer->min_size = new_min_size;
3327 return str;
3328}
3329
Victor Stinner00165072015-10-09 01:53:21 +02003330/* Allocate the buffer to write size bytes.
3331 Return the pointer to the beginning of buffer data.
3332 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003333void*
Victor Stinner00165072015-10-09 01:53:21 +02003334_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3335{
3336 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003337 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003338 assert(size >= 0);
3339
Victor Stinnerb3653a32015-10-09 03:38:24 +02003340 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003341#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003342 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003343 /* In debug mode, don't use the full small buffer because it is less
3344 efficient than bytes and bytearray objects to detect buffer underflow
3345 and buffer overflow. Use 10 bytes of the small buffer to test also
3346 code using the smaller buffer in debug mode.
3347
3348 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3349 in debug mode to also be able to detect stack overflow when running
3350 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3351 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3352 stack overflow. */
3353 writer->allocated = Py_MIN(writer->allocated, 10);
3354 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3355 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003356 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003357#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003358 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003359#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003360 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003361}
3362
3363PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003364_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003365{
Victor Stinner2bf89932015-10-14 11:25:33 +02003366 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003367 PyObject *result;
3368
Victor Stinner68762572019-10-07 18:42:01 +02003369 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003370
Victor Stinner2bf89932015-10-14 11:25:33 +02003371 size = _PyBytesWriter_GetSize(writer, str);
3372 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003373 Py_CLEAR(writer->buffer);
3374 /* Get the empty byte string singleton */
3375 result = PyBytes_FromStringAndSize(NULL, 0);
3376 }
3377 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003378 if (writer->use_bytearray) {
3379 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3380 }
3381 else {
3382 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3383 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 }
3385 else {
3386 result = writer->buffer;
3387 writer->buffer = NULL;
3388
Victor Stinner2bf89932015-10-14 11:25:33 +02003389 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003390 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003391 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003392 Py_DECREF(result);
3393 return NULL;
3394 }
3395 }
3396 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003397 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003398 assert(result == NULL);
3399 return NULL;
3400 }
Victor Stinner00165072015-10-09 01:53:21 +02003401 }
3402 }
Victor Stinner00165072015-10-09 01:53:21 +02003403 }
Victor Stinner00165072015-10-09 01:53:21 +02003404 return result;
3405}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003406
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003407void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003408_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003409 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003410{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003411 char *str = (char *)ptr;
3412
Victor Stinnerce179bf2015-10-09 12:57:22 +02003413 str = _PyBytesWriter_Prepare(writer, str, size);
3414 if (str == NULL)
3415 return NULL;
3416
Christian Heimesf051e432016-09-13 20:22:02 +02003417 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003418 str += size;
3419
3420 return str;
3421}