blob: f9823f18e8699efa3c8213826b1e8a5f518ade16 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
26static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041 For PyBytes_FromString(), the parameter `str' points to a null-terminated
42 string containing exactly `size' bytes.
43
Martin Pantera90a4a92016-05-30 04:04:50 +000044 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 either NULL or else points to a string containing at least `size' bytes.
46 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47 not have to be null-terminated. (Therefore it is safe to construct a
48 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50 bytes (setting the last byte to the null terminating character) and you can
51 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000052 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 alter the data yourself, since the strings may be shared.
54
55 The PyObject member `op->ob_size', which denotes the number of "extra
56 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020057 allocated for string data, not counting the null terminating character.
58 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059 PyBytes_FromStringAndSize()) or the length of the string in the `str'
60 parameter (for PyBytes_FromString()).
61*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
Victor Stinner03dab782015-10-14 00:21:35 +0200324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Victor Stinner03dab782015-10-14 00:21:35 +0200341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360
Victor Stinner03dab782015-10-14 00:21:35 +0200361#undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
/* Bit flags recording which format-specifier flag characters were seen:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800670
Ethan Furmanb95b5612015-01-23 20:05:18 -0800671 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200672 if (*fmt == '%') {
673 *res++ = '%';
674 fmt++;
675 fmtcnt--;
676 continue;
677 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800678 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200679 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800680 Py_ssize_t keylen;
681 PyObject *key;
682 int pcount = 1;
683
684 if (dict == NULL) {
685 PyErr_SetString(PyExc_TypeError,
686 "format requires a mapping");
687 goto error;
688 }
689 ++fmt;
690 --fmtcnt;
691 keystart = fmt;
692 /* Skip over balanced parentheses */
693 while (pcount > 0 && --fmtcnt >= 0) {
694 if (*fmt == ')')
695 --pcount;
696 else if (*fmt == '(')
697 ++pcount;
698 fmt++;
699 }
700 keylen = fmt - keystart - 1;
701 if (fmtcnt < 0 || pcount > 0) {
702 PyErr_SetString(PyExc_ValueError,
703 "incomplete format key");
704 goto error;
705 }
706 key = PyBytes_FromStringAndSize(keystart,
707 keylen);
708 if (key == NULL)
709 goto error;
710 if (args_owned) {
711 Py_DECREF(args);
712 args_owned = 0;
713 }
714 args = PyObject_GetItem(dict, key);
715 Py_DECREF(key);
716 if (args == NULL) {
717 goto error;
718 }
719 args_owned = 1;
720 arglen = -1;
721 argidx = -2;
722 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200723
724 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800725 while (--fmtcnt >= 0) {
726 switch (c = *fmt++) {
727 case '-': flags |= F_LJUST; continue;
728 case '+': flags |= F_SIGN; continue;
729 case ' ': flags |= F_BLANK; continue;
730 case '#': flags |= F_ALT; continue;
731 case '0': flags |= F_ZERO; continue;
732 }
733 break;
734 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200735
736 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800737 if (c == '*') {
738 v = getnextarg(args, arglen, &argidx);
739 if (v == NULL)
740 goto error;
741 if (!PyLong_Check(v)) {
742 PyErr_SetString(PyExc_TypeError,
743 "* wants int");
744 goto error;
745 }
746 width = PyLong_AsSsize_t(v);
747 if (width == -1 && PyErr_Occurred())
748 goto error;
749 if (width < 0) {
750 flags |= F_LJUST;
751 width = -width;
752 }
753 if (--fmtcnt >= 0)
754 c = *fmt++;
755 }
756 else if (c >= 0 && isdigit(c)) {
757 width = c - '0';
758 while (--fmtcnt >= 0) {
759 c = Py_CHARMASK(*fmt++);
760 if (!isdigit(c))
761 break;
762 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763 PyErr_SetString(
764 PyExc_ValueError,
765 "width too big");
766 goto error;
767 }
768 width = width*10 + (c - '0');
769 }
770 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200771
772 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800773 if (c == '.') {
774 prec = 0;
775 if (--fmtcnt >= 0)
776 c = *fmt++;
777 if (c == '*') {
778 v = getnextarg(args, arglen, &argidx);
779 if (v == NULL)
780 goto error;
781 if (!PyLong_Check(v)) {
782 PyErr_SetString(
783 PyExc_TypeError,
784 "* wants int");
785 goto error;
786 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200787 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800788 if (prec == -1 && PyErr_Occurred())
789 goto error;
790 if (prec < 0)
791 prec = 0;
792 if (--fmtcnt >= 0)
793 c = *fmt++;
794 }
795 else if (c >= 0 && isdigit(c)) {
796 prec = c - '0';
797 while (--fmtcnt >= 0) {
798 c = Py_CHARMASK(*fmt++);
799 if (!isdigit(c))
800 break;
801 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802 PyErr_SetString(
803 PyExc_ValueError,
804 "prec too big");
805 goto error;
806 }
807 prec = prec*10 + (c - '0');
808 }
809 }
810 } /* prec */
811 if (fmtcnt >= 0) {
812 if (c == 'h' || c == 'l' || c == 'L') {
813 if (--fmtcnt >= 0)
814 c = *fmt++;
815 }
816 }
817 if (fmtcnt < 0) {
818 PyErr_SetString(PyExc_ValueError,
819 "incomplete format");
820 goto error;
821 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200822 v = getnextarg(args, arglen, &argidx);
823 if (v == NULL)
824 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200825
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300826 if (fmtcnt == 0) {
827 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828 writer.overallocate = 0;
829 }
830
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 sign = 0;
832 fill = ' ';
833 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700834 case 'r':
835 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200837 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (temp == NULL)
839 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 assert(PyUnicode_IS_ASCII(temp));
841 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 if (prec >= 0 && len > prec)
844 len = prec;
845 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200846
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 case 's':
848 // %s is only for 2/3 code; 3 only code should use %b
849 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200850 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800851 if (temp == NULL)
852 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800853 if (prec >= 0 && len > prec)
854 len = prec;
855 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200856
Ethan Furmanb95b5612015-01-23 20:05:18 -0800857 case 'i':
858 case 'd':
859 case 'u':
860 case 'o':
861 case 'x':
862 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200864 && width == -1 && prec == -1
865 && !(flags & (F_SIGN | F_BLANK))
866 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200867 {
868 /* Fast path */
869 int alternate = flags & F_ALT;
870 int base;
871
872 switch(c)
873 {
874 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700875 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200876 case 'd':
877 case 'i':
878 case 'u':
879 base = 10;
880 break;
881 case 'o':
882 base = 8;
883 break;
884 case 'x':
885 case 'X':
886 base = 16;
887 break;
888 }
889
890 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200891 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200892 res = _PyLong_FormatBytesWriter(&writer, res,
893 v, base, alternate);
894 if (res == NULL)
895 goto error;
896 continue;
897 }
898
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300899 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200900 if (!temp)
901 goto error;
902 assert(PyUnicode_IS_ASCII(temp));
903 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904 len = PyUnicode_GET_LENGTH(temp);
905 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800906 if (flags & F_ZERO)
907 fill = '0';
908 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200909
Ethan Furmanb95b5612015-01-23 20:05:18 -0800910 case 'e':
911 case 'E':
912 case 'f':
913 case 'F':
914 case 'g':
915 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916 if (width == -1 && prec == -1
917 && !(flags & (F_SIGN | F_BLANK)))
918 {
919 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200920 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200921 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200922 if (res == NULL)
923 goto error;
924 continue;
925 }
926
Victor Stinnerad771582015-10-09 12:38:53 +0200927 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 goto error;
929 pbuf = PyBytes_AS_STRING(temp);
930 len = PyBytes_GET_SIZE(temp);
931 sign = 1;
932 if (flags & F_ZERO)
933 fill = '0';
934 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200935
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200937 pbuf = &onechar;
938 len = byte_converter(v, &onechar);
939 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200941 if (width == -1) {
942 /* Fast path */
943 *res++ = onechar;
944 continue;
945 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800946 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200947
Ethan Furmanb95b5612015-01-23 20:05:18 -0800948 default:
949 PyErr_Format(PyExc_ValueError,
950 "unsupported format character '%c' (0x%x) "
951 "at index %zd",
952 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200953 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800954 goto error;
955 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200956
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 if (sign) {
958 if (*pbuf == '-' || *pbuf == '+') {
959 sign = *pbuf++;
960 len--;
961 }
962 else if (flags & F_SIGN)
963 sign = '+';
964 else if (flags & F_BLANK)
965 sign = ' ';
966 else
967 sign = 0;
968 }
969 if (width < len)
970 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200971
972 alloc = width;
973 if (sign != 0 && len == width)
974 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200975 /* 2: size preallocated for %s */
976 if (alloc > 2) {
977 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978 if (res == NULL)
979 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800980 }
Victor Stinner60ec6ef2019-10-07 22:31:42 +0200981#ifndef NDEBUG
982 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200983#endif
984
985 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 if (sign) {
987 if (fill != ' ')
988 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (width > len)
990 width--;
991 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992
993 /* Write the numeric prefix for "x", "X" and "o" formats
994 if the alternate form is used.
995 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200996 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800997 assert(pbuf[0] == '0');
998 assert(pbuf[1] == c);
999 if (fill != ' ') {
1000 *res++ = *pbuf++;
1001 *res++ = *pbuf++;
1002 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 width -= 2;
1004 if (width < 0)
1005 width = 0;
1006 len -= 2;
1007 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001008
1009 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011 memset(res, fill, width - len);
1012 res += (width - len);
1013 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015
1016 /* If padding with spaces: write sign if needed and/or numeric
1017 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 if (fill == ' ') {
1019 if (sign)
1020 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001021 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 assert(pbuf[0] == '0');
1023 assert(pbuf[1] == c);
1024 *res++ = *pbuf++;
1025 *res++ = *pbuf++;
1026 }
1027 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001030 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
1033 /* Pad right with the fill character if needed */
1034 if (width > len) {
1035 memset(res, ' ', width - len);
1036 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001039 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 Py_XDECREF(temp);
1043 goto error;
1044 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001046
Victor Stinner60ec6ef2019-10-07 22:31:42 +02001047#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 /* check that we computed the exact size for this write */
1049 assert((res - before) == alloc);
1050#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001051 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052
1053 /* If overallocation was disabled, ensure that it was the last
1054 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001055 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001056 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001057
Ethan Furmanb95b5612015-01-23 20:05:18 -08001058 if (argidx < arglen && !dict) {
1059 PyErr_SetString(PyExc_TypeError,
1060 "not all arguments converted during bytes formatting");
1061 goto error;
1062 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001063
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064 if (args_owned) {
1065 Py_DECREF(args);
1066 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001067 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068
1069 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071 if (args_owned) {
1072 Py_DECREF(args);
1073 }
1074 return NULL;
1075}
1076
Greg Price3a4f6672019-09-12 11:12:22 -07001077/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001078PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 Py_ssize_t len,
1080 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001081 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001082{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001084 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001086 _PyBytesWriter writer;
1087
1088 _PyBytesWriter_Init(&writer);
1089
1090 p = _PyBytesWriter_Alloc(&writer, len);
1091 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001093 writer.overallocate = 1;
1094
Eric V. Smith42454af2016-10-31 09:22:08 -04001095 *first_invalid_escape = NULL;
1096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 end = s + len;
1098 while (s < end) {
1099 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001100 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 continue;
1102 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001105 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 PyErr_SetString(PyExc_ValueError,
1107 "Trailing \\ in string");
1108 goto failed;
1109 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 switch (*s++) {
1112 /* XXX This assumes ASCII! */
1113 case '\n': break;
1114 case '\\': *p++ = '\\'; break;
1115 case '\'': *p++ = '\''; break;
1116 case '\"': *p++ = '\"'; break;
1117 case 'b': *p++ = '\b'; break;
1118 case 'f': *p++ = '\014'; break; /* FF */
1119 case 't': *p++ = '\t'; break;
1120 case 'n': *p++ = '\n'; break;
1121 case 'r': *p++ = '\r'; break;
1122 case 'v': *p++ = '\013'; break; /* VT */
1123 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1124 case '0': case '1': case '2': case '3':
1125 case '4': case '5': case '6': case '7':
1126 c = s[-1] - '0';
1127 if (s < end && '0' <= *s && *s <= '7') {
1128 c = (c<<3) + *s++ - '0';
1129 if (s < end && '0' <= *s && *s <= '7')
1130 c = (c<<3) + *s++ - '0';
1131 }
1132 *p++ = c;
1133 break;
1134 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001135 if (s+1 < end) {
1136 int digit1, digit2;
1137 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1138 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1139 if (digit1 < 16 && digit2 < 16) {
1140 *p++ = (unsigned char)((digit1 << 4) + digit2);
1141 s += 2;
1142 break;
1143 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 /* invalid hexadecimal digits */
1146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001148 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001149 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001150 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 goto failed;
1152 }
1153 if (strcmp(errors, "replace") == 0) {
1154 *p++ = '?';
1155 } else if (strcmp(errors, "ignore") == 0)
1156 /* do nothing */;
1157 else {
1158 PyErr_Format(PyExc_ValueError,
1159 "decoding error; unknown "
1160 "error handling code: %.400s",
1161 errors);
1162 goto failed;
1163 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001164 /* skip \x */
1165 if (s < end && Py_ISXDIGIT(s[0]))
1166 s++; /* and a hexdigit */
1167 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001170 if (*first_invalid_escape == NULL) {
1171 *first_invalid_escape = s-1; /* Back up one char, since we've
1172 already incremented s. */
1173 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001175 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 }
1177 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001178
1179 return _PyBytesWriter_Finish(&writer, p);
1180
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001181 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001182 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184}
1185
Eric V. Smith42454af2016-10-31 09:22:08 -04001186PyObject *PyBytes_DecodeEscape(const char *s,
1187 Py_ssize_t len,
1188 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001189 Py_ssize_t Py_UNUSED(unicode),
1190 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001191{
1192 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001193 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001194 &first_invalid_escape);
1195 if (result == NULL)
1196 return NULL;
1197 if (first_invalid_escape != NULL) {
1198 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1199 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001200 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001201 Py_DECREF(result);
1202 return NULL;
1203 }
1204 }
1205 return result;
1206
1207}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208/* -------------------------------------------------------------------- */
1209/* object api */
1210
1211Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001212PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 if (!PyBytes_Check(op)) {
1215 PyErr_Format(PyExc_TypeError,
1216 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1217 return -1;
1218 }
1219 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220}
1221
1222char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001223PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 if (!PyBytes_Check(op)) {
1226 PyErr_Format(PyExc_TypeError,
1227 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1228 return NULL;
1229 }
1230 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231}
1232
1233int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001234PyBytes_AsStringAndSize(PyObject *obj,
1235 char **s,
1236 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 if (s == NULL) {
1239 PyErr_BadInternalCall();
1240 return -1;
1241 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 if (!PyBytes_Check(obj)) {
1244 PyErr_Format(PyExc_TypeError,
1245 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1246 return -1;
1247 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 *s = PyBytes_AS_STRING(obj);
1250 if (len != NULL)
1251 *len = PyBytes_GET_SIZE(obj);
1252 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001253 PyErr_SetString(PyExc_ValueError,
1254 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 return -1;
1256 }
1257 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258}
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
1260/* -------------------------------------------------------------------- */
1261/* Methods */
1262
Eric Smith0923d1d2009-04-16 20:16:10 +00001263#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001264
1265#include "stringlib/fastsearch.h"
1266#include "stringlib/count.h"
1267#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001268#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001269#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001270#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001271#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001272
Eric Smith0f78bff2009-11-30 01:01:42 +00001273#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001274
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275PyObject *
1276PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001277{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001278 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001280 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 unsigned char quote, *s, *p;
1283
1284 /* Compute size of output string */
1285 squotes = dquotes = 0;
1286 newsize = 3; /* b'' */
1287 s = (unsigned char*)op->ob_sval;
1288 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001289 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001290 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001291 case '\'': squotes++; break;
1292 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001294 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 default:
1296 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001297 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001299 if (newsize > PY_SSIZE_T_MAX - incr)
1300 goto overflow;
1301 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 }
1303 quote = '\'';
1304 if (smartquotes && squotes && !dquotes)
1305 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 if (squotes && quote == '\'') {
1307 if (newsize > PY_SSIZE_T_MAX - squotes)
1308 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311
1312 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 if (v == NULL) {
1314 return NULL;
1315 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001316 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001317
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 *p++ = 'b', *p++ = quote;
1319 for (i = 0; i < length; i++) {
1320 unsigned char c = op->ob_sval[i];
1321 if (c == quote || c == '\\')
1322 *p++ = '\\', *p++ = c;
1323 else if (c == '\t')
1324 *p++ = '\\', *p++ = 't';
1325 else if (c == '\n')
1326 *p++ = '\\', *p++ = 'n';
1327 else if (c == '\r')
1328 *p++ = '\\', *p++ = 'r';
1329 else if (c < ' ' || c >= 0x7f) {
1330 *p++ = '\\';
1331 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001332 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1333 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 else
1336 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001339 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001340 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001341
1342 overflow:
1343 PyErr_SetString(PyExc_OverflowError,
1344 "bytes object is too large to make repr");
1345 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001346}
1347
Neal Norwitz6968b052007-02-27 19:02:19 +00001348static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001349bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001352}
1353
Neal Norwitz6968b052007-02-27 19:02:19 +00001354static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001355bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001356{
Victor Stinner331a6a52019-05-27 16:39:22 +02001357 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001358 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001360 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001362 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 }
1364 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001365}
1366
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001368bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371}
Neal Norwitz6968b052007-02-27 19:02:19 +00001372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373/* This is also used by PyBytes_Concat() */
1374static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001375bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 Py_buffer va, vb;
1378 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 va.len = -1;
1381 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001382 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1383 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001385 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 goto done;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 /* Optimize end cases */
1390 if (va.len == 0 && PyBytes_CheckExact(b)) {
1391 result = b;
1392 Py_INCREF(result);
1393 goto done;
1394 }
1395 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1396 result = a;
1397 Py_INCREF(result);
1398 goto done;
1399 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001401 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 PyErr_NoMemory();
1403 goto done;
1404 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001406 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 if (result != NULL) {
1408 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1409 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1410 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411
1412 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 if (va.len != -1)
1414 PyBuffer_Release(&va);
1415 if (vb.len != -1)
1416 PyBuffer_Release(&vb);
1417 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418}
Neal Norwitz6968b052007-02-27 19:02:19 +00001419
1420static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001421bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001422{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001423 Py_ssize_t i;
1424 Py_ssize_t j;
1425 Py_ssize_t size;
1426 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 size_t nbytes;
1428 if (n < 0)
1429 n = 0;
1430 /* watch out for overflows: the size can overflow int,
1431 * and the # of bytes needed can overflow size_t
1432 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001433 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 PyErr_SetString(PyExc_OverflowError,
1435 "repeated bytes are too long");
1436 return NULL;
1437 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001438 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1440 Py_INCREF(a);
1441 return (PyObject *)a;
1442 }
1443 nbytes = (size_t)size;
1444 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1445 PyErr_SetString(PyExc_OverflowError,
1446 "repeated bytes are too long");
1447 return NULL;
1448 }
1449 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1450 if (op == NULL)
1451 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001452 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 op->ob_shash = -1;
1454 op->ob_sval[size] = '\0';
1455 if (Py_SIZE(a) == 1 && n > 0) {
1456 memset(op->ob_sval, a->ob_sval[0] , n);
1457 return (PyObject *) op;
1458 }
1459 i = 0;
1460 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001461 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 i = Py_SIZE(a);
1463 }
1464 while (i < size) {
1465 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001466 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 i += j;
1468 }
1469 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001470}
1471
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001472static int
1473bytes_contains(PyObject *self, PyObject *arg)
1474{
1475 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1476}
1477
Neal Norwitz6968b052007-02-27 19:02:19 +00001478static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001480{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 if (i < 0 || i >= Py_SIZE(a)) {
1482 PyErr_SetString(PyExc_IndexError, "index out of range");
1483 return NULL;
1484 }
1485 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001486}
1487
Benjamin Peterson621b4302016-09-09 13:54:34 -07001488static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001489bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1490{
1491 int cmp;
1492 Py_ssize_t len;
1493
1494 len = Py_SIZE(a);
1495 if (Py_SIZE(b) != len)
1496 return 0;
1497
1498 if (a->ob_sval[0] != b->ob_sval[0])
1499 return 0;
1500
1501 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1502 return (cmp == 0);
1503}
1504
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001506bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 int c;
1509 Py_ssize_t len_a, len_b;
1510 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001511 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 /* Make sure both arguments are strings. */
1514 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001515 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001516 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001517 rc = PyObject_IsInstance((PyObject*)a,
1518 (PyObject*)&PyUnicode_Type);
1519 if (!rc)
1520 rc = PyObject_IsInstance((PyObject*)b,
1521 (PyObject*)&PyUnicode_Type);
1522 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001524 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001525 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001526 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001527 return NULL;
1528 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001529 else {
1530 rc = PyObject_IsInstance((PyObject*)a,
1531 (PyObject*)&PyLong_Type);
1532 if (!rc)
1533 rc = PyObject_IsInstance((PyObject*)b,
1534 (PyObject*)&PyLong_Type);
1535 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001536 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001537 if (rc) {
1538 if (PyErr_WarnEx(PyExc_BytesWarning,
1539 "Comparison between bytes and int", 1))
1540 return NULL;
1541 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001542 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 }
stratakise8b19652017-11-02 11:32:54 +01001544 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001546 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001548 case Py_EQ:
1549 case Py_LE:
1550 case Py_GE:
1551 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001552 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001553 case Py_NE:
1554 case Py_LT:
1555 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001556 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001557 default:
1558 PyErr_BadArgument();
1559 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
1561 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 else if (op == Py_EQ || op == Py_NE) {
1563 int eq = bytes_compare_eq(a, b);
1564 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001565 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001566 }
1567 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001568 len_a = Py_SIZE(a);
1569 len_b = Py_SIZE(b);
1570 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001571 if (min_len > 0) {
1572 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001573 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001574 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001576 else
1577 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001578 if (c != 0)
1579 Py_RETURN_RICHCOMPARE(c, 0, op);
1580 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001582}
1583
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001584static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001585bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001586{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001587 if (a->ob_shash == -1) {
1588 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001589 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001590 }
1591 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001592}
1593
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001595bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 if (PyIndex_Check(item)) {
1598 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1599 if (i == -1 && PyErr_Occurred())
1600 return NULL;
1601 if (i < 0)
1602 i += PyBytes_GET_SIZE(self);
1603 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1604 PyErr_SetString(PyExc_IndexError,
1605 "index out of range");
1606 return NULL;
1607 }
1608 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1609 }
1610 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001611 Py_ssize_t start, stop, step, slicelength, i;
1612 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 char* source_buf;
1614 char* result_buf;
1615 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001616
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001617 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 return NULL;
1619 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001620 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1621 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 if (slicelength <= 0) {
1624 return PyBytes_FromStringAndSize("", 0);
1625 }
1626 else if (start == 0 && step == 1 &&
1627 slicelength == PyBytes_GET_SIZE(self) &&
1628 PyBytes_CheckExact(self)) {
1629 Py_INCREF(self);
1630 return (PyObject *)self;
1631 }
1632 else if (step == 1) {
1633 return PyBytes_FromStringAndSize(
1634 PyBytes_AS_STRING(self) + start,
1635 slicelength);
1636 }
1637 else {
1638 source_buf = PyBytes_AS_STRING(self);
1639 result = PyBytes_FromStringAndSize(NULL, slicelength);
1640 if (result == NULL)
1641 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 result_buf = PyBytes_AS_STRING(result);
1644 for (cur = start, i = 0; i < slicelength;
1645 cur += step, i++) {
1646 result_buf[i] = source_buf[cur];
1647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 return result;
1650 }
1651 }
1652 else {
1653 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001654 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 Py_TYPE(item)->tp_name);
1656 return NULL;
1657 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658}
1659
1660static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001661bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1664 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665}
1666
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001667static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 (lenfunc)bytes_length, /*sq_length*/
1669 (binaryfunc)bytes_concat, /*sq_concat*/
1670 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1671 (ssizeargfunc)bytes_item, /*sq_item*/
1672 0, /*sq_slice*/
1673 0, /*sq_ass_item*/
1674 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001675 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676};
1677
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001678static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 (lenfunc)bytes_length,
1680 (binaryfunc)bytes_subscript,
1681 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682};
1683
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001684static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 (getbufferproc)bytes_buffer_getbuffer,
1686 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687};
1688
1689
/* Strip modes accepted by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1693
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001694/*[clinic input]
1695bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001697 sep: object = None
1698 The delimiter according which to split the bytes.
1699 None (the default value) means split on ASCII whitespace characters
1700 (space, tab, return, newline, formfeed, vertical tab).
1701 maxsplit: Py_ssize_t = -1
1702 Maximum number of splits to do.
1703 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705Return a list of the sections in the bytes, using sep as the delimiter.
1706[clinic start generated code]*/
1707
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001708static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001709bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1710/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711{
1712 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 const char *s = PyBytes_AS_STRING(self), *sub;
1714 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001715 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 if (maxsplit < 0)
1718 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001719 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001721 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 return NULL;
1723 sub = vsub.buf;
1724 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1727 PyBuffer_Release(&vsub);
1728 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001729}
1730
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001731/*[clinic input]
1732bytes.partition
1733
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001734 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001735 /
1736
1737Partition the bytes into three parts using the given separator.
1738
1739This will search for the separator sep in the bytes. If the separator is found,
1740returns a 3-tuple containing the part before the separator, the separator
1741itself, and the part after it.
1742
1743If the separator is not found, returns a 3-tuple containing the original bytes
1744object and two empty bytes objects.
1745[clinic start generated code]*/
1746
Neal Norwitz6968b052007-02-27 19:02:19 +00001747static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001748bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001749/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001750{
Neal Norwitz6968b052007-02-27 19:02:19 +00001751 return stringlib_partition(
1752 (PyObject*) self,
1753 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001754 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001755 );
1756}
1757
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001758/*[clinic input]
1759bytes.rpartition
1760
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762 /
1763
1764Partition the bytes into three parts using the given separator.
1765
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001766This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767the separator is found, returns a 3-tuple containing the part before the
1768separator, the separator itself, and the part after it.
1769
1770If the separator is not found, returns a 3-tuple containing two empty bytes
1771objects and the original bytes object.
1772[clinic start generated code]*/
1773
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001774static PyObject *
1775bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001776/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return stringlib_rpartition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001783}
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785/*[clinic input]
1786bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001787
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788Return a list of the sections in the bytes, using sep as the delimiter.
1789
1790Splitting is done starting at the end of the bytes and working to the front.
1791[clinic start generated code]*/
1792
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001793static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001794bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1795/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796{
1797 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 const char *s = PyBytes_AS_STRING(self), *sub;
1799 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 if (maxsplit < 0)
1803 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 return NULL;
1808 sub = vsub.buf;
1809 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1812 PyBuffer_Release(&vsub);
1813 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001814}
1815
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817/*[clinic input]
1818bytes.join
1819
1820 iterable_of_bytes: object
1821 /
1822
1823Concatenate any number of bytes objects.
1824
1825The bytes whose method is called is inserted in between each pair.
1826
1827The result is returned as a new bytes object.
1828
1829Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1830[clinic start generated code]*/
1831
Neal Norwitz6968b052007-02-27 19:02:19 +00001832static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001833bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1834/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001835{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001836 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001837}
1838
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839PyObject *
1840_PyBytes_Join(PyObject *sep, PyObject *x)
1841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 assert(sep != NULL && PyBytes_Check(sep));
1843 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001844 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845}
1846
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001847static PyObject *
1848bytes_find(PyBytesObject *self, PyObject *args)
1849{
1850 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1851}
1852
1853static PyObject *
1854bytes_index(PyBytesObject *self, PyObject *args)
1855{
1856 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1857}
1858
1859
1860static PyObject *
1861bytes_rfind(PyBytesObject *self, PyObject *args)
1862{
1863 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1864}
1865
1866
1867static PyObject *
1868bytes_rindex(PyBytesObject *self, PyObject *args)
1869{
1870 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871}
1872
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
1874Py_LOCAL_INLINE(PyObject *)
1875do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 Py_buffer vsep;
1878 char *s = PyBytes_AS_STRING(self);
1879 Py_ssize_t len = PyBytes_GET_SIZE(self);
1880 char *sep;
1881 Py_ssize_t seplen;
1882 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001884 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 return NULL;
1886 sep = vsep.buf;
1887 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 i = 0;
1890 if (striptype != RIGHTSTRIP) {
1891 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1892 i++;
1893 }
1894 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 j = len;
1897 if (striptype != LEFTSTRIP) {
1898 do {
1899 j--;
1900 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1901 j++;
1902 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1907 Py_INCREF(self);
1908 return (PyObject*)self;
1909 }
1910 else
1911 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001912}
1913
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
1915Py_LOCAL_INLINE(PyObject *)
1916do_strip(PyBytesObject *self, int striptype)
1917{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 char *s = PyBytes_AS_STRING(self);
1919 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 i = 0;
1922 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001923 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 i++;
1925 }
1926 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 j = len;
1929 if (striptype != LEFTSTRIP) {
1930 do {
1931 j--;
David Malcolm96960882010-11-05 17:23:41 +00001932 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 j++;
1934 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1937 Py_INCREF(self);
1938 return (PyObject*)self;
1939 }
1940 else
1941 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942}
1943
1944
1945Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001946do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001948 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001949 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 }
1951 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952}
1953
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001954/*[clinic input]
1955bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957 bytes: object = None
1958 /
1959
1960Strip leading and trailing bytes contained in the argument.
1961
1962If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1963[clinic start generated code]*/
1964
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001965static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001966bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001967/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001969 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001970}
1971
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001972/*[clinic input]
1973bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975 bytes: object = None
1976 /
1977
1978Strip leading bytes contained in the argument.
1979
1980If the argument is omitted or None, strip leading ASCII whitespace.
1981[clinic start generated code]*/
1982
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983static PyObject *
1984bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001985/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986{
1987 return do_argstrip(self, LEFTSTRIP, bytes);
1988}
1989
1990/*[clinic input]
1991bytes.rstrip
1992
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001993 bytes: object = None
1994 /
1995
1996Strip trailing bytes contained in the argument.
1997
1998If the argument is omitted or None, strip trailing ASCII whitespace.
1999[clinic start generated code]*/
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001static PyObject *
2002bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002003/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004{
2005 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002006}
Neal Norwitz6968b052007-02-27 19:02:19 +00002007
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002009static PyObject *
2010bytes_count(PyBytesObject *self, PyObject *args)
2011{
2012 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2013}
2014
2015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002016/*[clinic input]
2017bytes.translate
2018
Victor Stinner049e5092014-08-17 22:20:00 +02002019 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002020 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002022 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023
2024Return a copy with each character mapped by the given translation table.
2025
Martin Panter1b6c6da2016-08-27 08:35:02 +00002026All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027The remaining characters are mapped through the given translation table.
2028[clinic start generated code]*/
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002031bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002032 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002033/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002035 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002036 Py_buffer table_view = {NULL, NULL};
2037 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002039 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002042 Py_ssize_t inlen, tablen, dellen = 0;
2043 PyObject *result;
2044 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002046 if (PyBytes_Check(table)) {
2047 table_chars = PyBytes_AS_STRING(table);
2048 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002050 else if (table == Py_None) {
2051 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 tablen = 256;
2053 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002054 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002055 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002056 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002057 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002058 tablen = table_view.len;
2059 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002061 if (tablen != 256) {
2062 PyErr_SetString(PyExc_ValueError,
2063 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002064 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 return NULL;
2066 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068 if (deletechars != NULL) {
2069 if (PyBytes_Check(deletechars)) {
2070 del_table_chars = PyBytes_AS_STRING(deletechars);
2071 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002073 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002074 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002075 PyBuffer_Release(&table_view);
2076 return NULL;
2077 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002078 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002079 dellen = del_table_view.len;
2080 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 }
2082 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 dellen = 0;
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 inlen = PyBytes_GET_SIZE(input_obj);
2088 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002089 if (result == NULL) {
2090 PyBuffer_Release(&del_table_view);
2091 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002093 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002094 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 /* If no deletions are required, use faster code */
2099 for (i = inlen; --i >= 0; ) {
2100 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002101 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 changed = 1;
2103 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002104 if (!changed && PyBytes_CheckExact(input_obj)) {
2105 Py_INCREF(input_obj);
2106 Py_DECREF(result);
2107 result = input_obj;
2108 }
2109 PyBuffer_Release(&del_table_view);
2110 PyBuffer_Release(&table_view);
2111 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002114 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 for (i = 0; i < 256; i++)
2116 trans_table[i] = Py_CHARMASK(i);
2117 } else {
2118 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002119 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002121 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002125 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 for (i = inlen; --i >= 0; ) {
2128 c = Py_CHARMASK(*input++);
2129 if (trans_table[c] != -1)
2130 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2131 continue;
2132 changed = 1;
2133 }
2134 if (!changed && PyBytes_CheckExact(input_obj)) {
2135 Py_DECREF(result);
2136 Py_INCREF(input_obj);
2137 return input_obj;
2138 }
2139 /* Fix the size of the resulting string */
2140 if (inlen > 0)
2141 _PyBytes_Resize(&result, output - output_start);
2142 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143}
2144
2145
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002146/*[clinic input]
2147
2148@staticmethod
2149bytes.maketrans
2150
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002151 frm: Py_buffer
2152 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 /
2154
2155Return a translation table useable for the bytes or bytearray translate method.
2156
2157The returned table will be one where each byte in frm is mapped to the byte at
2158the same position in to.
2159
2160The bytes objects frm and to must be of the same length.
2161[clinic start generated code]*/
2162
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002163static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002164bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002165/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002166{
2167 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002168}
2169
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170
2171/*[clinic input]
2172bytes.replace
2173
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002174 old: Py_buffer
2175 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002176 count: Py_ssize_t = -1
2177 Maximum number of occurrences to replace.
2178 -1 (the default value) means replace all occurrences.
2179 /
2180
2181Return a copy with all occurrences of substring old replaced by new.
2182
2183If the optional argument count is given, only the first count occurrences are
2184replaced.
2185[clinic start generated code]*/
2186
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002187static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002188bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002189 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002190/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002191{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002192 return stringlib_replace((PyObject *)self,
2193 (const char *)old->buf, old->len,
2194 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195}
2196
2197/** End DALKE **/
2198
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002200static PyObject *
2201bytes_startswith(PyBytesObject *self, PyObject *args)
2202{
2203 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2204}
2205
2206static PyObject *
2207bytes_endswith(PyBytesObject *self, PyObject *args)
2208{
2209 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2210}
2211
2212
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213/*[clinic input]
2214bytes.decode
2215
2216 encoding: str(c_default="NULL") = 'utf-8'
2217 The encoding with which to decode the bytes.
2218 errors: str(c_default="NULL") = 'strict'
2219 The error handling scheme to use for the handling of decoding errors.
2220 The default is 'strict' meaning that decoding errors raise a
2221 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2222 as well as any other name registered with codecs.register_error that
2223 can handle UnicodeDecodeErrors.
2224
2225Decode the bytes using the codec registered for encoding.
2226[clinic start generated code]*/
2227
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002228static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002229bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002230 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002231/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002233 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002234}
2235
Guido van Rossum20188312006-05-05 15:15:40 +00002236
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237/*[clinic input]
2238bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002239
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002240 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241
2242Return a list of the lines in the bytes, breaking at line boundaries.
2243
2244Line breaks are not included in the resulting list unless keepends is given and
2245true.
2246[clinic start generated code]*/
2247
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002248static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002249bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002250/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002252 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002253 (PyObject*) self, PyBytes_AS_STRING(self),
2254 PyBytes_GET_SIZE(self), keepends
2255 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002256}
2257
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002258/*[clinic input]
2259@classmethod
2260bytes.fromhex
2261
2262 string: unicode
2263 /
2264
2265Create a bytes object from a string of hexadecimal numbers.
2266
2267Spaces between two numbers are accepted.
2268Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2269[clinic start generated code]*/
2270
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002272bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002273/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002274{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002275 PyObject *result = _PyBytes_FromHex(string, 0);
2276 if (type != &PyBytes_Type && result != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002277 Py_SETREF(result, _PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002278 }
2279 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002280}
2281
2282PyObject*
2283_PyBytes_FromHex(PyObject *string, int use_bytearray)
2284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002286 Py_ssize_t hexlen, invalid_char;
2287 unsigned int top, bot;
2288 Py_UCS1 *str, *end;
2289 _PyBytesWriter writer;
2290
2291 _PyBytesWriter_Init(&writer);
2292 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002293
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002294 assert(PyUnicode_Check(string));
2295 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002298
Victor Stinner2bf89932015-10-14 11:25:33 +02002299 if (!PyUnicode_IS_ASCII(string)) {
2300 void *data = PyUnicode_DATA(string);
2301 unsigned int kind = PyUnicode_KIND(string);
2302 Py_ssize_t i;
2303
2304 /* search for the first non-ASCII character */
2305 for (i = 0; i < hexlen; i++) {
2306 if (PyUnicode_READ(kind, data, i) >= 128)
2307 break;
2308 }
2309 invalid_char = i;
2310 goto error;
2311 }
2312
2313 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2314 str = PyUnicode_1BYTE_DATA(string);
2315
2316 /* This overestimates if there are spaces */
2317 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2318 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002320
2321 end = str + hexlen;
2322 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002324 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002325 do {
2326 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002327 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002328 if (str >= end)
2329 break;
2330 }
2331
2332 top = _PyLong_DigitValue[*str];
2333 if (top >= 16) {
2334 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 goto error;
2336 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002337 str++;
2338
2339 bot = _PyLong_DigitValue[*str];
2340 if (bot >= 16) {
2341 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2342 goto error;
2343 }
2344 str++;
2345
2346 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002348
2349 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350
2351 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002352 PyErr_Format(PyExc_ValueError,
2353 "non-hexadecimal number found in "
2354 "fromhex() arg at position %zd", invalid_char);
2355 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002357}
2358
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002359/*[clinic input]
2360bytes.hex
2361
2362 sep: object = NULL
2363 An optional single character or byte to separate hex bytes.
2364 bytes_per_sep: int = 1
2365 How many bytes between separators. Positive values count from the
2366 right, negative values count from the left.
2367
2368Create a str of hexadecimal numbers from a bytes object.
2369
2370Example:
2371>>> value = b'\xb9\x01\xef'
2372>>> value.hex()
2373'b901ef'
2374>>> value.hex(':')
2375'b9:01:ef'
2376>>> value.hex(':', 2)
2377'b9:01ef'
2378>>> value.hex(':', -2)
2379'b901:ef'
2380[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002381
2382static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002383bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2384/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002385{
2386 char* argbuf = PyBytes_AS_STRING(self);
2387 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002388 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002389}
2390
2391static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302392bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002395}
2396
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002397
2398static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002399bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002400 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302401 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002403 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002404 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002405 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002406 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002407 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002408 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002409 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002410 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002411 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002412 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002413 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002414 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302415 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302417 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302419 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002420 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302421 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302423 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302425 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302427 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302429 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002430 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002431 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002432 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302433 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002434 BYTES_LSTRIP_METHODDEF
2435 BYTES_MAKETRANS_METHODDEF
2436 BYTES_PARTITION_METHODDEF
2437 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002438 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2439 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002440 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002441 BYTES_RPARTITION_METHODDEF
2442 BYTES_RSPLIT_METHODDEF
2443 BYTES_RSTRIP_METHODDEF
2444 BYTES_SPLIT_METHODDEF
2445 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002446 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002447 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002448 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302449 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302451 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002452 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302453 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002454 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002456};
2457
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002459bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002460{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002461 if (!PyBytes_Check(self)) {
2462 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002463 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002464 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002465 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002466}
2467
2468static PyNumberMethods bytes_as_number = {
2469 0, /*nb_add*/
2470 0, /*nb_subtract*/
2471 0, /*nb_multiply*/
2472 bytes_mod, /*nb_remainder*/
2473};
2474
2475static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002476bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
2478static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002479bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002480{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 PyObject *x = NULL;
2482 const char *encoding = NULL;
2483 const char *errors = NULL;
2484 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002485 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 Py_ssize_t size;
2487 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002488 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002491 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2493 &encoding, &errors))
2494 return NULL;
2495 if (x == NULL) {
2496 if (encoding != NULL || errors != NULL) {
2497 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002498 encoding != NULL ?
2499 "encoding without a string argument" :
2500 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 return NULL;
2502 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002503 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002506 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002508 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002510 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 return NULL;
2512 }
2513 new = PyUnicode_AsEncodedString(x, encoding, errors);
2514 if (new == NULL)
2515 return NULL;
2516 assert(PyBytes_Check(new));
2517 return new;
2518 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002519
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002520 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002521 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002522 PyUnicode_Check(x) ?
2523 "string argument without an encoding" :
2524 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002525 return NULL;
2526 }
2527
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002528 /* We'd like to call PyObject_Bytes here, but we need to check for an
2529 integer argument before deferring to PyBytes_FromObject, something
2530 PyObject_Bytes doesn't do. */
2531 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2532 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002533 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002534 Py_DECREF(func);
2535 if (new == NULL)
2536 return NULL;
2537 if (!PyBytes_Check(new)) {
2538 PyErr_Format(PyExc_TypeError,
2539 "__bytes__ returned non-bytes (type %.200s)",
2540 Py_TYPE(new)->tp_name);
2541 Py_DECREF(new);
2542 return NULL;
2543 }
2544 return new;
2545 }
2546 else if (PyErr_Occurred())
2547 return NULL;
2548
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002549 if (PyUnicode_Check(x)) {
2550 PyErr_SetString(PyExc_TypeError,
2551 "string argument without an encoding");
2552 return NULL;
2553 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002555 if (PyIndex_Check(x)) {
2556 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2557 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002558 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002559 return NULL;
2560 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002561 }
INADA Naokia634e232017-01-06 17:32:01 +09002562 else {
2563 if (size < 0) {
2564 PyErr_SetString(PyExc_ValueError, "negative count");
2565 return NULL;
2566 }
2567 new = _PyBytes_FromSize(size, 1);
2568 if (new == NULL)
2569 return NULL;
2570 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002571 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002574 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002575}
2576
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002577static PyObject*
2578_PyBytes_FromBuffer(PyObject *x)
2579{
2580 PyObject *new;
2581 Py_buffer view;
2582
2583 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2584 return NULL;
2585
2586 new = PyBytes_FromStringAndSize(NULL, view.len);
2587 if (!new)
2588 goto fail;
2589 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2590 &view, view.len, 'C') < 0)
2591 goto fail;
2592 PyBuffer_Release(&view);
2593 return new;
2594
2595fail:
2596 Py_XDECREF(new);
2597 PyBuffer_Release(&view);
2598 return NULL;
2599}
2600
2601static PyObject*
2602_PyBytes_FromList(PyObject *x)
2603{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002604 Py_ssize_t i, size = PyList_GET_SIZE(x);
2605 Py_ssize_t value;
2606 char *str;
2607 PyObject *item;
2608 _PyBytesWriter writer;
2609
2610 _PyBytesWriter_Init(&writer);
2611 str = _PyBytesWriter_Alloc(&writer, size);
2612 if (str == NULL)
2613 return NULL;
2614 writer.overallocate = 1;
2615 size = writer.allocated;
2616
2617 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2618 item = PyList_GET_ITEM(x, i);
2619 Py_INCREF(item);
2620 value = PyNumber_AsSsize_t(item, NULL);
2621 Py_DECREF(item);
2622 if (value == -1 && PyErr_Occurred())
2623 goto error;
2624
2625 if (value < 0 || value >= 256) {
2626 PyErr_SetString(PyExc_ValueError,
2627 "bytes must be in range(0, 256)");
2628 goto error;
2629 }
2630
2631 if (i >= size) {
2632 str = _PyBytesWriter_Resize(&writer, str, size+1);
2633 if (str == NULL)
2634 return NULL;
2635 size = writer.allocated;
2636 }
2637 *str++ = (char) value;
2638 }
2639 return _PyBytesWriter_Finish(&writer, str);
2640
2641 error:
2642 _PyBytesWriter_Dealloc(&writer);
2643 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002644}
2645
2646static PyObject*
2647_PyBytes_FromTuple(PyObject *x)
2648{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002649 PyObject *bytes;
2650 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2651 Py_ssize_t value;
2652 char *str;
2653 PyObject *item;
2654
2655 bytes = PyBytes_FromStringAndSize(NULL, size);
2656 if (bytes == NULL)
2657 return NULL;
2658 str = ((PyBytesObject *)bytes)->ob_sval;
2659
2660 for (i = 0; i < size; i++) {
2661 item = PyTuple_GET_ITEM(x, i);
2662 value = PyNumber_AsSsize_t(item, NULL);
2663 if (value == -1 && PyErr_Occurred())
2664 goto error;
2665
2666 if (value < 0 || value >= 256) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "bytes must be in range(0, 256)");
2669 goto error;
2670 }
2671 *str++ = (char) value;
2672 }
2673 return bytes;
2674
2675 error:
2676 Py_DECREF(bytes);
2677 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002678}
2679
2680static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002681_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002682{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002683 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002685 _PyBytesWriter writer;
2686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002688 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 if (size == -1 && PyErr_Occurred())
2690 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002691
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002692 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002693 str = _PyBytesWriter_Alloc(&writer, size);
2694 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002696 writer.overallocate = 1;
2697 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* Run the iterator to exhaustion */
2700 for (i = 0; ; i++) {
2701 PyObject *item;
2702 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Get the next item */
2705 item = PyIter_Next(it);
2706 if (item == NULL) {
2707 if (PyErr_Occurred())
2708 goto error;
2709 break;
2710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002713 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 Py_DECREF(item);
2715 if (value == -1 && PyErr_Occurred())
2716 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 /* Range check */
2719 if (value < 0 || value >= 256) {
2720 PyErr_SetString(PyExc_ValueError,
2721 "bytes must be in range(0, 256)");
2722 goto error;
2723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 /* Append the byte */
2726 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002727 str = _PyBytesWriter_Resize(&writer, str, size+1);
2728 if (str == NULL)
2729 return NULL;
2730 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002732 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002734
2735 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
2737 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002738 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740}
2741
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002742PyObject *
2743PyBytes_FromObject(PyObject *x)
2744{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002745 PyObject *it, *result;
2746
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002747 if (x == NULL) {
2748 PyErr_BadInternalCall();
2749 return NULL;
2750 }
2751
2752 if (PyBytes_CheckExact(x)) {
2753 Py_INCREF(x);
2754 return x;
2755 }
2756
2757 /* Use the modern buffer interface */
2758 if (PyObject_CheckBuffer(x))
2759 return _PyBytes_FromBuffer(x);
2760
2761 if (PyList_CheckExact(x))
2762 return _PyBytes_FromList(x);
2763
2764 if (PyTuple_CheckExact(x))
2765 return _PyBytes_FromTuple(x);
2766
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002767 if (!PyUnicode_Check(x)) {
2768 it = PyObject_GetIter(x);
2769 if (it != NULL) {
2770 result = _PyBytes_FromIterator(it, x);
2771 Py_DECREF(it);
2772 return result;
2773 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002774 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2775 return NULL;
2776 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002777 }
2778
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002779 PyErr_Format(PyExc_TypeError,
2780 "cannot convert '%.200s' object to bytes",
2781 x->ob_type->tp_name);
2782 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002783}
2784
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002786bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 PyObject *tmp, *pnew;
2789 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 assert(PyType_IsSubtype(type, &PyBytes_Type));
2792 tmp = bytes_new(&PyBytes_Type, args, kwds);
2793 if (tmp == NULL)
2794 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002795 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 n = PyBytes_GET_SIZE(tmp);
2797 pnew = type->tp_alloc(type, n);
2798 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002799 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 PyBytes_AS_STRING(tmp), n+1);
2801 ((PyBytesObject *)pnew)->ob_shash =
2802 ((PyBytesObject *)tmp)->ob_shash;
2803 }
2804 Py_DECREF(tmp);
2805 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806}
2807
/* Docstring for the bytes type: the accepted constructor call forms,
   followed by a list of the kinds of source object bytes can be built
   from.  The text uses backslash-newline continuations, so any leading
   whitespace on a continued line is part of the string itself. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002808PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002809"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002811bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002812bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2813bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002814\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002816 - an iterable yielding integers in range(256)\n\
2817 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002818 - any object implementing the buffer API.\n\
2819 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002820
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002821static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002822
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2825 "bytes",
2826 PyBytesObject_SIZE,
2827 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002828 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002829 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 0, /* tp_getattr */
2831 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002832 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002834 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 &bytes_as_sequence, /* tp_as_sequence */
2836 &bytes_as_mapping, /* tp_as_mapping */
2837 (hashfunc)bytes_hash, /* tp_hash */
2838 0, /* tp_call */
2839 bytes_str, /* tp_str */
2840 PyObject_GenericGetAttr, /* tp_getattro */
2841 0, /* tp_setattro */
2842 &bytes_as_buffer, /* tp_as_buffer */
2843 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2844 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2845 bytes_doc, /* tp_doc */
2846 0, /* tp_traverse */
2847 0, /* tp_clear */
2848 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2849 0, /* tp_weaklistoffset */
2850 bytes_iter, /* tp_iter */
2851 0, /* tp_iternext */
2852 bytes_methods, /* tp_methods */
2853 0, /* tp_members */
2854 0, /* tp_getset */
2855 &PyBaseObject_Type, /* tp_base */
2856 0, /* tp_dict */
2857 0, /* tp_descr_get */
2858 0, /* tp_descr_set */
2859 0, /* tp_dictoffset */
2860 0, /* tp_init */
2861 0, /* tp_alloc */
2862 bytes_new, /* tp_new */
2863 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002864};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002865
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002867PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 assert(pv != NULL);
2870 if (*pv == NULL)
2871 return;
2872 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002873 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 return;
2875 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002876
2877 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2878 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002879 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002880 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002881
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002882 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002883 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2884 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2885 Py_CLEAR(*pv);
2886 return;
2887 }
2888
2889 oldsize = PyBytes_GET_SIZE(*pv);
2890 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2891 PyErr_NoMemory();
2892 goto error;
2893 }
2894 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2895 goto error;
2896
2897 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2898 PyBuffer_Release(&wb);
2899 return;
2900
2901 error:
2902 PyBuffer_Release(&wb);
2903 Py_CLEAR(*pv);
2904 return;
2905 }
2906
2907 else {
2908 /* Multiple references, need to create new object */
2909 PyObject *v;
2910 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002911 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002912 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913}
2914
2915void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002916PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 PyBytes_Concat(pv, w);
2919 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920}
2921
2922
Ethan Furmanb95b5612015-01-23 20:05:18 -08002923/* The following function breaks the notion that bytes are immutable:
2924 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002926 as creating a new bytes object and destroying the old one, only
2927 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002929 Note that if there's not enough memory to resize the bytes object, the
2930 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002931 memory" exception is set, and -1 is returned. Else (on success) 0 is
2932 returned, and the value in *pv may or may not be the same as on input.
2933 As always, an extra byte is allocated for a trailing \0 byte (newsize
2934 does *not* include that), and a trailing \0 byte is stored.
2935*/
2936
2937int
2938_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2939{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002940 PyObject *v;
2941 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002943 if (!PyBytes_Check(v) || newsize < 0) {
2944 goto error;
2945 }
2946 if (Py_SIZE(v) == newsize) {
2947 /* return early if newsize equals to v->ob_size */
2948 return 0;
2949 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002950 if (Py_SIZE(v) == 0) {
2951 if (newsize == 0) {
2952 return 0;
2953 }
2954 *pv = _PyBytes_FromSize(newsize, 0);
2955 Py_DECREF(v);
2956 return (*pv == NULL) ? -1 : 0;
2957 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002958 if (Py_REFCNT(v) != 1) {
2959 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002961 if (newsize == 0) {
2962 *pv = _PyBytes_FromSize(0, 0);
2963 Py_DECREF(v);
2964 return (*pv == NULL) ? -1 : 0;
2965 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 /* XXX UNREF/NEWREF interface should be more symmetrical */
2967 _Py_DEC_REFTOTAL;
2968 _Py_ForgetReference(v);
2969 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002970 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 if (*pv == NULL) {
2972 PyObject_Del(v);
2973 PyErr_NoMemory();
2974 return -1;
2975 }
2976 _Py_NewReference(*pv);
2977 sv = (PyBytesObject *) *pv;
2978 Py_SIZE(sv) = newsize;
2979 sv->ob_sval[newsize] = '\0';
2980 sv->ob_shash = -1; /* invalidate cached hash value */
2981 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002982error:
2983 *pv = 0;
2984 Py_DECREF(v);
2985 PyErr_BadInternalCall();
2986 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987}
2988
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989void
Victor Stinnerbed48172019-08-27 00:12:32 +02002990_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002993 for (i = 0; i < UCHAR_MAX + 1; i++)
2994 Py_CLEAR(characters[i]);
2995 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996}
2997
Benjamin Peterson4116f362008-05-27 00:36:20 +00002998/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002999
3000typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 PyObject_HEAD
3002 Py_ssize_t it_index;
3003 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005
3006static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 _PyObject_GC_UNTRACK(it);
3010 Py_XDECREF(it->it_seq);
3011 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012}
3013
3014static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 Py_VISIT(it->it_seq);
3018 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019}
3020
3021static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 PyBytesObject *seq;
3025 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 assert(it != NULL);
3028 seq = it->it_seq;
3029 if (seq == NULL)
3030 return NULL;
3031 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3034 item = PyLong_FromLong(
3035 (unsigned char)seq->ob_sval[it->it_index]);
3036 if (item != NULL)
3037 ++it->it_index;
3038 return item;
3039 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003042 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044}
3045
3046static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303047striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 Py_ssize_t len = 0;
3050 if (it->it_seq)
3051 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3052 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003053}
3054
3055PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003056 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003058static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303059striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003060{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003061 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003062 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003063 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003064 it->it_seq, it->it_index);
3065 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003066 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003067 }
3068}
3069
3070PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3071
3072static PyObject *
3073striter_setstate(striterobject *it, PyObject *state)
3074{
3075 Py_ssize_t index = PyLong_AsSsize_t(state);
3076 if (index == -1 && PyErr_Occurred())
3077 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003078 if (it->it_seq != NULL) {
3079 if (index < 0)
3080 index = 0;
3081 else if (index > PyBytes_GET_SIZE(it->it_seq))
3082 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3083 it->it_index = index;
3084 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003085 Py_RETURN_NONE;
3086}
3087
3088PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3089
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003090static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3092 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003093 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3094 reduce_doc},
3095 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3096 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098};
3099
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003100PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3102 "bytes_iterator", /* tp_name */
3103 sizeof(striterobject), /* tp_basicsize */
3104 0, /* tp_itemsize */
3105 /* methods */
3106 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003107 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003108 0, /* tp_getattr */
3109 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003110 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 0, /* tp_repr */
3112 0, /* tp_as_number */
3113 0, /* tp_as_sequence */
3114 0, /* tp_as_mapping */
3115 0, /* tp_hash */
3116 0, /* tp_call */
3117 0, /* tp_str */
3118 PyObject_GenericGetAttr, /* tp_getattro */
3119 0, /* tp_setattro */
3120 0, /* tp_as_buffer */
3121 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3122 0, /* tp_doc */
3123 (traverseproc)striter_traverse, /* tp_traverse */
3124 0, /* tp_clear */
3125 0, /* tp_richcompare */
3126 0, /* tp_weaklistoffset */
3127 PyObject_SelfIter, /* tp_iter */
3128 (iternextfunc)striter_next, /* tp_iternext */
3129 striter_methods, /* tp_methods */
3130 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131};
3132
3133static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003134bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003138 if (!PyBytes_Check(seq)) {
3139 PyErr_BadInternalCall();
3140 return NULL;
3141 }
3142 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3143 if (it == NULL)
3144 return NULL;
3145 it->it_index = 0;
3146 Py_INCREF(seq);
3147 it->it_seq = (PyBytesObject *)seq;
3148 _PyObject_GC_TRACK(it);
3149 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003150}
Victor Stinner00165072015-10-09 01:53:21 +02003151
3152
3153/* _PyBytesWriter API */
3154
/* Divisor used when the writer overallocates: the extra space requested
   is the needed size divided by OVERALLOCATE_FACTOR. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3162
3163void
3164_PyBytesWriter_Init(_PyBytesWriter *writer)
3165{
Victor Stinner661aacc2015-10-14 09:41:48 +02003166 /* Set all attributes before small_buffer to 0 */
3167 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003168#ifndef NDEBUG
3169 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3170 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003171#endif
3172}
3173
3174void
3175_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3176{
3177 Py_CLEAR(writer->buffer);
3178}
3179
3180Py_LOCAL_INLINE(char*)
3181_PyBytesWriter_AsString(_PyBytesWriter *writer)
3182{
Victor Stinner661aacc2015-10-14 09:41:48 +02003183 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003184 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003185 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003186 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003187 else if (writer->use_bytearray) {
3188 assert(writer->buffer != NULL);
3189 return PyByteArray_AS_STRING(writer->buffer);
3190 }
3191 else {
3192 assert(writer->buffer != NULL);
3193 return PyBytes_AS_STRING(writer->buffer);
3194 }
Victor Stinner00165072015-10-09 01:53:21 +02003195}
3196
3197Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003198_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003199{
3200 char *start = _PyBytesWriter_AsString(writer);
3201 assert(str != NULL);
3202 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003203 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003204 return str - start;
3205}
3206
Victor Stinner68762572019-10-07 18:42:01 +02003207#ifndef NDEBUG
3208Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003209_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3210{
Victor Stinner00165072015-10-09 01:53:21 +02003211 char *start, *end;
3212
Victor Stinner661aacc2015-10-14 09:41:48 +02003213 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003214 assert(writer->buffer == NULL);
3215 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 else {
3217 assert(writer->buffer != NULL);
3218 if (writer->use_bytearray)
3219 assert(PyByteArray_CheckExact(writer->buffer));
3220 else
3221 assert(PyBytes_CheckExact(writer->buffer));
3222 assert(Py_REFCNT(writer->buffer) == 1);
3223 }
Victor Stinner00165072015-10-09 01:53:21 +02003224
Victor Stinner661aacc2015-10-14 09:41:48 +02003225 if (writer->use_bytearray) {
3226 /* bytearray has its own overallocation algorithm,
3227 writer overallocation must be disabled */
3228 assert(!writer->overallocate);
3229 }
3230
3231 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003232 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003233 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003234 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003235 assert(start[writer->allocated] == 0);
3236
3237 end = start + writer->allocated;
3238 assert(str != NULL);
3239 assert(start <= str && str <= end);
Victor Stinner68762572019-10-07 18:42:01 +02003240 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003241}
Victor Stinner68762572019-10-07 18:42:01 +02003242#endif
Victor Stinner00165072015-10-09 01:53:21 +02003243
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003244void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003245_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003246{
3247 Py_ssize_t allocated, pos;
3248
Victor Stinner68762572019-10-07 18:42:01 +02003249 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003250 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003251
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003252 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003253 if (writer->overallocate
3254 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3255 /* overallocate to limit the number of realloc() */
3256 allocated += allocated / OVERALLOCATE_FACTOR;
3257 }
3258
Victor Stinner2bf89932015-10-14 11:25:33 +02003259 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003260 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003261 if (writer->use_bytearray) {
3262 if (PyByteArray_Resize(writer->buffer, allocated))
3263 goto error;
3264 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3265 but we cannot use ob_alloc because bytes may need to be moved
3266 to use the whole buffer. bytearray uses an internal optimization
3267 to avoid moving or copying bytes when bytes are removed at the
3268 beginning (ex: del bytearray[:1]). */
3269 }
3270 else {
3271 if (_PyBytes_Resize(&writer->buffer, allocated))
3272 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003273 }
3274 }
3275 else {
3276 /* convert from stack buffer to bytes object buffer */
3277 assert(writer->buffer == NULL);
3278
Victor Stinner661aacc2015-10-14 09:41:48 +02003279 if (writer->use_bytearray)
3280 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3281 else
3282 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003283 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003284 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003285
3286 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003287 char *dest;
3288 if (writer->use_bytearray)
3289 dest = PyByteArray_AS_STRING(writer->buffer);
3290 else
3291 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003292 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003293 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003294 pos);
3295 }
3296
Victor Stinnerb3653a32015-10-09 03:38:24 +02003297 writer->use_small_buffer = 0;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003298#ifndef NDEBUG
3299 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3300 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003301#endif
Victor Stinner00165072015-10-09 01:53:21 +02003302 }
3303 writer->allocated = allocated;
3304
3305 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinner68762572019-10-07 18:42:01 +02003306 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003307 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003308
3309error:
3310 _PyBytesWriter_Dealloc(writer);
3311 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003312}
3313
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003314void*
3315_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316{
3317 Py_ssize_t new_min_size;
3318
Victor Stinner68762572019-10-07 18:42:01 +02003319 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003320 assert(size >= 0);
3321
3322 if (size == 0) {
3323 /* nothing to do */
3324 return str;
3325 }
3326
3327 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328 PyErr_NoMemory();
3329 _PyBytesWriter_Dealloc(writer);
3330 return NULL;
3331 }
3332 new_min_size = writer->min_size + size;
3333
3334 if (new_min_size > writer->allocated)
3335 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336
3337 writer->min_size = new_min_size;
3338 return str;
3339}
3340
Victor Stinner00165072015-10-09 01:53:21 +02003341/* Allocate the buffer to write size bytes.
3342 Return the pointer to the beginning of buffer data.
3343 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003344void*
Victor Stinner00165072015-10-09 01:53:21 +02003345_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346{
3347 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003348 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003349 assert(size >= 0);
3350
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->use_small_buffer = 1;
Victor Stinner60ec6ef2019-10-07 22:31:42 +02003352#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003353 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003354 /* In debug mode, don't use the full small buffer because it is less
3355 efficient than bytes and bytearray objects to detect buffer underflow
3356 and buffer overflow. Use 10 bytes of the small buffer to test also
3357 code using the smaller buffer in debug mode.
3358
3359 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360 in debug mode to also be able to detect stack overflow when running
3361 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363 stack overflow. */
3364 writer->allocated = Py_MIN(writer->allocated, 10);
3365 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003367 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003368#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003369 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003370#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003371 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003372}
3373
3374PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003375_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003376{
Victor Stinner2bf89932015-10-14 11:25:33 +02003377 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003378 PyObject *result;
3379
Victor Stinner68762572019-10-07 18:42:01 +02003380 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003381
Victor Stinner2bf89932015-10-14 11:25:33 +02003382 size = _PyBytesWriter_GetSize(writer, str);
3383 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 Py_CLEAR(writer->buffer);
3385 /* Get the empty byte string singleton */
3386 result = PyBytes_FromStringAndSize(NULL, 0);
3387 }
3388 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003389 if (writer->use_bytearray) {
3390 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391 }
3392 else {
3393 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003395 }
3396 else {
3397 result = writer->buffer;
3398 writer->buffer = NULL;
3399
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003402 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 }
3407 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003408 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003409 assert(result == NULL);
3410 return NULL;
3411 }
Victor Stinner00165072015-10-09 01:53:21 +02003412 }
3413 }
Victor Stinner00165072015-10-09 01:53:21 +02003414 }
Victor Stinner00165072015-10-09 01:53:21 +02003415 return result;
3416}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003417
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003418void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003419_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003420 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003422 char *str = (char *)ptr;
3423
Victor Stinnerce179bf2015-10-09 12:57:22 +02003424 str = _PyBytesWriter_Prepare(writer, str, size);
3425 if (str == NULL)
3426 return NULL;
3427
Christian Heimesf051e432016-09-13 20:22:02 +02003428 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003429 str += size;
3430
3431 return str;
3432}