blob: 7df93440a14c6b2a9c2225f780f938ee93492464 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
26static PyBytesObject *nullstring;
27
/* PyBytesObject_SIZE is the basic size of a bytes object: any allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes
   (the "+ 1" leaves room for the trailing null byte).

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
Victor Stinner03dab782015-10-14 00:21:35 +0200324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Victor Stinner03dab782015-10-14 00:21:35 +0200341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360
Victor Stinner03dab782015-10-14 00:21:35 +0200361#undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
/* Format flag bits, one per flag character of a format directive:
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
670#ifdef Py_DEBUG
671 char *before;
672#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800673
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200675 if (*fmt == '%') {
676 *res++ = '%';
677 fmt++;
678 fmtcnt--;
679 continue;
680 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800681 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200682 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 Py_ssize_t keylen;
684 PyObject *key;
685 int pcount = 1;
686
687 if (dict == NULL) {
688 PyErr_SetString(PyExc_TypeError,
689 "format requires a mapping");
690 goto error;
691 }
692 ++fmt;
693 --fmtcnt;
694 keystart = fmt;
695 /* Skip over balanced parentheses */
696 while (pcount > 0 && --fmtcnt >= 0) {
697 if (*fmt == ')')
698 --pcount;
699 else if (*fmt == '(')
700 ++pcount;
701 fmt++;
702 }
703 keylen = fmt - keystart - 1;
704 if (fmtcnt < 0 || pcount > 0) {
705 PyErr_SetString(PyExc_ValueError,
706 "incomplete format key");
707 goto error;
708 }
709 key = PyBytes_FromStringAndSize(keystart,
710 keylen);
711 if (key == NULL)
712 goto error;
713 if (args_owned) {
714 Py_DECREF(args);
715 args_owned = 0;
716 }
717 args = PyObject_GetItem(dict, key);
718 Py_DECREF(key);
719 if (args == NULL) {
720 goto error;
721 }
722 args_owned = 1;
723 arglen = -1;
724 argidx = -2;
725 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200726
727 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800728 while (--fmtcnt >= 0) {
729 switch (c = *fmt++) {
730 case '-': flags |= F_LJUST; continue;
731 case '+': flags |= F_SIGN; continue;
732 case ' ': flags |= F_BLANK; continue;
733 case '#': flags |= F_ALT; continue;
734 case '0': flags |= F_ZERO; continue;
735 }
736 break;
737 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200738
739 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800740 if (c == '*') {
741 v = getnextarg(args, arglen, &argidx);
742 if (v == NULL)
743 goto error;
744 if (!PyLong_Check(v)) {
745 PyErr_SetString(PyExc_TypeError,
746 "* wants int");
747 goto error;
748 }
749 width = PyLong_AsSsize_t(v);
750 if (width == -1 && PyErr_Occurred())
751 goto error;
752 if (width < 0) {
753 flags |= F_LJUST;
754 width = -width;
755 }
756 if (--fmtcnt >= 0)
757 c = *fmt++;
758 }
759 else if (c >= 0 && isdigit(c)) {
760 width = c - '0';
761 while (--fmtcnt >= 0) {
762 c = Py_CHARMASK(*fmt++);
763 if (!isdigit(c))
764 break;
765 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
766 PyErr_SetString(
767 PyExc_ValueError,
768 "width too big");
769 goto error;
770 }
771 width = width*10 + (c - '0');
772 }
773 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200774
775 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800776 if (c == '.') {
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 if (c == '*') {
781 v = getnextarg(args, arglen, &argidx);
782 if (v == NULL)
783 goto error;
784 if (!PyLong_Check(v)) {
785 PyErr_SetString(
786 PyExc_TypeError,
787 "* wants int");
788 goto error;
789 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200790 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800791 if (prec == -1 && PyErr_Occurred())
792 goto error;
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 else if (c >= 0 && isdigit(c)) {
799 prec = c - '0';
800 while (--fmtcnt >= 0) {
801 c = Py_CHARMASK(*fmt++);
802 if (!isdigit(c))
803 break;
804 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
805 PyErr_SetString(
806 PyExc_ValueError,
807 "prec too big");
808 goto error;
809 }
810 prec = prec*10 + (c - '0');
811 }
812 }
813 } /* prec */
814 if (fmtcnt >= 0) {
815 if (c == 'h' || c == 'l' || c == 'L') {
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 PyErr_SetString(PyExc_ValueError,
822 "incomplete format");
823 goto error;
824 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300829 if (fmtcnt == 0) {
830 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831 writer.overallocate = 0;
832 }
833
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 sign = 0;
835 fill = ' ';
836 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700837 case 'r':
838 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 if (temp == NULL)
842 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200843 assert(PyUnicode_IS_ASCII(temp));
844 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
845 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 if (prec >= 0 && len > prec)
847 len = prec;
848 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200849
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 case 's':
851 // %s is only for 2/3 code; 3 only code should use %b
852 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200853 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (temp == NULL)
855 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800856 if (prec >= 0 && len > prec)
857 len = prec;
858 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200859
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 case 'i':
861 case 'd':
862 case 'u':
863 case 'o':
864 case 'x':
865 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200866 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200867 && width == -1 && prec == -1
868 && !(flags & (F_SIGN | F_BLANK))
869 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200870 {
871 /* Fast path */
872 int alternate = flags & F_ALT;
873 int base;
874
875 switch(c)
876 {
877 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700878 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 case 'd':
880 case 'i':
881 case 'u':
882 base = 10;
883 break;
884 case 'o':
885 base = 8;
886 break;
887 case 'x':
888 case 'X':
889 base = 16;
890 break;
891 }
892
893 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200894 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200895 res = _PyLong_FormatBytesWriter(&writer, res,
896 v, base, alternate);
897 if (res == NULL)
898 goto error;
899 continue;
900 }
901
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300902 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200903 if (!temp)
904 goto error;
905 assert(PyUnicode_IS_ASCII(temp));
906 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
907 len = PyUnicode_GET_LENGTH(temp);
908 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 if (flags & F_ZERO)
910 fill = '0';
911 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 case 'e':
914 case 'E':
915 case 'f':
916 case 'F':
917 case 'g':
918 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200919 if (width == -1 && prec == -1
920 && !(flags & (F_SIGN | F_BLANK)))
921 {
922 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200923 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200924 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200925 if (res == NULL)
926 goto error;
927 continue;
928 }
929
Victor Stinnerad771582015-10-09 12:38:53 +0200930 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 goto error;
932 pbuf = PyBytes_AS_STRING(temp);
933 len = PyBytes_GET_SIZE(temp);
934 sign = 1;
935 if (flags & F_ZERO)
936 fill = '0';
937 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200938
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200940 pbuf = &onechar;
941 len = byte_converter(v, &onechar);
942 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200944 if (width == -1) {
945 /* Fast path */
946 *res++ = onechar;
947 continue;
948 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800949 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 default:
952 PyErr_Format(PyExc_ValueError,
953 "unsupported format character '%c' (0x%x) "
954 "at index %zd",
955 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200956 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 goto error;
958 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 if (sign) {
961 if (*pbuf == '-' || *pbuf == '+') {
962 sign = *pbuf++;
963 len--;
964 }
965 else if (flags & F_SIGN)
966 sign = '+';
967 else if (flags & F_BLANK)
968 sign = ' ';
969 else
970 sign = 0;
971 }
972 if (width < len)
973 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974
975 alloc = width;
976 if (sign != 0 && len == width)
977 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200978 /* 2: size preallocated for %s */
979 if (alloc > 2) {
980 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981 if (res == NULL)
982 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984#ifdef Py_DEBUG
985 before = res;
986#endif
987
988 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (sign) {
990 if (fill != ' ')
991 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 if (width > len)
993 width--;
994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* Write the numeric prefix for "x", "X" and "o" formats
997 if the alternate form is used.
998 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200999 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 assert(pbuf[0] == '0');
1001 assert(pbuf[1] == c);
1002 if (fill != ' ') {
1003 *res++ = *pbuf++;
1004 *res++ = *pbuf++;
1005 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 width -= 2;
1007 if (width < 0)
1008 width = 0;
1009 len -= 2;
1010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001013 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014 memset(res, fill, width - len);
1015 res += (width - len);
1016 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* If padding with spaces: write sign if needed and/or numeric
1020 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (fill == ' ') {
1022 if (sign)
1023 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001024 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 assert(pbuf[0] == '0');
1026 assert(pbuf[1] == c);
1027 *res++ = *pbuf++;
1028 *res++ = *pbuf++;
1029 }
1030 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
1032 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001033 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
1036 /* Pad right with the fill character if needed */
1037 if (width > len) {
1038 memset(res, ' ', width - len);
1039 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001041
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001042 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 PyErr_SetString(PyExc_TypeError,
1044 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
1046 goto error;
1047 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
1050#ifdef Py_DEBUG
1051 /* check that we computed the exact size for this write */
1052 assert((res - before) == alloc);
1053#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055
1056 /* If overallocation was disabled, ensure that it was the last
1057 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001058 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 if (argidx < arglen && !dict) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "not all arguments converted during bytes formatting");
1064 goto error;
1065 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071
1072 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001073 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001074 if (args_owned) {
1075 Py_DECREF(args);
1076 }
1077 return NULL;
1078}
1079
Greg Price3a4f6672019-09-12 11:12:22 -07001080/* Unescape a backslash-escaped string. */
Eric V. Smith42454af2016-10-31 09:22:08 -04001081PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 Py_ssize_t len,
1083 const char *errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001084 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001087 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001089 _PyBytesWriter writer;
1090
1091 _PyBytesWriter_Init(&writer);
1092
1093 p = _PyBytesWriter_Alloc(&writer, len);
1094 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001096 writer.overallocate = 1;
1097
Eric V. Smith42454af2016-10-31 09:22:08 -04001098 *first_invalid_escape = NULL;
1099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 end = s + len;
1101 while (s < end) {
1102 if (*s != '\\') {
Greg Price3a4f6672019-09-12 11:12:22 -07001103 *p++ = *s++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 continue;
1105 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001108 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 PyErr_SetString(PyExc_ValueError,
1110 "Trailing \\ in string");
1111 goto failed;
1112 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 switch (*s++) {
1115 /* XXX This assumes ASCII! */
1116 case '\n': break;
1117 case '\\': *p++ = '\\'; break;
1118 case '\'': *p++ = '\''; break;
1119 case '\"': *p++ = '\"'; break;
1120 case 'b': *p++ = '\b'; break;
1121 case 'f': *p++ = '\014'; break; /* FF */
1122 case 't': *p++ = '\t'; break;
1123 case 'n': *p++ = '\n'; break;
1124 case 'r': *p++ = '\r'; break;
1125 case 'v': *p++ = '\013'; break; /* VT */
1126 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1127 case '0': case '1': case '2': case '3':
1128 case '4': case '5': case '6': case '7':
1129 c = s[-1] - '0';
1130 if (s < end && '0' <= *s && *s <= '7') {
1131 c = (c<<3) + *s++ - '0';
1132 if (s < end && '0' <= *s && *s <= '7')
1133 c = (c<<3) + *s++ - '0';
1134 }
1135 *p++ = c;
1136 break;
1137 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001138 if (s+1 < end) {
1139 int digit1, digit2;
1140 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1141 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1142 if (digit1 < 16 && digit2 < 16) {
1143 *p++ = (unsigned char)((digit1 << 4) + digit2);
1144 s += 2;
1145 break;
1146 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001148 /* invalid hexadecimal digits */
1149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001151 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001152 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001153 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 goto failed;
1155 }
1156 if (strcmp(errors, "replace") == 0) {
1157 *p++ = '?';
1158 } else if (strcmp(errors, "ignore") == 0)
1159 /* do nothing */;
1160 else {
1161 PyErr_Format(PyExc_ValueError,
1162 "decoding error; unknown "
1163 "error handling code: %.400s",
1164 errors);
1165 goto failed;
1166 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001167 /* skip \x */
1168 if (s < end && Py_ISXDIGIT(s[0]))
1169 s++; /* and a hexdigit */
1170 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001173 if (*first_invalid_escape == NULL) {
1174 *first_invalid_escape = s-1; /* Back up one char, since we've
1175 already incremented s. */
1176 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001178 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 }
1180 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001181
1182 return _PyBytesWriter_Finish(&writer, p);
1183
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001185 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187}
1188
Eric V. Smith42454af2016-10-31 09:22:08 -04001189PyObject *PyBytes_DecodeEscape(const char *s,
1190 Py_ssize_t len,
1191 const char *errors,
Greg Price3a4f6672019-09-12 11:12:22 -07001192 Py_ssize_t Py_UNUSED(unicode),
1193 const char *Py_UNUSED(recode_encoding))
Eric V. Smith42454af2016-10-31 09:22:08 -04001194{
1195 const char* first_invalid_escape;
Greg Price3a4f6672019-09-12 11:12:22 -07001196 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
Eric V. Smith42454af2016-10-31 09:22:08 -04001197 &first_invalid_escape);
1198 if (result == NULL)
1199 return NULL;
1200 if (first_invalid_escape != NULL) {
1201 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1202 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001203 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001204 Py_DECREF(result);
1205 return NULL;
1206 }
1207 }
1208 return result;
1209
1210}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001211/* -------------------------------------------------------------------- */
1212/* object api */
1213
1214Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001215PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 if (!PyBytes_Check(op)) {
1218 PyErr_Format(PyExc_TypeError,
1219 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1220 return -1;
1221 }
1222 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223}
1224
1225char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001226PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (!PyBytes_Check(op)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1231 return NULL;
1232 }
1233 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234}
1235
1236int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001237PyBytes_AsStringAndSize(PyObject *obj,
1238 char **s,
1239 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 if (s == NULL) {
1242 PyErr_BadInternalCall();
1243 return -1;
1244 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 if (!PyBytes_Check(obj)) {
1247 PyErr_Format(PyExc_TypeError,
1248 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1249 return -1;
1250 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 *s = PyBytes_AS_STRING(obj);
1253 if (len != NULL)
1254 *len = PyBytes_GET_SIZE(obj);
1255 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001256 PyErr_SetString(PyExc_ValueError,
1257 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 return -1;
1259 }
1260 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261}
Neal Norwitz6968b052007-02-27 19:02:19 +00001262
1263/* -------------------------------------------------------------------- */
1264/* Methods */
1265
Eric Smith0923d1d2009-04-16 20:16:10 +00001266#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001267
1268#include "stringlib/fastsearch.h"
1269#include "stringlib/count.h"
1270#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001271#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001272#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001273#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001274#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Eric Smith0f78bff2009-11-30 01:01:42 +00001276#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001277
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278PyObject *
1279PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001280{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001281 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001283 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 unsigned char quote, *s, *p;
1286
1287 /* Compute size of output string */
1288 squotes = dquotes = 0;
1289 newsize = 3; /* b'' */
1290 s = (unsigned char*)op->ob_sval;
1291 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001292 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001294 case '\'': squotes++; break;
1295 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001297 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 default:
1299 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001300 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001302 if (newsize > PY_SSIZE_T_MAX - incr)
1303 goto overflow;
1304 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 }
1306 quote = '\'';
1307 if (smartquotes && squotes && !dquotes)
1308 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 if (squotes && quote == '\'') {
1310 if (newsize > PY_SSIZE_T_MAX - squotes)
1311 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001314
1315 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 if (v == NULL) {
1317 return NULL;
1318 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001321 *p++ = 'b', *p++ = quote;
1322 for (i = 0; i < length; i++) {
1323 unsigned char c = op->ob_sval[i];
1324 if (c == quote || c == '\\')
1325 *p++ = '\\', *p++ = c;
1326 else if (c == '\t')
1327 *p++ = '\\', *p++ = 't';
1328 else if (c == '\n')
1329 *p++ = '\\', *p++ = 'n';
1330 else if (c == '\r')
1331 *p++ = '\\', *p++ = 'r';
1332 else if (c < ' ' || c >= 0x7f) {
1333 *p++ = '\\';
1334 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001335 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1336 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 else
1339 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001342 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001343 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001344
1345 overflow:
1346 PyErr_SetString(PyExc_OverflowError,
1347 "bytes object is too large to make repr");
1348 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001349}
1350
Neal Norwitz6968b052007-02-27 19:02:19 +00001351static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001352bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001355}
1356
Neal Norwitz6968b052007-02-27 19:02:19 +00001357static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001358bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001359{
Victor Stinner331a6a52019-05-27 16:39:22 +02001360 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001361 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001363 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001365 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 }
1367 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001368}
1369
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001371bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374}
Neal Norwitz6968b052007-02-27 19:02:19 +00001375
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376/* This is also used by PyBytes_Concat() */
1377static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001378bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 Py_buffer va, vb;
1381 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 va.len = -1;
1384 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001385 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1386 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001388 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 goto done;
1390 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 /* Optimize end cases */
1393 if (va.len == 0 && PyBytes_CheckExact(b)) {
1394 result = b;
1395 Py_INCREF(result);
1396 goto done;
1397 }
1398 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1399 result = a;
1400 Py_INCREF(result);
1401 goto done;
1402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001404 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 PyErr_NoMemory();
1406 goto done;
1407 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001409 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 if (result != NULL) {
1411 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1412 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
1415 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 if (va.len != -1)
1417 PyBuffer_Release(&va);
1418 if (vb.len != -1)
1419 PyBuffer_Release(&vb);
1420 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421}
Neal Norwitz6968b052007-02-27 19:02:19 +00001422
1423static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001424bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001425{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001426 Py_ssize_t i;
1427 Py_ssize_t j;
1428 Py_ssize_t size;
1429 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 size_t nbytes;
1431 if (n < 0)
1432 n = 0;
1433 /* watch out for overflows: the size can overflow int,
1434 * and the # of bytes needed can overflow size_t
1435 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001436 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 PyErr_SetString(PyExc_OverflowError,
1438 "repeated bytes are too long");
1439 return NULL;
1440 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001441 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1443 Py_INCREF(a);
1444 return (PyObject *)a;
1445 }
1446 nbytes = (size_t)size;
1447 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1448 PyErr_SetString(PyExc_OverflowError,
1449 "repeated bytes are too long");
1450 return NULL;
1451 }
1452 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1453 if (op == NULL)
1454 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001455 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 op->ob_shash = -1;
1457 op->ob_sval[size] = '\0';
1458 if (Py_SIZE(a) == 1 && n > 0) {
1459 memset(op->ob_sval, a->ob_sval[0] , n);
1460 return (PyObject *) op;
1461 }
1462 i = 0;
1463 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001464 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 i = Py_SIZE(a);
1466 }
1467 while (i < size) {
1468 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001469 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 i += j;
1471 }
1472 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001473}
1474
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001475static int
1476bytes_contains(PyObject *self, PyObject *arg)
1477{
1478 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1479}
1480
Neal Norwitz6968b052007-02-27 19:02:19 +00001481static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001482bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 if (i < 0 || i >= Py_SIZE(a)) {
1485 PyErr_SetString(PyExc_IndexError, "index out of range");
1486 return NULL;
1487 }
1488 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001489}
1490
Benjamin Peterson621b4302016-09-09 13:54:34 -07001491static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001492bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1493{
1494 int cmp;
1495 Py_ssize_t len;
1496
1497 len = Py_SIZE(a);
1498 if (Py_SIZE(b) != len)
1499 return 0;
1500
1501 if (a->ob_sval[0] != b->ob_sval[0])
1502 return 0;
1503
1504 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1505 return (cmp == 0);
1506}
1507
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001509bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001510{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 int c;
1512 Py_ssize_t len_a, len_b;
1513 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001514 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 /* Make sure both arguments are strings. */
1517 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001518 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001519 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001520 rc = PyObject_IsInstance((PyObject*)a,
1521 (PyObject*)&PyUnicode_Type);
1522 if (!rc)
1523 rc = PyObject_IsInstance((PyObject*)b,
1524 (PyObject*)&PyUnicode_Type);
1525 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001527 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001528 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001529 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001530 return NULL;
1531 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001532 else {
1533 rc = PyObject_IsInstance((PyObject*)a,
1534 (PyObject*)&PyLong_Type);
1535 if (!rc)
1536 rc = PyObject_IsInstance((PyObject*)b,
1537 (PyObject*)&PyLong_Type);
1538 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001539 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001540 if (rc) {
1541 if (PyErr_WarnEx(PyExc_BytesWarning,
1542 "Comparison between bytes and int", 1))
1543 return NULL;
1544 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001545 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 }
stratakise8b19652017-11-02 11:32:54 +01001547 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001549 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001551 case Py_EQ:
1552 case Py_LE:
1553 case Py_GE:
1554 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001555 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001556 case Py_NE:
1557 case Py_LT:
1558 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001559 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001560 default:
1561 PyErr_BadArgument();
1562 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 }
1564 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001565 else if (op == Py_EQ || op == Py_NE) {
1566 int eq = bytes_compare_eq(a, b);
1567 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001568 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001569 }
1570 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001571 len_a = Py_SIZE(a);
1572 len_b = Py_SIZE(b);
1573 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001574 if (min_len > 0) {
1575 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001576 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001577 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001579 else
1580 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001581 if (c != 0)
1582 Py_RETURN_RICHCOMPARE(c, 0, op);
1583 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001585}
1586
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001587static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001588bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001589{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001590 if (a->ob_shash == -1) {
1591 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001592 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001593 }
1594 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001595}
1596
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001598bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 if (PyIndex_Check(item)) {
1601 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1602 if (i == -1 && PyErr_Occurred())
1603 return NULL;
1604 if (i < 0)
1605 i += PyBytes_GET_SIZE(self);
1606 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1607 PyErr_SetString(PyExc_IndexError,
1608 "index out of range");
1609 return NULL;
1610 }
1611 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1612 }
1613 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001614 Py_ssize_t start, stop, step, slicelength, i;
1615 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 char* source_buf;
1617 char* result_buf;
1618 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001619
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001620 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 return NULL;
1622 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001623 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1624 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 if (slicelength <= 0) {
1627 return PyBytes_FromStringAndSize("", 0);
1628 }
1629 else if (start == 0 && step == 1 &&
1630 slicelength == PyBytes_GET_SIZE(self) &&
1631 PyBytes_CheckExact(self)) {
1632 Py_INCREF(self);
1633 return (PyObject *)self;
1634 }
1635 else if (step == 1) {
1636 return PyBytes_FromStringAndSize(
1637 PyBytes_AS_STRING(self) + start,
1638 slicelength);
1639 }
1640 else {
1641 source_buf = PyBytes_AS_STRING(self);
1642 result = PyBytes_FromStringAndSize(NULL, slicelength);
1643 if (result == NULL)
1644 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 result_buf = PyBytes_AS_STRING(result);
1647 for (cur = start, i = 0; i < slicelength;
1648 cur += step, i++) {
1649 result_buf[i] = source_buf[cur];
1650 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 return result;
1653 }
1654 }
1655 else {
1656 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001657 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 Py_TYPE(item)->tp_name);
1659 return NULL;
1660 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661}
1662
1663static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001664bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1667 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668}
1669
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001670static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 (lenfunc)bytes_length, /*sq_length*/
1672 (binaryfunc)bytes_concat, /*sq_concat*/
1673 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1674 (ssizeargfunc)bytes_item, /*sq_item*/
1675 0, /*sq_slice*/
1676 0, /*sq_ass_item*/
1677 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001678 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679};
1680
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001681static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 (lenfunc)bytes_length,
1683 (binaryfunc)bytes_subscript,
1684 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685};
1686
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001687static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 (getbufferproc)bytes_buffer_getbuffer,
1689 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690};
1691
1692
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
1696
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001697/*[clinic input]
1698bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700 sep: object = None
1701 The delimiter according which to split the bytes.
1702 None (the default value) means split on ASCII whitespace characters
1703 (space, tab, return, newline, formfeed, vertical tab).
1704 maxsplit: Py_ssize_t = -1
1705 Maximum number of splits to do.
1706 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001708Return a list of the sections in the bytes, using sep as the delimiter.
1709[clinic start generated code]*/
1710
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001712bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1713/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001714{
1715 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 const char *s = PyBytes_AS_STRING(self), *sub;
1717 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 if (maxsplit < 0)
1721 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001722 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001724 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 return NULL;
1726 sub = vsub.buf;
1727 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1730 PyBuffer_Release(&vsub);
1731 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001732}
1733
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734/*[clinic input]
1735bytes.partition
1736
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001737 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001738 /
1739
1740Partition the bytes into three parts using the given separator.
1741
1742This will search for the separator sep in the bytes. If the separator is found,
1743returns a 3-tuple containing the part before the separator, the separator
1744itself, and the part after it.
1745
1746If the separator is not found, returns a 3-tuple containing the original bytes
1747object and two empty bytes objects.
1748[clinic start generated code]*/
1749
Neal Norwitz6968b052007-02-27 19:02:19 +00001750static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001751bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001752/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001753{
Neal Norwitz6968b052007-02-27 19:02:19 +00001754 return stringlib_partition(
1755 (PyObject*) self,
1756 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001757 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001758 );
1759}
1760
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761/*[clinic input]
1762bytes.rpartition
1763
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001764 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001765 /
1766
1767Partition the bytes into three parts using the given separator.
1768
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001769This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770the separator is found, returns a 3-tuple containing the part before the
1771separator, the separator itself, and the part after it.
1772
1773If the separator is not found, returns a 3-tuple containing two empty bytes
1774objects and the original bytes object.
1775[clinic start generated code]*/
1776
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777static PyObject *
1778bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001779/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 return stringlib_rpartition(
1782 (PyObject*) self,
1783 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001784 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001786}
1787
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788/*[clinic input]
1789bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001790
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791Return a list of the sections in the bytes, using sep as the delimiter.
1792
1793Splitting is done starting at the end of the bytes and working to the front.
1794[clinic start generated code]*/
1795
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001797bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1798/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001799{
1800 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 const char *s = PyBytes_AS_STRING(self), *sub;
1802 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001803 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 if (maxsplit < 0)
1806 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001807 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001809 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 return NULL;
1811 sub = vsub.buf;
1812 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1815 PyBuffer_Release(&vsub);
1816 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001817}
1818
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001820/*[clinic input]
1821bytes.join
1822
1823 iterable_of_bytes: object
1824 /
1825
1826Concatenate any number of bytes objects.
1827
1828The bytes whose method is called is inserted in between each pair.
1829
1830The result is returned as a new bytes object.
1831
1832Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1833[clinic start generated code]*/
1834
Neal Norwitz6968b052007-02-27 19:02:19 +00001835static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001836bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1837/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001838{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001839 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001840}
1841
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842PyObject *
1843_PyBytes_Join(PyObject *sep, PyObject *x)
1844{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 assert(sep != NULL && PyBytes_Check(sep));
1846 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001847 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848}
1849
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001850static PyObject *
1851bytes_find(PyBytesObject *self, PyObject *args)
1852{
1853 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1854}
1855
1856static PyObject *
1857bytes_index(PyBytesObject *self, PyObject *args)
1858{
1859 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1860}
1861
1862
1863static PyObject *
1864bytes_rfind(PyBytesObject *self, PyObject *args)
1865{
1866 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1867}
1868
1869
1870static PyObject *
1871bytes_rindex(PyBytesObject *self, PyObject *args)
1872{
1873 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1874}
1875
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
1877Py_LOCAL_INLINE(PyObject *)
1878do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 Py_buffer vsep;
1881 char *s = PyBytes_AS_STRING(self);
1882 Py_ssize_t len = PyBytes_GET_SIZE(self);
1883 char *sep;
1884 Py_ssize_t seplen;
1885 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001887 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 return NULL;
1889 sep = vsep.buf;
1890 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 i = 0;
1893 if (striptype != RIGHTSTRIP) {
1894 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1895 i++;
1896 }
1897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 j = len;
1900 if (striptype != LEFTSTRIP) {
1901 do {
1902 j--;
1903 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1904 j++;
1905 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1910 Py_INCREF(self);
1911 return (PyObject*)self;
1912 }
1913 else
1914 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001915}
1916
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
1918Py_LOCAL_INLINE(PyObject *)
1919do_strip(PyBytesObject *self, int striptype)
1920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 char *s = PyBytes_AS_STRING(self);
1922 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 i = 0;
1925 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001926 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 i++;
1928 }
1929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 j = len;
1932 if (striptype != LEFTSTRIP) {
1933 do {
1934 j--;
David Malcolm96960882010-11-05 17:23:41 +00001935 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 j++;
1937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1940 Py_INCREF(self);
1941 return (PyObject*)self;
1942 }
1943 else
1944 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945}
1946
1947
1948Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001949do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001950{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001951 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001952 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 }
1954 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955}
1956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001957/*[clinic input]
1958bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960 bytes: object = None
1961 /
1962
1963Strip leading and trailing bytes contained in the argument.
1964
1965If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1966[clinic start generated code]*/
1967
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001969bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001970/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001971{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001972 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001973}
1974
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975/*[clinic input]
1976bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001978 bytes: object = None
1979 /
1980
1981Strip leading bytes contained in the argument.
1982
1983If the argument is omitted or None, strip leading ASCII whitespace.
1984[clinic start generated code]*/
1985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986static PyObject *
1987bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001988/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001989{
1990 return do_argstrip(self, LEFTSTRIP, bytes);
1991}
1992
1993/*[clinic input]
1994bytes.rstrip
1995
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001996 bytes: object = None
1997 /
1998
1999Strip trailing bytes contained in the argument.
2000
2001If the argument is omitted or None, strip trailing ASCII whitespace.
2002[clinic start generated code]*/
2003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004static PyObject *
2005bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002006/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007{
2008 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009}
Neal Norwitz6968b052007-02-27 19:02:19 +00002010
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002012static PyObject *
2013bytes_count(PyBytesObject *self, PyObject *args)
2014{
2015 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2016}
2017
2018
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002019/*[clinic input]
2020bytes.translate
2021
Victor Stinner049e5092014-08-17 22:20:00 +02002022 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002025 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026
2027Return a copy with each character mapped by the given translation table.
2028
Martin Panter1b6c6da2016-08-27 08:35:02 +00002029All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030The remaining characters are mapped through the given translation table.
2031[clinic start generated code]*/
2032
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002034bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002035 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002036/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002037{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002038 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002039 Py_buffer table_view = {NULL, NULL};
2040 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002042 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 Py_ssize_t inlen, tablen, dellen = 0;
2046 PyObject *result;
2047 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 if (PyBytes_Check(table)) {
2050 table_chars = PyBytes_AS_STRING(table);
2051 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002053 else if (table == Py_None) {
2054 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 tablen = 256;
2056 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002057 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002058 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002060 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002061 tablen = table_view.len;
2062 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 if (tablen != 256) {
2065 PyErr_SetString(PyExc_ValueError,
2066 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002067 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 return NULL;
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002071 if (deletechars != NULL) {
2072 if (PyBytes_Check(deletechars)) {
2073 del_table_chars = PyBytes_AS_STRING(deletechars);
2074 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002076 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002077 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002078 PyBuffer_Release(&table_view);
2079 return NULL;
2080 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002081 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002082 dellen = del_table_view.len;
2083 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 }
2085 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 dellen = 0;
2088 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 inlen = PyBytes_GET_SIZE(input_obj);
2091 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 if (result == NULL) {
2093 PyBuffer_Release(&del_table_view);
2094 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002096 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002097 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 /* If no deletions are required, use faster code */
2102 for (i = inlen; --i >= 0; ) {
2103 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 changed = 1;
2106 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 if (!changed && PyBytes_CheckExact(input_obj)) {
2108 Py_INCREF(input_obj);
2109 Py_DECREF(result);
2110 result = input_obj;
2111 }
2112 PyBuffer_Release(&del_table_view);
2113 PyBuffer_Release(&table_view);
2114 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002117 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 for (i = 0; i < 256; i++)
2119 trans_table[i] = Py_CHARMASK(i);
2120 } else {
2121 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002122 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002124 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002128 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 for (i = inlen; --i >= 0; ) {
2131 c = Py_CHARMASK(*input++);
2132 if (trans_table[c] != -1)
2133 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2134 continue;
2135 changed = 1;
2136 }
2137 if (!changed && PyBytes_CheckExact(input_obj)) {
2138 Py_DECREF(result);
2139 Py_INCREF(input_obj);
2140 return input_obj;
2141 }
2142 /* Fix the size of the resulting string */
2143 if (inlen > 0)
2144 _PyBytes_Resize(&result, output - output_start);
2145 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146}
2147
2148
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149/*[clinic input]
2150
2151@staticmethod
2152bytes.maketrans
2153
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002154 frm: Py_buffer
2155 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156 /
2157
2158Return a translation table useable for the bytes or bytearray translate method.
2159
2160The returned table will be one where each byte in frm is mapped to the byte at
2161the same position in to.
2162
2163The bytes objects frm and to must be of the same length.
2164[clinic start generated code]*/
2165
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002166static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002167bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002168/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002169{
2170 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002171}
2172
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173
2174/*[clinic input]
2175bytes.replace
2176
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002177 old: Py_buffer
2178 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179 count: Py_ssize_t = -1
2180 Maximum number of occurrences to replace.
2181 -1 (the default value) means replace all occurrences.
2182 /
2183
2184Return a copy with all occurrences of substring old replaced by new.
2185
2186If the optional argument count is given, only the first count occurrences are
2187replaced.
2188[clinic start generated code]*/
2189
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002190static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002191bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002192 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002193/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002195 return stringlib_replace((PyObject *)self,
2196 (const char *)old->buf, old->len,
2197 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198}
2199
2200/** End DALKE **/
2201
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002203static PyObject *
2204bytes_startswith(PyBytesObject *self, PyObject *args)
2205{
2206 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2207}
2208
2209static PyObject *
2210bytes_endswith(PyBytesObject *self, PyObject *args)
2211{
2212 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2213}
2214
2215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216/*[clinic input]
2217bytes.decode
2218
2219 encoding: str(c_default="NULL") = 'utf-8'
2220 The encoding with which to decode the bytes.
2221 errors: str(c_default="NULL") = 'strict'
2222 The error handling scheme to use for the handling of decoding errors.
2223 The default is 'strict' meaning that decoding errors raise a
2224 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2225 as well as any other name registered with codecs.register_error that
2226 can handle UnicodeDecodeErrors.
2227
2228Decode the bytes using the codec registered for encoding.
2229[clinic start generated code]*/
2230
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002231static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002232bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002233 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002234/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002236 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002237}
2238
Guido van Rossum20188312006-05-05 15:15:40 +00002239
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002240/*[clinic input]
2241bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002242
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002243 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244
2245Return a list of the lines in the bytes, breaking at line boundaries.
2246
2247Line breaks are not included in the resulting list unless keepends is given and
2248true.
2249[clinic start generated code]*/
2250
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002252bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002253/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002254{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002255 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002256 (PyObject*) self, PyBytes_AS_STRING(self),
2257 PyBytes_GET_SIZE(self), keepends
2258 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002259}
2260
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002261/*[clinic input]
2262@classmethod
2263bytes.fromhex
2264
2265 string: unicode
2266 /
2267
2268Create a bytes object from a string of hexadecimal numbers.
2269
2270Spaces between two numbers are accepted.
2271Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2272[clinic start generated code]*/
2273
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002274static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002275bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002276/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002277{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002278 PyObject *result = _PyBytes_FromHex(string, 0);
2279 if (type != &PyBytes_Type && result != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002280 Py_SETREF(result, _PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002281 }
2282 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002283}
2284
2285PyObject*
2286_PyBytes_FromHex(PyObject *string, int use_bytearray)
2287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002288 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002289 Py_ssize_t hexlen, invalid_char;
2290 unsigned int top, bot;
2291 Py_UCS1 *str, *end;
2292 _PyBytesWriter writer;
2293
2294 _PyBytesWriter_Init(&writer);
2295 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002296
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297 assert(PyUnicode_Check(string));
2298 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002301
Victor Stinner2bf89932015-10-14 11:25:33 +02002302 if (!PyUnicode_IS_ASCII(string)) {
2303 void *data = PyUnicode_DATA(string);
2304 unsigned int kind = PyUnicode_KIND(string);
2305 Py_ssize_t i;
2306
2307 /* search for the first non-ASCII character */
2308 for (i = 0; i < hexlen; i++) {
2309 if (PyUnicode_READ(kind, data, i) >= 128)
2310 break;
2311 }
2312 invalid_char = i;
2313 goto error;
2314 }
2315
2316 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2317 str = PyUnicode_1BYTE_DATA(string);
2318
2319 /* This overestimates if there are spaces */
2320 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2321 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002322 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002323
2324 end = str + hexlen;
2325 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002327 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002328 do {
2329 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002330 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002331 if (str >= end)
2332 break;
2333 }
2334
2335 top = _PyLong_DigitValue[*str];
2336 if (top >= 16) {
2337 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002338 goto error;
2339 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002340 str++;
2341
2342 bot = _PyLong_DigitValue[*str];
2343 if (bot >= 16) {
2344 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2345 goto error;
2346 }
2347 str++;
2348
2349 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002351
2352 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002353
2354 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002355 PyErr_Format(PyExc_ValueError,
2356 "non-hexadecimal number found in "
2357 "fromhex() arg at position %zd", invalid_char);
2358 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002360}
2361
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002362/*[clinic input]
2363bytes.hex
2364
2365 sep: object = NULL
2366 An optional single character or byte to separate hex bytes.
2367 bytes_per_sep: int = 1
2368 How many bytes between separators. Positive values count from the
2369 right, negative values count from the left.
2370
2371Create a str of hexadecimal numbers from a bytes object.
2372
2373Example:
2374>>> value = b'\xb9\x01\xef'
2375>>> value.hex()
2376'b901ef'
2377>>> value.hex(':')
2378'b9:01:ef'
2379>>> value.hex(':', 2)
2380'b9:01ef'
2381>>> value.hex(':', -2)
2382'b901:ef'
2383[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002384
2385static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002386bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2387/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002388{
2389 char* argbuf = PyBytes_AS_STRING(self);
2390 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002391 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002392}
2393
2394static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302395bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002398}
2399
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002400
2401static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002402bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302404 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002406 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002407 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002408 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002409 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002410 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002411 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002412 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002413 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002414 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002415 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002416 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002417 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302418 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302420 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302422 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002423 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302424 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302426 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302428 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302430 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302432 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002434 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002435 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302436 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002437 BYTES_LSTRIP_METHODDEF
2438 BYTES_MAKETRANS_METHODDEF
2439 BYTES_PARTITION_METHODDEF
2440 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002441 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2442 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002443 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002444 BYTES_RPARTITION_METHODDEF
2445 BYTES_RSPLIT_METHODDEF
2446 BYTES_RSTRIP_METHODDEF
2447 BYTES_SPLIT_METHODDEF
2448 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002449 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002450 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002451 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302452 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302454 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002455 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302456 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002457 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002459};
2460
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002462bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002463{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002464 if (!PyBytes_Check(self)) {
2465 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002466 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002467 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002468 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002469}
2470
2471static PyNumberMethods bytes_as_number = {
2472 0, /*nb_add*/
2473 0, /*nb_subtract*/
2474 0, /*nb_multiply*/
2475 bytes_mod, /*nb_remainder*/
2476};
2477
2478static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002479bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002480
2481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002482bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 PyObject *x = NULL;
2485 const char *encoding = NULL;
2486 const char *errors = NULL;
2487 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002488 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 Py_ssize_t size;
2490 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002491 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002494 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002495 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2496 &encoding, &errors))
2497 return NULL;
2498 if (x == NULL) {
2499 if (encoding != NULL || errors != NULL) {
2500 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002501 encoding != NULL ?
2502 "encoding without a string argument" :
2503 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 return NULL;
2505 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002506 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002509 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002511 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002512 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002513 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 return NULL;
2515 }
2516 new = PyUnicode_AsEncodedString(x, encoding, errors);
2517 if (new == NULL)
2518 return NULL;
2519 assert(PyBytes_Check(new));
2520 return new;
2521 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002522
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002523 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002524 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002525 PyUnicode_Check(x) ?
2526 "string argument without an encoding" :
2527 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002528 return NULL;
2529 }
2530
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002531 /* We'd like to call PyObject_Bytes here, but we need to check for an
2532 integer argument before deferring to PyBytes_FromObject, something
2533 PyObject_Bytes doesn't do. */
2534 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2535 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002536 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002537 Py_DECREF(func);
2538 if (new == NULL)
2539 return NULL;
2540 if (!PyBytes_Check(new)) {
2541 PyErr_Format(PyExc_TypeError,
2542 "__bytes__ returned non-bytes (type %.200s)",
2543 Py_TYPE(new)->tp_name);
2544 Py_DECREF(new);
2545 return NULL;
2546 }
2547 return new;
2548 }
2549 else if (PyErr_Occurred())
2550 return NULL;
2551
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002552 if (PyUnicode_Check(x)) {
2553 PyErr_SetString(PyExc_TypeError,
2554 "string argument without an encoding");
2555 return NULL;
2556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002558 if (PyIndex_Check(x)) {
2559 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2560 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002561 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002562 return NULL;
2563 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002564 }
INADA Naokia634e232017-01-06 17:32:01 +09002565 else {
2566 if (size < 0) {
2567 PyErr_SetString(PyExc_ValueError, "negative count");
2568 return NULL;
2569 }
2570 new = _PyBytes_FromSize(size, 1);
2571 if (new == NULL)
2572 return NULL;
2573 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002574 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002577 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002578}
2579
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002580static PyObject*
2581_PyBytes_FromBuffer(PyObject *x)
2582{
2583 PyObject *new;
2584 Py_buffer view;
2585
2586 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2587 return NULL;
2588
2589 new = PyBytes_FromStringAndSize(NULL, view.len);
2590 if (!new)
2591 goto fail;
2592 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2593 &view, view.len, 'C') < 0)
2594 goto fail;
2595 PyBuffer_Release(&view);
2596 return new;
2597
2598fail:
2599 Py_XDECREF(new);
2600 PyBuffer_Release(&view);
2601 return NULL;
2602}
2603
2604static PyObject*
2605_PyBytes_FromList(PyObject *x)
2606{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002607 Py_ssize_t i, size = PyList_GET_SIZE(x);
2608 Py_ssize_t value;
2609 char *str;
2610 PyObject *item;
2611 _PyBytesWriter writer;
2612
2613 _PyBytesWriter_Init(&writer);
2614 str = _PyBytesWriter_Alloc(&writer, size);
2615 if (str == NULL)
2616 return NULL;
2617 writer.overallocate = 1;
2618 size = writer.allocated;
2619
2620 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2621 item = PyList_GET_ITEM(x, i);
2622 Py_INCREF(item);
2623 value = PyNumber_AsSsize_t(item, NULL);
2624 Py_DECREF(item);
2625 if (value == -1 && PyErr_Occurred())
2626 goto error;
2627
2628 if (value < 0 || value >= 256) {
2629 PyErr_SetString(PyExc_ValueError,
2630 "bytes must be in range(0, 256)");
2631 goto error;
2632 }
2633
2634 if (i >= size) {
2635 str = _PyBytesWriter_Resize(&writer, str, size+1);
2636 if (str == NULL)
2637 return NULL;
2638 size = writer.allocated;
2639 }
2640 *str++ = (char) value;
2641 }
2642 return _PyBytesWriter_Finish(&writer, str);
2643
2644 error:
2645 _PyBytesWriter_Dealloc(&writer);
2646 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002647}
2648
2649static PyObject*
2650_PyBytes_FromTuple(PyObject *x)
2651{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002652 PyObject *bytes;
2653 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2654 Py_ssize_t value;
2655 char *str;
2656 PyObject *item;
2657
2658 bytes = PyBytes_FromStringAndSize(NULL, size);
2659 if (bytes == NULL)
2660 return NULL;
2661 str = ((PyBytesObject *)bytes)->ob_sval;
2662
2663 for (i = 0; i < size; i++) {
2664 item = PyTuple_GET_ITEM(x, i);
2665 value = PyNumber_AsSsize_t(item, NULL);
2666 if (value == -1 && PyErr_Occurred())
2667 goto error;
2668
2669 if (value < 0 || value >= 256) {
2670 PyErr_SetString(PyExc_ValueError,
2671 "bytes must be in range(0, 256)");
2672 goto error;
2673 }
2674 *str++ = (char) value;
2675 }
2676 return bytes;
2677
2678 error:
2679 Py_DECREF(bytes);
2680 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002681}
2682
2683static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002684_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002685{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002686 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002688 _PyBytesWriter writer;
2689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002691 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 if (size == -1 && PyErr_Occurred())
2693 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002694
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002695 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002696 str = _PyBytesWriter_Alloc(&writer, size);
2697 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002699 writer.overallocate = 1;
2700 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 /* Run the iterator to exhaustion */
2703 for (i = 0; ; i++) {
2704 PyObject *item;
2705 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 /* Get the next item */
2708 item = PyIter_Next(it);
2709 if (item == NULL) {
2710 if (PyErr_Occurred())
2711 goto error;
2712 break;
2713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002716 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 Py_DECREF(item);
2718 if (value == -1 && PyErr_Occurred())
2719 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 /* Range check */
2722 if (value < 0 || value >= 256) {
2723 PyErr_SetString(PyExc_ValueError,
2724 "bytes must be in range(0, 256)");
2725 goto error;
2726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 /* Append the byte */
2729 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002730 str = _PyBytesWriter_Resize(&writer, str, size+1);
2731 if (str == NULL)
2732 return NULL;
2733 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002734 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002735 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002736 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002737
2738 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739
2740 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002741 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743}
2744
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002745PyObject *
2746PyBytes_FromObject(PyObject *x)
2747{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002748 PyObject *it, *result;
2749
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002750 if (x == NULL) {
2751 PyErr_BadInternalCall();
2752 return NULL;
2753 }
2754
2755 if (PyBytes_CheckExact(x)) {
2756 Py_INCREF(x);
2757 return x;
2758 }
2759
2760 /* Use the modern buffer interface */
2761 if (PyObject_CheckBuffer(x))
2762 return _PyBytes_FromBuffer(x);
2763
2764 if (PyList_CheckExact(x))
2765 return _PyBytes_FromList(x);
2766
2767 if (PyTuple_CheckExact(x))
2768 return _PyBytes_FromTuple(x);
2769
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002770 if (!PyUnicode_Check(x)) {
2771 it = PyObject_GetIter(x);
2772 if (it != NULL) {
2773 result = _PyBytes_FromIterator(it, x);
2774 Py_DECREF(it);
2775 return result;
2776 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002777 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2778 return NULL;
2779 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002780 }
2781
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002782 PyErr_Format(PyExc_TypeError,
2783 "cannot convert '%.200s' object to bytes",
2784 x->ob_type->tp_name);
2785 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002786}
2787
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002789bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 PyObject *tmp, *pnew;
2792 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 assert(PyType_IsSubtype(type, &PyBytes_Type));
2795 tmp = bytes_new(&PyBytes_Type, args, kwds);
2796 if (tmp == NULL)
2797 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002798 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 n = PyBytes_GET_SIZE(tmp);
2800 pnew = type->tp_alloc(type, n);
2801 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002802 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 PyBytes_AS_STRING(tmp), n+1);
2804 ((PyBytesObject *)pnew)->ob_shash =
2805 ((PyBytesObject *)tmp)->ob_shash;
2806 }
2807 Py_DECREF(tmp);
2808 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002809}
2810
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002811PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002812"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002814bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002815bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2816bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002817\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002818Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002819 - an iterable yielding integers in range(256)\n\
2820 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002821 - any object implementing the buffer API.\n\
2822 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002823
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002824static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002825
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002827 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2828 "bytes",
2829 PyBytesObject_SIZE,
2830 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002831 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002832 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 0, /* tp_getattr */
2834 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002835 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002837 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 &bytes_as_sequence, /* tp_as_sequence */
2839 &bytes_as_mapping, /* tp_as_mapping */
2840 (hashfunc)bytes_hash, /* tp_hash */
2841 0, /* tp_call */
2842 bytes_str, /* tp_str */
2843 PyObject_GenericGetAttr, /* tp_getattro */
2844 0, /* tp_setattro */
2845 &bytes_as_buffer, /* tp_as_buffer */
2846 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2847 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2848 bytes_doc, /* tp_doc */
2849 0, /* tp_traverse */
2850 0, /* tp_clear */
2851 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2852 0, /* tp_weaklistoffset */
2853 bytes_iter, /* tp_iter */
2854 0, /* tp_iternext */
2855 bytes_methods, /* tp_methods */
2856 0, /* tp_members */
2857 0, /* tp_getset */
2858 &PyBaseObject_Type, /* tp_base */
2859 0, /* tp_dict */
2860 0, /* tp_descr_get */
2861 0, /* tp_descr_set */
2862 0, /* tp_dictoffset */
2863 0, /* tp_init */
2864 0, /* tp_alloc */
2865 bytes_new, /* tp_new */
2866 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002867};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002868
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002870PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 assert(pv != NULL);
2873 if (*pv == NULL)
2874 return;
2875 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002876 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 return;
2878 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002879
2880 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2881 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002882 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002883 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002884
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002885 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002886 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2887 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2888 Py_CLEAR(*pv);
2889 return;
2890 }
2891
2892 oldsize = PyBytes_GET_SIZE(*pv);
2893 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2894 PyErr_NoMemory();
2895 goto error;
2896 }
2897 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2898 goto error;
2899
2900 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2901 PyBuffer_Release(&wb);
2902 return;
2903
2904 error:
2905 PyBuffer_Release(&wb);
2906 Py_CLEAR(*pv);
2907 return;
2908 }
2909
2910 else {
2911 /* Multiple references, need to create new object */
2912 PyObject *v;
2913 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002914 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002915 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916}
2917
2918void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002919PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002921 PyBytes_Concat(pv, w);
2922 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002923}
2924
2925
Ethan Furmanb95b5612015-01-23 20:05:18 -08002926/* The following function breaks the notion that bytes are immutable:
2927 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002929 as creating a new bytes object and destroying the old one, only
2930 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002931 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002932 Note that if there's not enough memory to resize the bytes object, the
2933 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934 memory" exception is set, and -1 is returned. Else (on success) 0 is
2935 returned, and the value in *pv may or may not be the same as on input.
2936 As always, an extra byte is allocated for a trailing \0 byte (newsize
2937 does *not* include that), and a trailing \0 byte is stored.
2938*/
2939
2940int
2941_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2942{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002943 PyObject *v;
2944 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002946 if (!PyBytes_Check(v) || newsize < 0) {
2947 goto error;
2948 }
2949 if (Py_SIZE(v) == newsize) {
2950 /* return early if newsize equals to v->ob_size */
2951 return 0;
2952 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002953 if (Py_SIZE(v) == 0) {
2954 if (newsize == 0) {
2955 return 0;
2956 }
2957 *pv = _PyBytes_FromSize(newsize, 0);
2958 Py_DECREF(v);
2959 return (*pv == NULL) ? -1 : 0;
2960 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002961 if (Py_REFCNT(v) != 1) {
2962 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02002964 if (newsize == 0) {
2965 *pv = _PyBytes_FromSize(0, 0);
2966 Py_DECREF(v);
2967 return (*pv == NULL) ? -1 : 0;
2968 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 /* XXX UNREF/NEWREF interface should be more symmetrical */
2970 _Py_DEC_REFTOTAL;
2971 _Py_ForgetReference(v);
2972 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002973 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 if (*pv == NULL) {
2975 PyObject_Del(v);
2976 PyErr_NoMemory();
2977 return -1;
2978 }
2979 _Py_NewReference(*pv);
2980 sv = (PyBytesObject *) *pv;
2981 Py_SIZE(sv) = newsize;
2982 sv->ob_sval[newsize] = '\0';
2983 sv->ob_shash = -1; /* invalidate cached hash value */
2984 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002985error:
2986 *pv = 0;
2987 Py_DECREF(v);
2988 PyErr_BadInternalCall();
2989 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002990}
2991
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992void
Victor Stinnerbed48172019-08-27 00:12:32 +02002993_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002996 for (i = 0; i < UCHAR_MAX + 1; i++)
2997 Py_CLEAR(characters[i]);
2998 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999}
3000
Benjamin Peterson4116f362008-05-27 00:36:20 +00003001/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003002
3003typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 PyObject_HEAD
3005 Py_ssize_t it_index;
3006 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008
3009static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 _PyObject_GC_UNTRACK(it);
3013 Py_XDECREF(it->it_seq);
3014 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015}
3016
3017static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 Py_VISIT(it->it_seq);
3021 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022}
3023
3024static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 PyBytesObject *seq;
3028 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 assert(it != NULL);
3031 seq = it->it_seq;
3032 if (seq == NULL)
3033 return NULL;
3034 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3037 item = PyLong_FromLong(
3038 (unsigned char)seq->ob_sval[it->it_index]);
3039 if (item != NULL)
3040 ++it->it_index;
3041 return item;
3042 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003045 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003046 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047}
3048
3049static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303050striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 Py_ssize_t len = 0;
3053 if (it->it_seq)
3054 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3055 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056}
3057
3058PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003060
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003061static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303062striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003063{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003064 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003065 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003066 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003067 it->it_seq, it->it_index);
3068 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003069 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003070 }
3071}
3072
3073PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3074
3075static PyObject *
3076striter_setstate(striterobject *it, PyObject *state)
3077{
3078 Py_ssize_t index = PyLong_AsSsize_t(state);
3079 if (index == -1 && PyErr_Occurred())
3080 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003081 if (it->it_seq != NULL) {
3082 if (index < 0)
3083 index = 0;
3084 else if (index > PyBytes_GET_SIZE(it->it_seq))
3085 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3086 it->it_index = index;
3087 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003088 Py_RETURN_NONE;
3089}
3090
3091PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3092
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3095 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003096 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3097 reduce_doc},
3098 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3099 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101};
3102
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003103PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003104 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3105 "bytes_iterator", /* tp_name */
3106 sizeof(striterobject), /* tp_basicsize */
3107 0, /* tp_itemsize */
3108 /* methods */
3109 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003110 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 0, /* tp_getattr */
3112 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003113 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 0, /* tp_repr */
3115 0, /* tp_as_number */
3116 0, /* tp_as_sequence */
3117 0, /* tp_as_mapping */
3118 0, /* tp_hash */
3119 0, /* tp_call */
3120 0, /* tp_str */
3121 PyObject_GenericGetAttr, /* tp_getattro */
3122 0, /* tp_setattro */
3123 0, /* tp_as_buffer */
3124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3125 0, /* tp_doc */
3126 (traverseproc)striter_traverse, /* tp_traverse */
3127 0, /* tp_clear */
3128 0, /* tp_richcompare */
3129 0, /* tp_weaklistoffset */
3130 PyObject_SelfIter, /* tp_iter */
3131 (iternextfunc)striter_next, /* tp_iternext */
3132 striter_methods, /* tp_methods */
3133 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003134};
3135
3136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003137bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 if (!PyBytes_Check(seq)) {
3142 PyErr_BadInternalCall();
3143 return NULL;
3144 }
3145 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3146 if (it == NULL)
3147 return NULL;
3148 it->it_index = 0;
3149 Py_INCREF(seq);
3150 it->it_seq = (PyBytesObject *)seq;
3151 _PyObject_GC_TRACK(it);
3152 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003153}
Victor Stinner00165072015-10-09 01:53:21 +02003154
3155
3156/* _PyBytesWriter API */
3157
/* Divisor used when overallocating a writer buffer: the buffer is grown by
   an extra 1/OVERALLOCATE_FACTOR of the requested size. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3165
3166void
3167_PyBytesWriter_Init(_PyBytesWriter *writer)
3168{
Victor Stinner661aacc2015-10-14 09:41:48 +02003169 /* Set all attributes before small_buffer to 0 */
3170 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003171#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003172 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003173#endif
3174}
3175
3176void
3177_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3178{
3179 Py_CLEAR(writer->buffer);
3180}
3181
3182Py_LOCAL_INLINE(char*)
3183_PyBytesWriter_AsString(_PyBytesWriter *writer)
3184{
Victor Stinner661aacc2015-10-14 09:41:48 +02003185 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003186 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003187 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003188 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003189 else if (writer->use_bytearray) {
3190 assert(writer->buffer != NULL);
3191 return PyByteArray_AS_STRING(writer->buffer);
3192 }
3193 else {
3194 assert(writer->buffer != NULL);
3195 return PyBytes_AS_STRING(writer->buffer);
3196 }
Victor Stinner00165072015-10-09 01:53:21 +02003197}
3198
3199Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003200_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003201{
3202 char *start = _PyBytesWriter_AsString(writer);
3203 assert(str != NULL);
3204 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003205 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003206 return str - start;
3207}
3208
3209Py_LOCAL_INLINE(void)
3210_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3211{
3212#ifdef Py_DEBUG
3213 char *start, *end;
3214
Victor Stinner661aacc2015-10-14 09:41:48 +02003215 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003216 assert(writer->buffer == NULL);
3217 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003218 else {
3219 assert(writer->buffer != NULL);
3220 if (writer->use_bytearray)
3221 assert(PyByteArray_CheckExact(writer->buffer));
3222 else
3223 assert(PyBytes_CheckExact(writer->buffer));
3224 assert(Py_REFCNT(writer->buffer) == 1);
3225 }
Victor Stinner00165072015-10-09 01:53:21 +02003226
Victor Stinner661aacc2015-10-14 09:41:48 +02003227 if (writer->use_bytearray) {
3228 /* bytearray has its own overallocation algorithm,
3229 writer overallocation must be disabled */
3230 assert(!writer->overallocate);
3231 }
3232
3233 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003234 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003235 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003236 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003237 assert(start[writer->allocated] == 0);
3238
3239 end = start + writer->allocated;
3240 assert(str != NULL);
3241 assert(start <= str && str <= end);
3242#endif
3243}
3244
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003245void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003246_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003247{
3248 Py_ssize_t allocated, pos;
3249
3250 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003251 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003252
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003253 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003254 if (writer->overallocate
3255 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3256 /* overallocate to limit the number of realloc() */
3257 allocated += allocated / OVERALLOCATE_FACTOR;
3258 }
3259
Victor Stinner2bf89932015-10-14 11:25:33 +02003260 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003261 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003262 if (writer->use_bytearray) {
3263 if (PyByteArray_Resize(writer->buffer, allocated))
3264 goto error;
3265 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3266 but we cannot use ob_alloc because bytes may need to be moved
3267 to use the whole buffer. bytearray uses an internal optimization
3268 to avoid moving or copying bytes when bytes are removed at the
3269 beginning (ex: del bytearray[:1]). */
3270 }
3271 else {
3272 if (_PyBytes_Resize(&writer->buffer, allocated))
3273 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003274 }
3275 }
3276 else {
3277 /* convert from stack buffer to bytes object buffer */
3278 assert(writer->buffer == NULL);
3279
Victor Stinner661aacc2015-10-14 09:41:48 +02003280 if (writer->use_bytearray)
3281 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3282 else
3283 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003284 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003285 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003286
3287 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003288 char *dest;
3289 if (writer->use_bytearray)
3290 dest = PyByteArray_AS_STRING(writer->buffer);
3291 else
3292 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003293 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003294 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003295 pos);
3296 }
3297
Victor Stinnerb3653a32015-10-09 03:38:24 +02003298 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003299#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003300 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003301#endif
Victor Stinner00165072015-10-09 01:53:21 +02003302 }
3303 writer->allocated = allocated;
3304
3305 str = _PyBytesWriter_AsString(writer) + pos;
3306 _PyBytesWriter_CheckConsistency(writer, str);
3307 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003308
3309error:
3310 _PyBytesWriter_Dealloc(writer);
3311 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003312}
3313
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003314void*
3315_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316{
3317 Py_ssize_t new_min_size;
3318
3319 _PyBytesWriter_CheckConsistency(writer, str);
3320 assert(size >= 0);
3321
3322 if (size == 0) {
3323 /* nothing to do */
3324 return str;
3325 }
3326
3327 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328 PyErr_NoMemory();
3329 _PyBytesWriter_Dealloc(writer);
3330 return NULL;
3331 }
3332 new_min_size = writer->min_size + size;
3333
3334 if (new_min_size > writer->allocated)
3335 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336
3337 writer->min_size = new_min_size;
3338 return str;
3339}
3340
Victor Stinner00165072015-10-09 01:53:21 +02003341/* Allocate the buffer to write size bytes.
3342 Return the pointer to the beginning of buffer data.
3343 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003344void*
Victor Stinner00165072015-10-09 01:53:21 +02003345_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346{
3347 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003348 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003349 assert(size >= 0);
3350
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003352#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003353 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003354 /* In debug mode, don't use the full small buffer because it is less
3355 efficient than bytes and bytearray objects to detect buffer underflow
3356 and buffer overflow. Use 10 bytes of the small buffer to test also
3357 code using the smaller buffer in debug mode.
3358
3359 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360 in debug mode to also be able to detect stack overflow when running
3361 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363 stack overflow. */
3364 writer->allocated = Py_MIN(writer->allocated, 10);
3365 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003367 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003368#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003369 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003370#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003371 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003372}
3373
3374PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003375_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003376{
Victor Stinner2bf89932015-10-14 11:25:33 +02003377 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003378 PyObject *result;
3379
3380 _PyBytesWriter_CheckConsistency(writer, str);
3381
Victor Stinner2bf89932015-10-14 11:25:33 +02003382 size = _PyBytesWriter_GetSize(writer, str);
3383 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 Py_CLEAR(writer->buffer);
3385 /* Get the empty byte string singleton */
3386 result = PyBytes_FromStringAndSize(NULL, 0);
3387 }
3388 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003389 if (writer->use_bytearray) {
3390 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391 }
3392 else {
3393 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003395 }
3396 else {
3397 result = writer->buffer;
3398 writer->buffer = NULL;
3399
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003402 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 }
3407 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003408 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003409 assert(result == NULL);
3410 return NULL;
3411 }
Victor Stinner00165072015-10-09 01:53:21 +02003412 }
3413 }
Victor Stinner00165072015-10-09 01:53:21 +02003414 }
Victor Stinner00165072015-10-09 01:53:21 +02003415 return result;
3416}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003417
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003418void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003419_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003420 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003422 char *str = (char *)ptr;
3423
Victor Stinnerce179bf2015-10-09 12:57:22 +02003424 str = _PyBytesWriter_Prepare(writer, str, size);
3425 if (str == NULL)
3426 return NULL;
3427
Christian Heimesf051e432016-09-13 20:22:02 +02003428 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003429 str += size;
3430
3431 return str;
3432}