blob: e1f5ee2f62f336e595604ddddb6c644d0a66dd24 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
#ifdef COUNT_ALLOCS
/* Allocation statistics: incremented each time a constructor in this file
   hands out the cached empty bytes object (_Py_null_strings) or a cached
   one-byte bytes object (_Py_one_strings) instead of allocating. */
Py_ssize_t _Py_null_strings, _Py_one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value
   (`*str & UCHAR_MAX`).  A slot stays NULL until the first bytes object
   holding that byte has been created from existing data. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty bytes object has
   been created. */
static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041 For PyBytes_FromString(), the parameter `str' points to a null-terminated
42 string containing exactly `size' bytes.
43
Martin Pantera90a4a92016-05-30 04:04:50 +000044 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 either NULL or else points to a string containing at least `size' bytes.
46 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47 not have to be null-terminated. (Therefore it is safe to construct a
48 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50 bytes (setting the last byte to the null terminating character) and you can
51 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000052 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 alter the data yourself, since the strings may be shared.
54
55 The PyObject member `op->ob_size', which denotes the number of "extra
56 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020057 allocated for string data, not counting the null terminating character.
58 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059 PyBytes_FromStringAndSize()) or the length of the string in the `str'
60 parameter (for PyBytes_FromString()).
61*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
/* Build a bytes object from a printf-like `format` and a va_list.

   Conversions handled by the switch below: %c, %d, %u, %i, %x, %s, %p and
   %%.  The 'l' and 'z' length flags are recognized only in front of 'd'
   and 'u'.  A field width is parsed but ignored; a precision is only used
   by %s.  On any other conversion character (or a format that ends in the
   middle of a specifier) the rest of the format string, starting at the
   '%', is copied verbatim and the result built so far is returned.

   Returns a new reference, or NULL with an exception set. */
PyObject *
PyBytes_FromFormatV(const char *format, va_list vargs)
{
    char *s;            /* current write position in the writer's buffer */
    const char *f;      /* current read position in `format` */
    const char *p;      /* start ('%') of the specifier being parsed;
                           reused as the argument pointer for %s */
    Py_ssize_t prec;
    int longflag;
    int size_tflag;
    /* Longest 64-bit formatted numbers:
       - "18446744073709551615\0" (21 bytes)
       - "-9223372036854775808\0" (21 bytes)
       Decimal takes the most space (it isn't enough for octal.)

       Longest 64-bit pointer representation:
       "0xffffffffffffffff\0" (19 bytes). */
    char buffer[21];
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    /* Start with room for the format string itself; the per-specifier
       code below corrects writer.min_size as specifiers are consumed. */
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
    if (s == NULL)
        return NULL;
    writer.overallocate = 1;

/* Append the null-terminated string `str` to the writer, jumping to the
   `error` label if the writer reports failure. */
#define WRITE_BYTES(str) \
    do { \
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
        if (s == NULL) \
            goto error; \
    } while (0)

    for (f = format; *f; f++) {
        /* Literal byte: copy it through unchanged. */
        if (*f != '%') {
            *s++ = *f;
            continue;
        }

        /* Remember where the specifier starts, then step past the '%'. */
        p = f++;

        /* ignore the width (ex: 10 in "%10s") */
        while (Py_ISDIGIT(*f))
            f++;

        /* parse the precision (ex: 10 in "%.10s") */
        prec = 0;
        if (*f == '.') {
            f++;
            for (; Py_ISDIGIT(*f); f++) {
                prec = (prec * 10) + (*f - '0');
            }
        }

        /* Skip anything that is neither a letter nor '%' (nor the end of
           the format string). */
        while (*f && *f != '%' && !Py_ISALPHA(*f))
            f++;

        /* handle the long flag ('l'), but only for %ld and %lu.
           others can be added when necessary. */
        longflag = 0;
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
            longflag = 1;
            ++f;
        }

        /* handle the size_t flag ('z'). */
        size_tflag = 0;
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
            size_tflag = 1;
            ++f;
        }

        /* subtract bytes preallocated for the format string
           (ex: 2 for "%s") */
        writer.min_size -= (f - p + 1);

        switch (*f) {
        case 'c':
        {
            int c = va_arg(vargs, int);
            if (c < 0 || c > 255) {
                PyErr_SetString(PyExc_OverflowError,
                                "PyBytes_FromFormatV(): %c format "
                                "expects an integer in range [0; 255]");
                goto error;
            }
            /* One output byte, written directly (no WRITE_BYTES), so the
               size accounting is adjusted by hand. */
            writer.min_size++;
            *s++ = (unsigned char)c;
            break;
        }

        case 'd':
            if (longflag)
                sprintf(buffer, "%ld", va_arg(vargs, long));
            else if (size_tflag)
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
                    va_arg(vargs, Py_ssize_t));
            else
                sprintf(buffer, "%d", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'u':
            if (longflag)
                sprintf(buffer, "%lu",
                    va_arg(vargs, unsigned long));
            else if (size_tflag)
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
                    va_arg(vargs, size_t));
            else
                sprintf(buffer, "%u",
                    va_arg(vargs, unsigned int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'i':
            sprintf(buffer, "%i", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'x':
            sprintf(buffer, "%x", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 's':
        {
            Py_ssize_t i;

            p = va_arg(vargs, const char*);
            /* No precision, or a precision of 0 (prec starts at 0 and is
               never negative here): copy the whole string.  Otherwise copy
               at most `prec` bytes, stopping early at the terminator. */
            if (prec <= 0) {
                i = strlen(p);
            }
            else {
                i = 0;
                while (i < prec && p[i]) {
                    i++;
                }
            }
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
            if (s == NULL)
                goto error;
            break;
        }

        case 'p':
            sprintf(buffer, "%p", va_arg(vargs, void*));
            assert(strlen(buffer) < sizeof(buffer));
            /* %p is ill-defined:  ensure leading 0x. */
            if (buffer[1] == 'X')
                buffer[1] = 'x';
            else if (buffer[1] != 'x') {
                /* No prefix at all: shift the digits (and the terminator)
                   right by two and insert "0x". */
                memmove(buffer+2, buffer, strlen(buffer)+1);
                buffer[0] = '0';
                buffer[1] = 'x';
            }
            WRITE_BYTES(buffer);
            break;

        case '%':
            writer.min_size++;
            *s++ = '%';
            break;

        default:
            if (*f == 0) {
                /* fix min_size if we reached the end of the format string */
                writer.min_size++;
            }

            /* invalid format string: copy unformatted string and exit */
            WRITE_BYTES(p);
            return _PyBytesWriter_Finish(&writer, s);
        }
    }

#undef WRITE_BYTES

    return _PyBytesWriter_Finish(&writer, s);

 error:
    /* Release the writer's buffer; the exception is already set. */
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
/* Format flags: one bit per flag character that may follow '%'. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
670#ifdef Py_DEBUG
671 char *before;
672#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800673
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200675 if (*fmt == '%') {
676 *res++ = '%';
677 fmt++;
678 fmtcnt--;
679 continue;
680 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800681 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200682 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 Py_ssize_t keylen;
684 PyObject *key;
685 int pcount = 1;
686
687 if (dict == NULL) {
688 PyErr_SetString(PyExc_TypeError,
689 "format requires a mapping");
690 goto error;
691 }
692 ++fmt;
693 --fmtcnt;
694 keystart = fmt;
695 /* Skip over balanced parentheses */
696 while (pcount > 0 && --fmtcnt >= 0) {
697 if (*fmt == ')')
698 --pcount;
699 else if (*fmt == '(')
700 ++pcount;
701 fmt++;
702 }
703 keylen = fmt - keystart - 1;
704 if (fmtcnt < 0 || pcount > 0) {
705 PyErr_SetString(PyExc_ValueError,
706 "incomplete format key");
707 goto error;
708 }
709 key = PyBytes_FromStringAndSize(keystart,
710 keylen);
711 if (key == NULL)
712 goto error;
713 if (args_owned) {
714 Py_DECREF(args);
715 args_owned = 0;
716 }
717 args = PyObject_GetItem(dict, key);
718 Py_DECREF(key);
719 if (args == NULL) {
720 goto error;
721 }
722 args_owned = 1;
723 arglen = -1;
724 argidx = -2;
725 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200726
727 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800728 while (--fmtcnt >= 0) {
729 switch (c = *fmt++) {
730 case '-': flags |= F_LJUST; continue;
731 case '+': flags |= F_SIGN; continue;
732 case ' ': flags |= F_BLANK; continue;
733 case '#': flags |= F_ALT; continue;
734 case '0': flags |= F_ZERO; continue;
735 }
736 break;
737 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200738
739 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800740 if (c == '*') {
741 v = getnextarg(args, arglen, &argidx);
742 if (v == NULL)
743 goto error;
744 if (!PyLong_Check(v)) {
745 PyErr_SetString(PyExc_TypeError,
746 "* wants int");
747 goto error;
748 }
749 width = PyLong_AsSsize_t(v);
750 if (width == -1 && PyErr_Occurred())
751 goto error;
752 if (width < 0) {
753 flags |= F_LJUST;
754 width = -width;
755 }
756 if (--fmtcnt >= 0)
757 c = *fmt++;
758 }
759 else if (c >= 0 && isdigit(c)) {
760 width = c - '0';
761 while (--fmtcnt >= 0) {
762 c = Py_CHARMASK(*fmt++);
763 if (!isdigit(c))
764 break;
765 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
766 PyErr_SetString(
767 PyExc_ValueError,
768 "width too big");
769 goto error;
770 }
771 width = width*10 + (c - '0');
772 }
773 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200774
775 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800776 if (c == '.') {
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 if (c == '*') {
781 v = getnextarg(args, arglen, &argidx);
782 if (v == NULL)
783 goto error;
784 if (!PyLong_Check(v)) {
785 PyErr_SetString(
786 PyExc_TypeError,
787 "* wants int");
788 goto error;
789 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200790 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800791 if (prec == -1 && PyErr_Occurred())
792 goto error;
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 else if (c >= 0 && isdigit(c)) {
799 prec = c - '0';
800 while (--fmtcnt >= 0) {
801 c = Py_CHARMASK(*fmt++);
802 if (!isdigit(c))
803 break;
804 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
805 PyErr_SetString(
806 PyExc_ValueError,
807 "prec too big");
808 goto error;
809 }
810 prec = prec*10 + (c - '0');
811 }
812 }
813 } /* prec */
814 if (fmtcnt >= 0) {
815 if (c == 'h' || c == 'l' || c == 'L') {
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 PyErr_SetString(PyExc_ValueError,
822 "incomplete format");
823 goto error;
824 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300829 if (fmtcnt == 0) {
830 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831 writer.overallocate = 0;
832 }
833
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 sign = 0;
835 fill = ' ';
836 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700837 case 'r':
838 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 if (temp == NULL)
842 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200843 assert(PyUnicode_IS_ASCII(temp));
844 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
845 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 if (prec >= 0 && len > prec)
847 len = prec;
848 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200849
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 case 's':
851 // %s is only for 2/3 code; 3 only code should use %b
852 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200853 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (temp == NULL)
855 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800856 if (prec >= 0 && len > prec)
857 len = prec;
858 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200859
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 case 'i':
861 case 'd':
862 case 'u':
863 case 'o':
864 case 'x':
865 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200866 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200867 && width == -1 && prec == -1
868 && !(flags & (F_SIGN | F_BLANK))
869 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200870 {
871 /* Fast path */
872 int alternate = flags & F_ALT;
873 int base;
874
875 switch(c)
876 {
877 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700878 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 case 'd':
880 case 'i':
881 case 'u':
882 base = 10;
883 break;
884 case 'o':
885 base = 8;
886 break;
887 case 'x':
888 case 'X':
889 base = 16;
890 break;
891 }
892
893 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200894 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200895 res = _PyLong_FormatBytesWriter(&writer, res,
896 v, base, alternate);
897 if (res == NULL)
898 goto error;
899 continue;
900 }
901
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300902 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200903 if (!temp)
904 goto error;
905 assert(PyUnicode_IS_ASCII(temp));
906 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
907 len = PyUnicode_GET_LENGTH(temp);
908 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 if (flags & F_ZERO)
910 fill = '0';
911 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 case 'e':
914 case 'E':
915 case 'f':
916 case 'F':
917 case 'g':
918 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200919 if (width == -1 && prec == -1
920 && !(flags & (F_SIGN | F_BLANK)))
921 {
922 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200923 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200924 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200925 if (res == NULL)
926 goto error;
927 continue;
928 }
929
Victor Stinnerad771582015-10-09 12:38:53 +0200930 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 goto error;
932 pbuf = PyBytes_AS_STRING(temp);
933 len = PyBytes_GET_SIZE(temp);
934 sign = 1;
935 if (flags & F_ZERO)
936 fill = '0';
937 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200938
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200940 pbuf = &onechar;
941 len = byte_converter(v, &onechar);
942 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200944 if (width == -1) {
945 /* Fast path */
946 *res++ = onechar;
947 continue;
948 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800949 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 default:
952 PyErr_Format(PyExc_ValueError,
953 "unsupported format character '%c' (0x%x) "
954 "at index %zd",
955 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200956 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 goto error;
958 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 if (sign) {
961 if (*pbuf == '-' || *pbuf == '+') {
962 sign = *pbuf++;
963 len--;
964 }
965 else if (flags & F_SIGN)
966 sign = '+';
967 else if (flags & F_BLANK)
968 sign = ' ';
969 else
970 sign = 0;
971 }
972 if (width < len)
973 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974
975 alloc = width;
976 if (sign != 0 && len == width)
977 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200978 /* 2: size preallocated for %s */
979 if (alloc > 2) {
980 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981 if (res == NULL)
982 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984#ifdef Py_DEBUG
985 before = res;
986#endif
987
988 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (sign) {
990 if (fill != ' ')
991 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 if (width > len)
993 width--;
994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* Write the numeric prefix for "x", "X" and "o" formats
997 if the alternate form is used.
998 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200999 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 assert(pbuf[0] == '0');
1001 assert(pbuf[1] == c);
1002 if (fill != ' ') {
1003 *res++ = *pbuf++;
1004 *res++ = *pbuf++;
1005 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 width -= 2;
1007 if (width < 0)
1008 width = 0;
1009 len -= 2;
1010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001013 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014 memset(res, fill, width - len);
1015 res += (width - len);
1016 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* If padding with spaces: write sign if needed and/or numeric
1020 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (fill == ' ') {
1022 if (sign)
1023 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001024 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 assert(pbuf[0] == '0');
1026 assert(pbuf[1] == c);
1027 *res++ = *pbuf++;
1028 *res++ = *pbuf++;
1029 }
1030 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
1032 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001033 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
1036 /* Pad right with the fill character if needed */
1037 if (width > len) {
1038 memset(res, ' ', width - len);
1039 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001041
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001042 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 PyErr_SetString(PyExc_TypeError,
1044 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
1046 goto error;
1047 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
1050#ifdef Py_DEBUG
1051 /* check that we computed the exact size for this write */
1052 assert((res - before) == alloc);
1053#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055
1056 /* If overallocation was disabled, ensure that it was the last
1057 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001058 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 if (argidx < arglen && !dict) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "not all arguments converted during bytes formatting");
1064 goto error;
1065 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071
1072 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001073 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001074 if (args_owned) {
1075 Py_DECREF(args);
1076 }
1077 return NULL;
1078}
1079
/* Unescape a backslash-escaped string.  If unicode is non-zero,
   the string is a u-literal (the argument is accepted for API
   compatibility; the decoder below does not read it).  If
   recode_encoding is non-NULL, the string is UTF-8 encoded and its
   non-ASCII bytes should be re-encoded in the specified encoding. */
1084
Victor Stinner2ec80632015-10-14 13:32:13 +02001085static char *
1086_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1087 const char *errors, const char *recode_encoding,
1088 _PyBytesWriter *writer, char *p)
1089{
1090 PyObject *u, *w;
1091 const char* t;
1092
1093 t = *s;
1094 /* Decode non-ASCII bytes as UTF-8. */
1095 while (t < end && (*t & 0x80))
1096 t++;
1097 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1098 if (u == NULL)
1099 return NULL;
1100
1101 /* Recode them in target encoding. */
1102 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1103 Py_DECREF(u);
1104 if (w == NULL)
1105 return NULL;
1106 assert(PyBytes_Check(w));
1107
1108 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001109 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001110 p = _PyBytesWriter_WriteBytes(writer, p,
1111 PyBytes_AS_STRING(w),
1112 PyBytes_GET_SIZE(w));
1113 Py_DECREF(w);
1114 if (p == NULL)
1115 return NULL;
1116
1117 *s = t;
1118 return p;
1119}
1120
Eric V. Smith42454af2016-10-31 09:22:08 -04001121PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 Py_ssize_t len,
1123 const char *errors,
1124 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001125 const char *recode_encoding,
1126 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 _PyBytesWriter writer;
1132
1133 _PyBytesWriter_Init(&writer);
1134
1135 p = _PyBytesWriter_Alloc(&writer, len);
1136 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001138 writer.overallocate = 1;
1139
Eric V. Smith42454af2016-10-31 09:22:08 -04001140 *first_invalid_escape = NULL;
1141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 end = s + len;
1143 while (s < end) {
1144 if (*s != '\\') {
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 *p++ = *s++;
1147 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001148 else {
1149 /* non-ASCII character and need to recode */
1150 p = _PyBytes_DecodeEscapeRecode(&s, end,
1151 errors, recode_encoding,
1152 &writer, p);
1153 if (p == NULL)
1154 goto failed;
1155 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 continue;
1157 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001160 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 PyErr_SetString(PyExc_ValueError,
1162 "Trailing \\ in string");
1163 goto failed;
1164 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 switch (*s++) {
1167 /* XXX This assumes ASCII! */
1168 case '\n': break;
1169 case '\\': *p++ = '\\'; break;
1170 case '\'': *p++ = '\''; break;
1171 case '\"': *p++ = '\"'; break;
1172 case 'b': *p++ = '\b'; break;
1173 case 'f': *p++ = '\014'; break; /* FF */
1174 case 't': *p++ = '\t'; break;
1175 case 'n': *p++ = '\n'; break;
1176 case 'r': *p++ = '\r'; break;
1177 case 'v': *p++ = '\013'; break; /* VT */
1178 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1179 case '0': case '1': case '2': case '3':
1180 case '4': case '5': case '6': case '7':
1181 c = s[-1] - '0';
1182 if (s < end && '0' <= *s && *s <= '7') {
1183 c = (c<<3) + *s++ - '0';
1184 if (s < end && '0' <= *s && *s <= '7')
1185 c = (c<<3) + *s++ - '0';
1186 }
1187 *p++ = c;
1188 break;
1189 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001190 if (s+1 < end) {
1191 int digit1, digit2;
1192 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1193 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1194 if (digit1 < 16 && digit2 < 16) {
1195 *p++ = (unsigned char)((digit1 << 4) + digit2);
1196 s += 2;
1197 break;
1198 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001200 /* invalid hexadecimal digits */
1201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001203 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001204 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001205 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 goto failed;
1207 }
1208 if (strcmp(errors, "replace") == 0) {
1209 *p++ = '?';
1210 } else if (strcmp(errors, "ignore") == 0)
1211 /* do nothing */;
1212 else {
1213 PyErr_Format(PyExc_ValueError,
1214 "decoding error; unknown "
1215 "error handling code: %.400s",
1216 errors);
1217 goto failed;
1218 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001219 /* skip \x */
1220 if (s < end && Py_ISXDIGIT(s[0]))
1221 s++; /* and a hexdigit */
1222 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001225 if (*first_invalid_escape == NULL) {
1226 *first_invalid_escape = s-1; /* Back up one char, since we've
1227 already incremented s. */
1228 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001230 s--;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 }
1232 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001233
1234 return _PyBytesWriter_Finish(&writer, p);
1235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001237 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239}
1240
Eric V. Smith42454af2016-10-31 09:22:08 -04001241PyObject *PyBytes_DecodeEscape(const char *s,
1242 Py_ssize_t len,
1243 const char *errors,
1244 Py_ssize_t unicode,
1245 const char *recode_encoding)
1246{
1247 const char* first_invalid_escape;
1248 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249 recode_encoding,
1250 &first_invalid_escape);
1251 if (result == NULL)
1252 return NULL;
1253 if (first_invalid_escape != NULL) {
1254 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1255 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001256 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001257 Py_DECREF(result);
1258 return NULL;
1259 }
1260 }
1261 return result;
1262
1263}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* -------------------------------------------------------------------- */
1265/* object api */
1266
1267Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001268PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (!PyBytes_Check(op)) {
1271 PyErr_Format(PyExc_TypeError,
1272 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273 return -1;
1274 }
1275 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276}
1277
1278char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 if (!PyBytes_Check(op)) {
1282 PyErr_Format(PyExc_TypeError,
1283 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284 return NULL;
1285 }
1286 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287}
1288
1289int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001290PyBytes_AsStringAndSize(PyObject *obj,
1291 char **s,
1292 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 if (s == NULL) {
1295 PyErr_BadInternalCall();
1296 return -1;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 if (!PyBytes_Check(obj)) {
1300 PyErr_Format(PyExc_TypeError,
1301 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1302 return -1;
1303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 *s = PyBytes_AS_STRING(obj);
1306 if (len != NULL)
1307 *len = PyBytes_GET_SIZE(obj);
1308 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001309 PyErr_SetString(PyExc_ValueError,
1310 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 return -1;
1312 }
1313 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314}
Neal Norwitz6968b052007-02-27 19:02:19 +00001315
1316/* -------------------------------------------------------------------- */
1317/* Methods */
1318
Eric Smith0923d1d2009-04-16 20:16:10 +00001319#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001320
1321#include "stringlib/fastsearch.h"
1322#include "stringlib/count.h"
1323#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001324#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001325#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001326#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001327#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001328
Eric Smith0f78bff2009-11-30 01:01:42 +00001329#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001330
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331PyObject *
1332PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001333{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001334 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001336 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 unsigned char quote, *s, *p;
1339
1340 /* Compute size of output string */
1341 squotes = dquotes = 0;
1342 newsize = 3; /* b'' */
1343 s = (unsigned char*)op->ob_sval;
1344 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001345 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001347 case '\'': squotes++; break;
1348 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001350 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 default:
1352 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001353 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001355 if (newsize > PY_SSIZE_T_MAX - incr)
1356 goto overflow;
1357 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 }
1359 quote = '\'';
1360 if (smartquotes && squotes && !dquotes)
1361 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001362 if (squotes && quote == '\'') {
1363 if (newsize > PY_SSIZE_T_MAX - squotes)
1364 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001367
1368 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 if (v == NULL) {
1370 return NULL;
1371 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001372 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374 *p++ = 'b', *p++ = quote;
1375 for (i = 0; i < length; i++) {
1376 unsigned char c = op->ob_sval[i];
1377 if (c == quote || c == '\\')
1378 *p++ = '\\', *p++ = c;
1379 else if (c == '\t')
1380 *p++ = '\\', *p++ = 't';
1381 else if (c == '\n')
1382 *p++ = '\\', *p++ = 'n';
1383 else if (c == '\r')
1384 *p++ = '\\', *p++ = 'r';
1385 else if (c < ' ' || c >= 0x7f) {
1386 *p++ = '\\';
1387 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001388 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1389 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391 else
1392 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001395 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001396 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001397
1398 overflow:
1399 PyErr_SetString(PyExc_OverflowError,
1400 "bytes object is too large to make repr");
1401 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001402}
1403
Neal Norwitz6968b052007-02-27 19:02:19 +00001404static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001405bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001408}
1409
Neal Norwitz6968b052007-02-27 19:02:19 +00001410static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001411bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001412{
Victor Stinner331a6a52019-05-27 16:39:22 +02001413 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001414 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001416 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001418 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 }
1420 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001421}
1422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001424bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427}
Neal Norwitz6968b052007-02-27 19:02:19 +00001428
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429/* This is also used by PyBytes_Concat() */
1430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001431bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 Py_buffer va, vb;
1434 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 va.len = -1;
1437 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001438 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1439 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001441 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 goto done;
1443 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 /* Optimize end cases */
1446 if (va.len == 0 && PyBytes_CheckExact(b)) {
1447 result = b;
1448 Py_INCREF(result);
1449 goto done;
1450 }
1451 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1452 result = a;
1453 Py_INCREF(result);
1454 goto done;
1455 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001457 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 PyErr_NoMemory();
1459 goto done;
1460 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001462 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 if (result != NULL) {
1464 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1465 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1466 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
1468 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (va.len != -1)
1470 PyBuffer_Release(&va);
1471 if (vb.len != -1)
1472 PyBuffer_Release(&vb);
1473 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474}
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
1476static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001477bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001478{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479 Py_ssize_t i;
1480 Py_ssize_t j;
1481 Py_ssize_t size;
1482 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 size_t nbytes;
1484 if (n < 0)
1485 n = 0;
1486 /* watch out for overflows: the size can overflow int,
1487 * and the # of bytes needed can overflow size_t
1488 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001489 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 PyErr_SetString(PyExc_OverflowError,
1491 "repeated bytes are too long");
1492 return NULL;
1493 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001494 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496 Py_INCREF(a);
1497 return (PyObject *)a;
1498 }
1499 nbytes = (size_t)size;
1500 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501 PyErr_SetString(PyExc_OverflowError,
1502 "repeated bytes are too long");
1503 return NULL;
1504 }
1505 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506 if (op == NULL)
1507 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001508 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 op->ob_shash = -1;
1510 op->ob_sval[size] = '\0';
1511 if (Py_SIZE(a) == 1 && n > 0) {
1512 memset(op->ob_sval, a->ob_sval[0] , n);
1513 return (PyObject *) op;
1514 }
1515 i = 0;
1516 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001517 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 i = Py_SIZE(a);
1519 }
1520 while (i < size) {
1521 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001522 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 i += j;
1524 }
1525 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001526}
1527
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001528static int
1529bytes_contains(PyObject *self, PyObject *arg)
1530{
1531 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532}
1533
Neal Norwitz6968b052007-02-27 19:02:19 +00001534static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001535bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 if (i < 0 || i >= Py_SIZE(a)) {
1538 PyErr_SetString(PyExc_IndexError, "index out of range");
1539 return NULL;
1540 }
1541 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001542}
1543
Benjamin Peterson621b4302016-09-09 13:54:34 -07001544static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001545bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546{
1547 int cmp;
1548 Py_ssize_t len;
1549
1550 len = Py_SIZE(a);
1551 if (Py_SIZE(b) != len)
1552 return 0;
1553
1554 if (a->ob_sval[0] != b->ob_sval[0])
1555 return 0;
1556
1557 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558 return (cmp == 0);
1559}
1560
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001562bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 int c;
1565 Py_ssize_t len_a, len_b;
1566 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001567 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 /* Make sure both arguments are strings. */
1570 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001571 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001572 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001573 rc = PyObject_IsInstance((PyObject*)a,
1574 (PyObject*)&PyUnicode_Type);
1575 if (!rc)
1576 rc = PyObject_IsInstance((PyObject*)b,
1577 (PyObject*)&PyUnicode_Type);
1578 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001580 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001581 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001582 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001583 return NULL;
1584 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001585 else {
1586 rc = PyObject_IsInstance((PyObject*)a,
1587 (PyObject*)&PyLong_Type);
1588 if (!rc)
1589 rc = PyObject_IsInstance((PyObject*)b,
1590 (PyObject*)&PyLong_Type);
1591 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001592 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001593 if (rc) {
1594 if (PyErr_WarnEx(PyExc_BytesWarning,
1595 "Comparison between bytes and int", 1))
1596 return NULL;
1597 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001598 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 }
stratakise8b19652017-11-02 11:32:54 +01001600 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001602 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001604 case Py_EQ:
1605 case Py_LE:
1606 case Py_GE:
1607 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001608 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001609 case Py_NE:
1610 case Py_LT:
1611 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001612 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001613 default:
1614 PyErr_BadArgument();
1615 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 }
1617 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001618 else if (op == Py_EQ || op == Py_NE) {
1619 int eq = bytes_compare_eq(a, b);
1620 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001621 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001624 len_a = Py_SIZE(a);
1625 len_b = Py_SIZE(b);
1626 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001627 if (min_len > 0) {
1628 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001629 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001630 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001632 else
1633 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001634 if (c != 0)
1635 Py_RETURN_RICHCOMPARE(c, 0, op);
1636 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001640static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001642{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001643 if (a->ob_shash == -1) {
1644 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001645 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001646 }
1647 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001648}
1649
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001651bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 if (PyIndex_Check(item)) {
1654 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1655 if (i == -1 && PyErr_Occurred())
1656 return NULL;
1657 if (i < 0)
1658 i += PyBytes_GET_SIZE(self);
1659 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1660 PyErr_SetString(PyExc_IndexError,
1661 "index out of range");
1662 return NULL;
1663 }
1664 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1665 }
1666 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001667 Py_ssize_t start, stop, step, slicelength, i;
1668 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 char* source_buf;
1670 char* result_buf;
1671 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001672
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001673 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 return NULL;
1675 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001676 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1677 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 if (slicelength <= 0) {
1680 return PyBytes_FromStringAndSize("", 0);
1681 }
1682 else if (start == 0 && step == 1 &&
1683 slicelength == PyBytes_GET_SIZE(self) &&
1684 PyBytes_CheckExact(self)) {
1685 Py_INCREF(self);
1686 return (PyObject *)self;
1687 }
1688 else if (step == 1) {
1689 return PyBytes_FromStringAndSize(
1690 PyBytes_AS_STRING(self) + start,
1691 slicelength);
1692 }
1693 else {
1694 source_buf = PyBytes_AS_STRING(self);
1695 result = PyBytes_FromStringAndSize(NULL, slicelength);
1696 if (result == NULL)
1697 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 result_buf = PyBytes_AS_STRING(result);
1700 for (cur = start, i = 0; i < slicelength;
1701 cur += step, i++) {
1702 result_buf[i] = source_buf[cur];
1703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return result;
1706 }
1707 }
1708 else {
1709 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001710 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 Py_TYPE(item)->tp_name);
1712 return NULL;
1713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714}
1715
1716static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001717bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721}
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length, /*sq_length*/
1725 (binaryfunc)bytes_concat, /*sq_concat*/
1726 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727 (ssizeargfunc)bytes_item, /*sq_item*/
1728 0, /*sq_slice*/
1729 0, /*sq_ass_item*/
1730 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001731 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001734static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 (lenfunc)bytes_length,
1736 (binaryfunc)bytes_subscript,
1737 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738};
1739
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001740static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 (getbufferproc)bytes_buffer_getbuffer,
1742 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743};
1744
1745
/* Values for the `striptype` argument of do_xstrip(), do_strip() and
   do_argstrip().  The helpers strip the leading end unless the value is
   RIGHTSTRIP and the trailing end unless the value is LEFTSTRIP, so
   BOTHSTRIP strips both ends. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750/*[clinic input]
1751bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753 sep: object = None
1754 The delimiter according which to split the bytes.
1755 None (the default value) means split on ASCII whitespace characters
1756 (space, tab, return, newline, formfeed, vertical tab).
1757 maxsplit: Py_ssize_t = -1
1758 Maximum number of splits to do.
1759 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761Return a list of the sections in the bytes, using sep as the delimiter.
1762[clinic start generated code]*/
1763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001765bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767{
1768 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 const char *s = PyBytes_AS_STRING(self), *sub;
1770 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 if (maxsplit < 0)
1774 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return NULL;
1779 sub = vsub.buf;
1780 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783 PyBuffer_Release(&vsub);
1784 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001785}
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787/*[clinic input]
1788bytes.partition
1789
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 /
1792
1793Partition the bytes into three parts using the given separator.
1794
1795This will search for the separator sep in the bytes. If the separator is found,
1796returns a 3-tuple containing the part before the separator, the separator
1797itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing the original bytes
1800object and two empty bytes objects.
1801[clinic start generated code]*/
1802
Neal Norwitz6968b052007-02-27 19:02:19 +00001803static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001804bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001805/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001806{
Neal Norwitz6968b052007-02-27 19:02:19 +00001807 return stringlib_partition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001811 );
1812}
1813
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001814/*[clinic input]
1815bytes.rpartition
1816
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001817 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818 /
1819
1820Partition the bytes into three parts using the given separator.
1821
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001822This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001823the separator is found, returns a 3-tuple containing the part before the
1824separator, the separator itself, and the part after it.
1825
1826If the separator is not found, returns a 3-tuple containing two empty bytes
1827objects and the original bytes object.
1828[clinic start generated code]*/
1829
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001830static PyObject *
1831bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001832/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001833{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return stringlib_rpartition(
1835 (PyObject*) self,
1836 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001837 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001839}
1840
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001841/*[clinic input]
1842bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846Splitting is done starting at the end of the bytes and working to the front.
1847[clinic start generated code]*/
1848
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001849static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001850bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001852{
1853 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 const char *s = PyBytes_AS_STRING(self), *sub;
1855 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001856 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 if (maxsplit < 0)
1859 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001860 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001862 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 return NULL;
1864 sub = vsub.buf;
1865 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868 PyBuffer_Release(&vsub);
1869 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001873/*[clinic input]
1874bytes.join
1875
1876 iterable_of_bytes: object
1877 /
1878
1879Concatenate any number of bytes objects.
1880
1881The bytes whose method is called is inserted in between each pair.
1882
1883The result is returned as a new bytes object.
1884
1885Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886[clinic start generated code]*/
1887
Neal Norwitz6968b052007-02-27 19:02:19 +00001888static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001889bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001891{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001892 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001893}
1894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895PyObject *
1896_PyBytes_Join(PyObject *sep, PyObject *x)
1897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 assert(sep != NULL && PyBytes_Check(sep));
1899 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001900 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901}
1902
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001903static PyObject *
1904bytes_find(PyBytesObject *self, PyObject *args)
1905{
1906 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907}
1908
1909static PyObject *
1910bytes_index(PyBytesObject *self, PyObject *args)
1911{
1912 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913}
1914
1915
1916static PyObject *
1917bytes_rfind(PyBytesObject *self, PyObject *args)
1918{
1919 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922
1923static PyObject *
1924bytes_rindex(PyBytesObject *self, PyObject *args)
1925{
1926 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927}
1928
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
1930Py_LOCAL_INLINE(PyObject *)
1931do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 Py_buffer vsep;
1934 char *s = PyBytes_AS_STRING(self);
1935 Py_ssize_t len = PyBytes_GET_SIZE(self);
1936 char *sep;
1937 Py_ssize_t seplen;
1938 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001940 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 return NULL;
1942 sep = vsep.buf;
1943 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 i = 0;
1946 if (striptype != RIGHTSTRIP) {
1947 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948 i++;
1949 }
1950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 j = len;
1953 if (striptype != LEFTSTRIP) {
1954 do {
1955 j--;
1956 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957 j++;
1958 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963 Py_INCREF(self);
1964 return (PyObject*)self;
1965 }
1966 else
1967 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968}
1969
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
1971Py_LOCAL_INLINE(PyObject *)
1972do_strip(PyBytesObject *self, int striptype)
1973{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 char *s = PyBytes_AS_STRING(self);
1975 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001979 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 i++;
1981 }
1982 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
David Malcolm96960882010-11-05 17:23:41 +00001988 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 j++;
1990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998}
1999
2000
2001Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004 if (bytes != NULL && bytes != Py_None) {
2005 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 }
2007 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008}
2009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010/*[clinic input]
2011bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013 bytes: object = None
2014 /
2015
2016Strip leading and trailing bytes contained in the argument.
2017
2018If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019[clinic start generated code]*/
2020
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002021static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002023/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002024{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026}
2027
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028/*[clinic input]
2029bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031 bytes: object = None
2032 /
2033
2034Strip leading bytes contained in the argument.
2035
2036If the argument is omitted or None, strip leading ASCII whitespace.
2037[clinic start generated code]*/
2038
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039static PyObject *
2040bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002041/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042{
2043 return do_argstrip(self, LEFTSTRIP, bytes);
2044}
2045
2046/*[clinic input]
2047bytes.rstrip
2048
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 bytes: object = None
2050 /
2051
2052Strip trailing bytes contained in the argument.
2053
2054If the argument is omitted or None, strip trailing ASCII whitespace.
2055[clinic start generated code]*/
2056
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057static PyObject *
2058bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002059/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002060{
2061 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002062}
Neal Norwitz6968b052007-02-27 19:02:19 +00002063
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002065static PyObject *
2066bytes_count(PyBytesObject *self, PyObject *args)
2067{
2068 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069}
2070
2071
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072/*[clinic input]
2073bytes.translate
2074
Victor Stinner049e5092014-08-17 22:20:00 +02002075 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002077 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002078 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079
2080Return a copy with each character mapped by the given translation table.
2081
Martin Panter1b6c6da2016-08-27 08:35:02 +00002082All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083The remaining characters are mapped through the given translation table.
2084[clinic start generated code]*/
2085
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002087bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002088 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002089/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002091 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 Py_buffer table_view = {NULL, NULL};
2093 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002094 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002095 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 Py_ssize_t inlen, tablen, dellen = 0;
2099 PyObject *result;
2100 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (PyBytes_Check(table)) {
2103 table_chars = PyBytes_AS_STRING(table);
2104 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002106 else if (table == Py_None) {
2107 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 tablen = 256;
2109 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002111 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002112 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002113 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 tablen = table_view.len;
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 if (tablen != 256) {
2118 PyErr_SetString(PyExc_ValueError,
2119 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return NULL;
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124 if (deletechars != NULL) {
2125 if (PyBytes_Check(deletechars)) {
2126 del_table_chars = PyBytes_AS_STRING(deletechars);
2127 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002129 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002130 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002131 PyBuffer_Release(&table_view);
2132 return NULL;
2133 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002134 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 dellen = del_table_view.len;
2136 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 }
2138 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 dellen = 0;
2141 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 inlen = PyBytes_GET_SIZE(input_obj);
2144 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002145 if (result == NULL) {
2146 PyBuffer_Release(&del_table_view);
2147 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002150 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 /* If no deletions are required, use faster code */
2155 for (i = inlen; --i >= 0; ) {
2156 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 changed = 1;
2159 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 if (!changed && PyBytes_CheckExact(input_obj)) {
2161 Py_INCREF(input_obj);
2162 Py_DECREF(result);
2163 result = input_obj;
2164 }
2165 PyBuffer_Release(&del_table_view);
2166 PyBuffer_Release(&table_view);
2167 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 for (i = 0; i < 256; i++)
2172 trans_table[i] = Py_CHARMASK(i);
2173 } else {
2174 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002177 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002181 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 for (i = inlen; --i >= 0; ) {
2184 c = Py_CHARMASK(*input++);
2185 if (trans_table[c] != -1)
2186 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187 continue;
2188 changed = 1;
2189 }
2190 if (!changed && PyBytes_CheckExact(input_obj)) {
2191 Py_DECREF(result);
2192 Py_INCREF(input_obj);
2193 return input_obj;
2194 }
2195 /* Fix the size of the resulting string */
2196 if (inlen > 0)
2197 _PyBytes_Resize(&result, output - output_start);
2198 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199}
2200
2201
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002202/*[clinic input]
2203
2204@staticmethod
2205bytes.maketrans
2206
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002207 frm: Py_buffer
2208 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209 /
2210
2211Return a translation table useable for the bytes or bytearray translate method.
2212
2213The returned table will be one where each byte in frm is mapped to the byte at
2214the same position in to.
2215
2216The bytes objects frm and to must be of the same length.
2217[clinic start generated code]*/
2218
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002220bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002221/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222{
2223 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002224}
2225
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226
2227/*[clinic input]
2228bytes.replace
2229
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002230 old: Py_buffer
2231 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232 count: Py_ssize_t = -1
2233 Maximum number of occurrences to replace.
2234 -1 (the default value) means replace all occurrences.
2235 /
2236
2237Return a copy with all occurrences of substring old replaced by new.
2238
2239If the optional argument count is given, only the first count occurrences are
2240replaced.
2241[clinic start generated code]*/
2242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002244bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002245 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002246/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002248 return stringlib_replace((PyObject *)self,
2249 (const char *)old->buf, old->len,
2250 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251}
2252
2253/** End DALKE **/
2254
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002256static PyObject *
2257bytes_startswith(PyBytesObject *self, PyObject *args)
2258{
2259 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260}
2261
2262static PyObject *
2263bytes_endswith(PyBytesObject *self, PyObject *args)
2264{
2265 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266}
2267
2268
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002269/*[clinic input]
2270bytes.decode
2271
2272 encoding: str(c_default="NULL") = 'utf-8'
2273 The encoding with which to decode the bytes.
2274 errors: str(c_default="NULL") = 'strict'
2275 The error handling scheme to use for the handling of decoding errors.
2276 The default is 'strict' meaning that decoding errors raise a
2277 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278 as well as any other name registered with codecs.register_error that
2279 can handle UnicodeDecodeErrors.
2280
2281Decode the bytes using the codec registered for encoding.
2282[clinic start generated code]*/
2283
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002284static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002285bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002286 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002287/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002288{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002289 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002290}
2291
Guido van Rossum20188312006-05-05 15:15:40 +00002292
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002293/*[clinic input]
2294bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002295
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002296 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297
2298Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300Line breaks are not included in the resulting list unless keepends is given and
2301true.
2302[clinic start generated code]*/
2303
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002305bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002306/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002307{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002309 (PyObject*) self, PyBytes_AS_STRING(self),
2310 PyBytes_GET_SIZE(self), keepends
2311 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312}
2313
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002314/*[clinic input]
2315@classmethod
2316bytes.fromhex
2317
2318 string: unicode
2319 /
2320
2321Create a bytes object from a string of hexadecimal numbers.
2322
2323Spaces between two numbers are accepted.
2324Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325[clinic start generated code]*/
2326
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002328bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002329/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002331 PyObject *result = _PyBytes_FromHex(string, 0);
2332 if (type != &PyBytes_Type && result != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002333 Py_SETREF(result, _PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002334 }
2335 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002336}
2337
2338PyObject*
2339_PyBytes_FromHex(PyObject *string, int use_bytearray)
2340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002342 Py_ssize_t hexlen, invalid_char;
2343 unsigned int top, bot;
2344 Py_UCS1 *str, *end;
2345 _PyBytesWriter writer;
2346
2347 _PyBytesWriter_Init(&writer);
2348 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002350 assert(PyUnicode_Check(string));
2351 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002353 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002354
Victor Stinner2bf89932015-10-14 11:25:33 +02002355 if (!PyUnicode_IS_ASCII(string)) {
2356 void *data = PyUnicode_DATA(string);
2357 unsigned int kind = PyUnicode_KIND(string);
2358 Py_ssize_t i;
2359
2360 /* search for the first non-ASCII character */
2361 for (i = 0; i < hexlen; i++) {
2362 if (PyUnicode_READ(kind, data, i) >= 128)
2363 break;
2364 }
2365 invalid_char = i;
2366 goto error;
2367 }
2368
2369 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2370 str = PyUnicode_1BYTE_DATA(string);
2371
2372 /* This overestimates if there are spaces */
2373 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2374 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002376
2377 end = str + hexlen;
2378 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002380 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002381 do {
2382 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002383 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002384 if (str >= end)
2385 break;
2386 }
2387
2388 top = _PyLong_DigitValue[*str];
2389 if (top >= 16) {
2390 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 goto error;
2392 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002393 str++;
2394
2395 bot = _PyLong_DigitValue[*str];
2396 if (bot >= 16) {
2397 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2398 goto error;
2399 }
2400 str++;
2401
2402 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002404
2405 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002406
2407 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002408 PyErr_Format(PyExc_ValueError,
2409 "non-hexadecimal number found in "
2410 "fromhex() arg at position %zd", invalid_char);
2411 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002413}
2414
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002415/*[clinic input]
2416bytes.hex
2417
2418 sep: object = NULL
2419 An optional single character or byte to separate hex bytes.
2420 bytes_per_sep: int = 1
2421 How many bytes between separators. Positive values count from the
2422 right, negative values count from the left.
2423
2424Create a str of hexadecimal numbers from a bytes object.
2425
2426Example:
2427>>> value = b'\xb9\x01\xef'
2428>>> value.hex()
2429'b901ef'
2430>>> value.hex(':')
2431'b9:01:ef'
2432>>> value.hex(':', 2)
2433'b9:01ef'
2434>>> value.hex(':', -2)
2435'b901:ef'
2436[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002437
2438static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002439bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2440/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002441{
2442 char* argbuf = PyBytes_AS_STRING(self);
2443 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002444 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002445}
2446
2447static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302448bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002451}
2452
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002453
2454static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002455bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302457 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002459 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002460 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002461 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002462 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002463 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002464 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002465 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002466 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002467 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002468 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002469 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002470 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302471 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302473 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302475 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002476 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302477 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302479 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302481 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302483 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302485 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002487 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002488 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302489 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002490 BYTES_LSTRIP_METHODDEF
2491 BYTES_MAKETRANS_METHODDEF
2492 BYTES_PARTITION_METHODDEF
2493 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002494 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2495 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002496 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002497 BYTES_RPARTITION_METHODDEF
2498 BYTES_RSPLIT_METHODDEF
2499 BYTES_RSTRIP_METHODDEF
2500 BYTES_SPLIT_METHODDEF
2501 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002502 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002503 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002504 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302505 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302507 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002508 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302509 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002510 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002512};
2513
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002515bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002516{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002517 if (!PyBytes_Check(self)) {
2518 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002519 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002520 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002521 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002522}
2523
2524static PyNumberMethods bytes_as_number = {
2525 0, /*nb_add*/
2526 0, /*nb_subtract*/
2527 0, /*nb_multiply*/
2528 bytes_mod, /*nb_remainder*/
2529};
2530
2531static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002532bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002533
2534static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002535bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 PyObject *x = NULL;
2538 const char *encoding = NULL;
2539 const char *errors = NULL;
2540 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002541 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 Py_ssize_t size;
2543 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002544 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002547 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2549 &encoding, &errors))
2550 return NULL;
2551 if (x == NULL) {
2552 if (encoding != NULL || errors != NULL) {
2553 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002554 encoding != NULL ?
2555 "encoding without a string argument" :
2556 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 return NULL;
2558 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002559 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002562 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002564 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002566 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 return NULL;
2568 }
2569 new = PyUnicode_AsEncodedString(x, encoding, errors);
2570 if (new == NULL)
2571 return NULL;
2572 assert(PyBytes_Check(new));
2573 return new;
2574 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002575
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002576 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002577 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002578 PyUnicode_Check(x) ?
2579 "string argument without an encoding" :
2580 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002581 return NULL;
2582 }
2583
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002584 /* We'd like to call PyObject_Bytes here, but we need to check for an
2585 integer argument before deferring to PyBytes_FromObject, something
2586 PyObject_Bytes doesn't do. */
2587 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2588 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002589 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002590 Py_DECREF(func);
2591 if (new == NULL)
2592 return NULL;
2593 if (!PyBytes_Check(new)) {
2594 PyErr_Format(PyExc_TypeError,
2595 "__bytes__ returned non-bytes (type %.200s)",
2596 Py_TYPE(new)->tp_name);
2597 Py_DECREF(new);
2598 return NULL;
2599 }
2600 return new;
2601 }
2602 else if (PyErr_Occurred())
2603 return NULL;
2604
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002605 if (PyUnicode_Check(x)) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "string argument without an encoding");
2608 return NULL;
2609 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002611 if (PyIndex_Check(x)) {
2612 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2613 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002614 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002615 return NULL;
2616 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002617 }
INADA Naokia634e232017-01-06 17:32:01 +09002618 else {
2619 if (size < 0) {
2620 PyErr_SetString(PyExc_ValueError, "negative count");
2621 return NULL;
2622 }
2623 new = _PyBytes_FromSize(size, 1);
2624 if (new == NULL)
2625 return NULL;
2626 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002627 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002630 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002631}
2632
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002633static PyObject*
2634_PyBytes_FromBuffer(PyObject *x)
2635{
2636 PyObject *new;
2637 Py_buffer view;
2638
2639 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2640 return NULL;
2641
2642 new = PyBytes_FromStringAndSize(NULL, view.len);
2643 if (!new)
2644 goto fail;
2645 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2646 &view, view.len, 'C') < 0)
2647 goto fail;
2648 PyBuffer_Release(&view);
2649 return new;
2650
2651fail:
2652 Py_XDECREF(new);
2653 PyBuffer_Release(&view);
2654 return NULL;
2655}
2656
2657static PyObject*
2658_PyBytes_FromList(PyObject *x)
2659{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002660 Py_ssize_t i, size = PyList_GET_SIZE(x);
2661 Py_ssize_t value;
2662 char *str;
2663 PyObject *item;
2664 _PyBytesWriter writer;
2665
2666 _PyBytesWriter_Init(&writer);
2667 str = _PyBytesWriter_Alloc(&writer, size);
2668 if (str == NULL)
2669 return NULL;
2670 writer.overallocate = 1;
2671 size = writer.allocated;
2672
2673 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2674 item = PyList_GET_ITEM(x, i);
2675 Py_INCREF(item);
2676 value = PyNumber_AsSsize_t(item, NULL);
2677 Py_DECREF(item);
2678 if (value == -1 && PyErr_Occurred())
2679 goto error;
2680
2681 if (value < 0 || value >= 256) {
2682 PyErr_SetString(PyExc_ValueError,
2683 "bytes must be in range(0, 256)");
2684 goto error;
2685 }
2686
2687 if (i >= size) {
2688 str = _PyBytesWriter_Resize(&writer, str, size+1);
2689 if (str == NULL)
2690 return NULL;
2691 size = writer.allocated;
2692 }
2693 *str++ = (char) value;
2694 }
2695 return _PyBytesWriter_Finish(&writer, str);
2696
2697 error:
2698 _PyBytesWriter_Dealloc(&writer);
2699 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002700}
2701
2702static PyObject*
2703_PyBytes_FromTuple(PyObject *x)
2704{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002705 PyObject *bytes;
2706 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2707 Py_ssize_t value;
2708 char *str;
2709 PyObject *item;
2710
2711 bytes = PyBytes_FromStringAndSize(NULL, size);
2712 if (bytes == NULL)
2713 return NULL;
2714 str = ((PyBytesObject *)bytes)->ob_sval;
2715
2716 for (i = 0; i < size; i++) {
2717 item = PyTuple_GET_ITEM(x, i);
2718 value = PyNumber_AsSsize_t(item, NULL);
2719 if (value == -1 && PyErr_Occurred())
2720 goto error;
2721
2722 if (value < 0 || value >= 256) {
2723 PyErr_SetString(PyExc_ValueError,
2724 "bytes must be in range(0, 256)");
2725 goto error;
2726 }
2727 *str++ = (char) value;
2728 }
2729 return bytes;
2730
2731 error:
2732 Py_DECREF(bytes);
2733 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002734}
2735
2736static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002737_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002738{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002739 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002740 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002741 _PyBytesWriter writer;
2742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002744 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 if (size == -1 && PyErr_Occurred())
2746 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002747
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002748 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002749 str = _PyBytesWriter_Alloc(&writer, size);
2750 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002752 writer.overallocate = 1;
2753 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002755 /* Run the iterator to exhaustion */
2756 for (i = 0; ; i++) {
2757 PyObject *item;
2758 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 /* Get the next item */
2761 item = PyIter_Next(it);
2762 if (item == NULL) {
2763 if (PyErr_Occurred())
2764 goto error;
2765 break;
2766 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002768 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002769 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 Py_DECREF(item);
2771 if (value == -1 && PyErr_Occurred())
2772 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 /* Range check */
2775 if (value < 0 || value >= 256) {
2776 PyErr_SetString(PyExc_ValueError,
2777 "bytes must be in range(0, 256)");
2778 goto error;
2779 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 /* Append the byte */
2782 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002783 str = _PyBytesWriter_Resize(&writer, str, size+1);
2784 if (str == NULL)
2785 return NULL;
2786 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002788 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002790
2791 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
2793 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002794 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796}
2797
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002798PyObject *
2799PyBytes_FromObject(PyObject *x)
2800{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002801 PyObject *it, *result;
2802
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002803 if (x == NULL) {
2804 PyErr_BadInternalCall();
2805 return NULL;
2806 }
2807
2808 if (PyBytes_CheckExact(x)) {
2809 Py_INCREF(x);
2810 return x;
2811 }
2812
2813 /* Use the modern buffer interface */
2814 if (PyObject_CheckBuffer(x))
2815 return _PyBytes_FromBuffer(x);
2816
2817 if (PyList_CheckExact(x))
2818 return _PyBytes_FromList(x);
2819
2820 if (PyTuple_CheckExact(x))
2821 return _PyBytes_FromTuple(x);
2822
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002823 if (!PyUnicode_Check(x)) {
2824 it = PyObject_GetIter(x);
2825 if (it != NULL) {
2826 result = _PyBytes_FromIterator(it, x);
2827 Py_DECREF(it);
2828 return result;
2829 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002830 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2831 return NULL;
2832 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002833 }
2834
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002835 PyErr_Format(PyExc_TypeError,
2836 "cannot convert '%.200s' object to bytes",
2837 x->ob_type->tp_name);
2838 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002839}
2840
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002841static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002842bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002844 PyObject *tmp, *pnew;
2845 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 assert(PyType_IsSubtype(type, &PyBytes_Type));
2848 tmp = bytes_new(&PyBytes_Type, args, kwds);
2849 if (tmp == NULL)
2850 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002851 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 n = PyBytes_GET_SIZE(tmp);
2853 pnew = type->tp_alloc(type, n);
2854 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002855 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 PyBytes_AS_STRING(tmp), n+1);
2857 ((PyBytesObject *)pnew)->ob_shash =
2858 ((PyBytesObject *)tmp)->ob_shash;
2859 }
2860 Py_DECREF(tmp);
2861 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862}
2863
/* Docstring of the bytes type (installed as tp_doc of PyBytes_Type): lists
   the accepted constructor call forms and the kinds of source objects. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002864PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002865"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002867bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002868bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2869bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002870\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002872 - an iterable yielding integers in range(256)\n\
2873 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002874 - any object implementing the buffer API.\n\
2875 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002876
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002877static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002878
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2881 "bytes",
2882 PyBytesObject_SIZE,
2883 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002884 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002885 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 0, /* tp_getattr */
2887 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002888 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002890 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 &bytes_as_sequence, /* tp_as_sequence */
2892 &bytes_as_mapping, /* tp_as_mapping */
2893 (hashfunc)bytes_hash, /* tp_hash */
2894 0, /* tp_call */
2895 bytes_str, /* tp_str */
2896 PyObject_GenericGetAttr, /* tp_getattro */
2897 0, /* tp_setattro */
2898 &bytes_as_buffer, /* tp_as_buffer */
2899 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2900 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2901 bytes_doc, /* tp_doc */
2902 0, /* tp_traverse */
2903 0, /* tp_clear */
2904 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2905 0, /* tp_weaklistoffset */
2906 bytes_iter, /* tp_iter */
2907 0, /* tp_iternext */
2908 bytes_methods, /* tp_methods */
2909 0, /* tp_members */
2910 0, /* tp_getset */
2911 &PyBaseObject_Type, /* tp_base */
2912 0, /* tp_dict */
2913 0, /* tp_descr_get */
2914 0, /* tp_descr_set */
2915 0, /* tp_dictoffset */
2916 0, /* tp_init */
2917 0, /* tp_alloc */
2918 bytes_new, /* tp_new */
2919 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002920};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002921
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002923PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 assert(pv != NULL);
2926 if (*pv == NULL)
2927 return;
2928 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002929 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 return;
2931 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002932
2933 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2934 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002935 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002936 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002937
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002938 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002939 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2940 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2941 Py_CLEAR(*pv);
2942 return;
2943 }
2944
2945 oldsize = PyBytes_GET_SIZE(*pv);
2946 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2947 PyErr_NoMemory();
2948 goto error;
2949 }
2950 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2951 goto error;
2952
2953 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2954 PyBuffer_Release(&wb);
2955 return;
2956
2957 error:
2958 PyBuffer_Release(&wb);
2959 Py_CLEAR(*pv);
2960 return;
2961 }
2962
2963 else {
2964 /* Multiple references, need to create new object */
2965 PyObject *v;
2966 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002967 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002968 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969}
2970
2971void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002972PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 PyBytes_Concat(pv, w);
2975 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976}
2977
2978
Ethan Furmanb95b5612015-01-23 20:05:18 -08002979/* The following function breaks the notion that bytes are immutable:
2980 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002982 as creating a new bytes object and destroying the old one, only
2983 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002985 Note that if there's not enough memory to resize the bytes object, the
2986 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987 memory" exception is set, and -1 is returned. Else (on success) 0 is
2988 returned, and the value in *pv may or may not be the same as on input.
2989 As always, an extra byte is allocated for a trailing \0 byte (newsize
2990 does *not* include that), and a trailing \0 byte is stored.
2991*/
2992
2993int
2994_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2995{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002996 PyObject *v;
2997 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002999 if (!PyBytes_Check(v) || newsize < 0) {
3000 goto error;
3001 }
3002 if (Py_SIZE(v) == newsize) {
3003 /* return early if newsize equals to v->ob_size */
3004 return 0;
3005 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003006 if (Py_SIZE(v) == 0) {
3007 if (newsize == 0) {
3008 return 0;
3009 }
3010 *pv = _PyBytes_FromSize(newsize, 0);
3011 Py_DECREF(v);
3012 return (*pv == NULL) ? -1 : 0;
3013 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003014 if (Py_REFCNT(v) != 1) {
3015 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003017 if (newsize == 0) {
3018 *pv = _PyBytes_FromSize(0, 0);
3019 Py_DECREF(v);
3020 return (*pv == NULL) ? -1 : 0;
3021 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 /* XXX UNREF/NEWREF interface should be more symmetrical */
3023 _Py_DEC_REFTOTAL;
3024 _Py_ForgetReference(v);
3025 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003026 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 if (*pv == NULL) {
3028 PyObject_Del(v);
3029 PyErr_NoMemory();
3030 return -1;
3031 }
3032 _Py_NewReference(*pv);
3033 sv = (PyBytesObject *) *pv;
3034 Py_SIZE(sv) = newsize;
3035 sv->ob_sval[newsize] = '\0';
3036 sv->ob_shash = -1; /* invalidate cached hash value */
3037 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003038error:
3039 *pv = 0;
3040 Py_DECREF(v);
3041 PyErr_BadInternalCall();
3042 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043}
3044
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003045void
Victor Stinnerbed48172019-08-27 00:12:32 +02003046_PyBytes_Fini(void)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003049 for (i = 0; i < UCHAR_MAX + 1; i++)
3050 Py_CLEAR(characters[i]);
3051 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003052}
3053
Benjamin Peterson4116f362008-05-27 00:36:20 +00003054/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003055
3056typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 PyObject_HEAD
3058 Py_ssize_t it_index;
3059 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003060} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061
3062static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003063striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003064{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003065 _PyObject_GC_UNTRACK(it);
3066 Py_XDECREF(it->it_seq);
3067 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003068}
3069
3070static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003071striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 Py_VISIT(it->it_seq);
3074 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003075}
3076
3077static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 PyBytesObject *seq;
3081 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 assert(it != NULL);
3084 seq = it->it_seq;
3085 if (seq == NULL)
3086 return NULL;
3087 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3090 item = PyLong_FromLong(
3091 (unsigned char)seq->ob_sval[it->it_index]);
3092 if (item != NULL)
3093 ++it->it_index;
3094 return item;
3095 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003098 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100}
3101
3102static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303103striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 Py_ssize_t len = 0;
3106 if (it->it_seq)
3107 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3108 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003109}
3110
3111PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003112 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003114static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303115striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003116{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003117 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003118 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003119 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003120 it->it_seq, it->it_index);
3121 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003122 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003123 }
3124}
3125
3126PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3127
3128static PyObject *
3129striter_setstate(striterobject *it, PyObject *state)
3130{
3131 Py_ssize_t index = PyLong_AsSsize_t(state);
3132 if (index == -1 && PyErr_Occurred())
3133 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003134 if (it->it_seq != NULL) {
3135 if (index < 0)
3136 index = 0;
3137 else if (index > PyBytes_GET_SIZE(it->it_seq))
3138 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3139 it->it_index = index;
3140 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003141 Py_RETURN_NONE;
3142}
3143
3144PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3145
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003146static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003147 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3148 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003149 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3150 reduce_doc},
3151 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3152 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003153 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003154};
3155
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003156PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003157 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3158 "bytes_iterator", /* tp_name */
3159 sizeof(striterobject), /* tp_basicsize */
3160 0, /* tp_itemsize */
3161 /* methods */
3162 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003163 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003164 0, /* tp_getattr */
3165 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003166 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003167 0, /* tp_repr */
3168 0, /* tp_as_number */
3169 0, /* tp_as_sequence */
3170 0, /* tp_as_mapping */
3171 0, /* tp_hash */
3172 0, /* tp_call */
3173 0, /* tp_str */
3174 PyObject_GenericGetAttr, /* tp_getattro */
3175 0, /* tp_setattro */
3176 0, /* tp_as_buffer */
3177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3178 0, /* tp_doc */
3179 (traverseproc)striter_traverse, /* tp_traverse */
3180 0, /* tp_clear */
3181 0, /* tp_richcompare */
3182 0, /* tp_weaklistoffset */
3183 PyObject_SelfIter, /* tp_iter */
3184 (iternextfunc)striter_next, /* tp_iternext */
3185 striter_methods, /* tp_methods */
3186 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003187};
3188
3189static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003190bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 if (!PyBytes_Check(seq)) {
3195 PyErr_BadInternalCall();
3196 return NULL;
3197 }
3198 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3199 if (it == NULL)
3200 return NULL;
3201 it->it_index = 0;
3202 Py_INCREF(seq);
3203 it->it_seq = (PyBytesObject *)seq;
3204 _PyObject_GC_TRACK(it);
3205 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003206}
Victor Stinner00165072015-10-09 01:53:21 +02003207
3208
3209/* _PyBytesWriter API */
3210
/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the extra room added is (requested size / OVERALLOCATE_FACTOR). */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3218
3219void
3220_PyBytesWriter_Init(_PyBytesWriter *writer)
3221{
Victor Stinner661aacc2015-10-14 09:41:48 +02003222 /* Set all attributes before small_buffer to 0 */
3223 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003224#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003225 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003226#endif
3227}
3228
3229void
3230_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3231{
3232 Py_CLEAR(writer->buffer);
3233}
3234
3235Py_LOCAL_INLINE(char*)
3236_PyBytesWriter_AsString(_PyBytesWriter *writer)
3237{
Victor Stinner661aacc2015-10-14 09:41:48 +02003238 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003239 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003240 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003241 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003242 else if (writer->use_bytearray) {
3243 assert(writer->buffer != NULL);
3244 return PyByteArray_AS_STRING(writer->buffer);
3245 }
3246 else {
3247 assert(writer->buffer != NULL);
3248 return PyBytes_AS_STRING(writer->buffer);
3249 }
Victor Stinner00165072015-10-09 01:53:21 +02003250}
3251
3252Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003253_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003254{
3255 char *start = _PyBytesWriter_AsString(writer);
3256 assert(str != NULL);
3257 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003258 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003259 return str - start;
3260}
3261
3262Py_LOCAL_INLINE(void)
3263_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3264{
3265#ifdef Py_DEBUG
3266 char *start, *end;
3267
Victor Stinner661aacc2015-10-14 09:41:48 +02003268 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003269 assert(writer->buffer == NULL);
3270 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003271 else {
3272 assert(writer->buffer != NULL);
3273 if (writer->use_bytearray)
3274 assert(PyByteArray_CheckExact(writer->buffer));
3275 else
3276 assert(PyBytes_CheckExact(writer->buffer));
3277 assert(Py_REFCNT(writer->buffer) == 1);
3278 }
Victor Stinner00165072015-10-09 01:53:21 +02003279
Victor Stinner661aacc2015-10-14 09:41:48 +02003280 if (writer->use_bytearray) {
3281 /* bytearray has its own overallocation algorithm,
3282 writer overallocation must be disabled */
3283 assert(!writer->overallocate);
3284 }
3285
3286 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003287 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003288 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003289 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003290 assert(start[writer->allocated] == 0);
3291
3292 end = start + writer->allocated;
3293 assert(str != NULL);
3294 assert(start <= str && str <= end);
3295#endif
3296}
3297
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003298void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003299_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003300{
3301 Py_ssize_t allocated, pos;
3302
3303 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003304 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003305
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003306 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003307 if (writer->overallocate
3308 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3309 /* overallocate to limit the number of realloc() */
3310 allocated += allocated / OVERALLOCATE_FACTOR;
3311 }
3312
Victor Stinner2bf89932015-10-14 11:25:33 +02003313 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003314 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003315 if (writer->use_bytearray) {
3316 if (PyByteArray_Resize(writer->buffer, allocated))
3317 goto error;
3318 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3319 but we cannot use ob_alloc because bytes may need to be moved
3320 to use the whole buffer. bytearray uses an internal optimization
3321 to avoid moving or copying bytes when bytes are removed at the
3322 beginning (ex: del bytearray[:1]). */
3323 }
3324 else {
3325 if (_PyBytes_Resize(&writer->buffer, allocated))
3326 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003327 }
3328 }
3329 else {
3330 /* convert from stack buffer to bytes object buffer */
3331 assert(writer->buffer == NULL);
3332
Victor Stinner661aacc2015-10-14 09:41:48 +02003333 if (writer->use_bytearray)
3334 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3335 else
3336 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003337 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003338 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003339
3340 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003341 char *dest;
3342 if (writer->use_bytearray)
3343 dest = PyByteArray_AS_STRING(writer->buffer);
3344 else
3345 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003346 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003347 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003348 pos);
3349 }
3350
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003352#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003353 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003354#endif
Victor Stinner00165072015-10-09 01:53:21 +02003355 }
3356 writer->allocated = allocated;
3357
3358 str = _PyBytesWriter_AsString(writer) + pos;
3359 _PyBytesWriter_CheckConsistency(writer, str);
3360 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003361
3362error:
3363 _PyBytesWriter_Dealloc(writer);
3364 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003365}
3366
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003367void*
3368_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3369{
3370 Py_ssize_t new_min_size;
3371
3372 _PyBytesWriter_CheckConsistency(writer, str);
3373 assert(size >= 0);
3374
3375 if (size == 0) {
3376 /* nothing to do */
3377 return str;
3378 }
3379
3380 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3381 PyErr_NoMemory();
3382 _PyBytesWriter_Dealloc(writer);
3383 return NULL;
3384 }
3385 new_min_size = writer->min_size + size;
3386
3387 if (new_min_size > writer->allocated)
3388 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3389
3390 writer->min_size = new_min_size;
3391 return str;
3392}
3393
Victor Stinner00165072015-10-09 01:53:21 +02003394/* Allocate the buffer to write size bytes.
3395 Return the pointer to the beginning of buffer data.
3396 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003397void*
Victor Stinner00165072015-10-09 01:53:21 +02003398_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3399{
3400 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003401 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003402 assert(size >= 0);
3403
Victor Stinnerb3653a32015-10-09 03:38:24 +02003404 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003405#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003406 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003407 /* In debug mode, don't use the full small buffer because it is less
3408 efficient than bytes and bytearray objects to detect buffer underflow
3409 and buffer overflow. Use 10 bytes of the small buffer to test also
3410 code using the smaller buffer in debug mode.
3411
3412 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3413 in debug mode to also be able to detect stack overflow when running
3414 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3415 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3416 stack overflow. */
3417 writer->allocated = Py_MIN(writer->allocated, 10);
3418 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3419 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003420 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003421#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003422 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003423#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003424 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003425}
3426
3427PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003428_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003429{
Victor Stinner2bf89932015-10-14 11:25:33 +02003430 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003431 PyObject *result;
3432
3433 _PyBytesWriter_CheckConsistency(writer, str);
3434
Victor Stinner2bf89932015-10-14 11:25:33 +02003435 size = _PyBytesWriter_GetSize(writer, str);
3436 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003437 Py_CLEAR(writer->buffer);
3438 /* Get the empty byte string singleton */
3439 result = PyBytes_FromStringAndSize(NULL, 0);
3440 }
3441 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003442 if (writer->use_bytearray) {
3443 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3444 }
3445 else {
3446 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3447 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003448 }
3449 else {
3450 result = writer->buffer;
3451 writer->buffer = NULL;
3452
Victor Stinner2bf89932015-10-14 11:25:33 +02003453 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003454 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003455 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003456 Py_DECREF(result);
3457 return NULL;
3458 }
3459 }
3460 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003461 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003462 assert(result == NULL);
3463 return NULL;
3464 }
Victor Stinner00165072015-10-09 01:53:21 +02003465 }
3466 }
Victor Stinner00165072015-10-09 01:53:21 +02003467 }
Victor Stinner00165072015-10-09 01:53:21 +02003468 return result;
3469}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003470
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003471void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003472_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003473 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003474{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003475 char *str = (char *)ptr;
3476
Victor Stinnerce179bf2015-10-09 12:57:22 +02003477 str = _PyBytesWriter_Prepare(writer, str, size);
3478 if (str == NULL)
3479 return NULL;
3480
Christian Heimesf051e432016-09-13 20:22:02 +02003481 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003482 str += size;
3483
3484 return str;
3485}