blob: c4edcca4f76127726d8b9722e3a21c8ad698b47b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
26static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length, as reported by strlen(), becomes the object's size.
43
Martin Pantera90a4a92016-05-30 04:04:50 +000044 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 either NULL or else points to a string containing at least `size' bytes.
46 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47 not have to be null-terminated. (Therefore it is safe to construct a
48 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50 bytes (setting the last byte to the null terminating character) and you can
51 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000052 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 alter the data yourself, since the strings may be shared.
54
55 The PyObject member `op->ob_size', which denotes the number of "extra
56 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020057 allocated for string data, not counting the null terminating character.
58 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059 PyBytes_FromStringAndSize()) or the length of the string in the `str'
60 parameter (for PyBytes_FromString()).
61*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
Victor Stinner03dab782015-10-14 00:21:35 +0200324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Victor Stinner03dab782015-10-14 00:21:35 +0200341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360
Victor Stinner03dab782015-10-14 00:21:35 +0200361#undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
404/* Format codes
405 * F_LJUST '-'
406 * F_SIGN '+'
407 * F_BLANK ' '
408 * F_ALT '#'
409 * F_ZERO '0'
410 */
411#define F_LJUST (1<<0)
412#define F_SIGN (1<<1)
413#define F_BLANK (1<<2)
414#define F_ALT (1<<3)
415#define F_ZERO (1<<4)
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
670#ifdef Py_DEBUG
671 char *before;
672#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800673
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200675 if (*fmt == '%') {
676 *res++ = '%';
677 fmt++;
678 fmtcnt--;
679 continue;
680 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800681 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200682 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 Py_ssize_t keylen;
684 PyObject *key;
685 int pcount = 1;
686
687 if (dict == NULL) {
688 PyErr_SetString(PyExc_TypeError,
689 "format requires a mapping");
690 goto error;
691 }
692 ++fmt;
693 --fmtcnt;
694 keystart = fmt;
695 /* Skip over balanced parentheses */
696 while (pcount > 0 && --fmtcnt >= 0) {
697 if (*fmt == ')')
698 --pcount;
699 else if (*fmt == '(')
700 ++pcount;
701 fmt++;
702 }
703 keylen = fmt - keystart - 1;
704 if (fmtcnt < 0 || pcount > 0) {
705 PyErr_SetString(PyExc_ValueError,
706 "incomplete format key");
707 goto error;
708 }
709 key = PyBytes_FromStringAndSize(keystart,
710 keylen);
711 if (key == NULL)
712 goto error;
713 if (args_owned) {
714 Py_DECREF(args);
715 args_owned = 0;
716 }
717 args = PyObject_GetItem(dict, key);
718 Py_DECREF(key);
719 if (args == NULL) {
720 goto error;
721 }
722 args_owned = 1;
723 arglen = -1;
724 argidx = -2;
725 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200726
727 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800728 while (--fmtcnt >= 0) {
729 switch (c = *fmt++) {
730 case '-': flags |= F_LJUST; continue;
731 case '+': flags |= F_SIGN; continue;
732 case ' ': flags |= F_BLANK; continue;
733 case '#': flags |= F_ALT; continue;
734 case '0': flags |= F_ZERO; continue;
735 }
736 break;
737 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200738
739 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800740 if (c == '*') {
741 v = getnextarg(args, arglen, &argidx);
742 if (v == NULL)
743 goto error;
744 if (!PyLong_Check(v)) {
745 PyErr_SetString(PyExc_TypeError,
746 "* wants int");
747 goto error;
748 }
749 width = PyLong_AsSsize_t(v);
750 if (width == -1 && PyErr_Occurred())
751 goto error;
752 if (width < 0) {
753 flags |= F_LJUST;
754 width = -width;
755 }
756 if (--fmtcnt >= 0)
757 c = *fmt++;
758 }
759 else if (c >= 0 && isdigit(c)) {
760 width = c - '0';
761 while (--fmtcnt >= 0) {
762 c = Py_CHARMASK(*fmt++);
763 if (!isdigit(c))
764 break;
765 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
766 PyErr_SetString(
767 PyExc_ValueError,
768 "width too big");
769 goto error;
770 }
771 width = width*10 + (c - '0');
772 }
773 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200774
775 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800776 if (c == '.') {
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 if (c == '*') {
781 v = getnextarg(args, arglen, &argidx);
782 if (v == NULL)
783 goto error;
784 if (!PyLong_Check(v)) {
785 PyErr_SetString(
786 PyExc_TypeError,
787 "* wants int");
788 goto error;
789 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200790 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800791 if (prec == -1 && PyErr_Occurred())
792 goto error;
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 else if (c >= 0 && isdigit(c)) {
799 prec = c - '0';
800 while (--fmtcnt >= 0) {
801 c = Py_CHARMASK(*fmt++);
802 if (!isdigit(c))
803 break;
804 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
805 PyErr_SetString(
806 PyExc_ValueError,
807 "prec too big");
808 goto error;
809 }
810 prec = prec*10 + (c - '0');
811 }
812 }
813 } /* prec */
814 if (fmtcnt >= 0) {
815 if (c == 'h' || c == 'l' || c == 'L') {
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 PyErr_SetString(PyExc_ValueError,
822 "incomplete format");
823 goto error;
824 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300829 if (fmtcnt == 0) {
830 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831 writer.overallocate = 0;
832 }
833
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 sign = 0;
835 fill = ' ';
836 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700837 case 'r':
838 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 if (temp == NULL)
842 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200843 assert(PyUnicode_IS_ASCII(temp));
844 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
845 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 if (prec >= 0 && len > prec)
847 len = prec;
848 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200849
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 case 's':
851 // %s is only for 2/3 code; 3 only code should use %b
852 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200853 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (temp == NULL)
855 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800856 if (prec >= 0 && len > prec)
857 len = prec;
858 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200859
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 case 'i':
861 case 'd':
862 case 'u':
863 case 'o':
864 case 'x':
865 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200866 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200867 && width == -1 && prec == -1
868 && !(flags & (F_SIGN | F_BLANK))
869 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200870 {
871 /* Fast path */
872 int alternate = flags & F_ALT;
873 int base;
874
875 switch(c)
876 {
877 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700878 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 case 'd':
880 case 'i':
881 case 'u':
882 base = 10;
883 break;
884 case 'o':
885 base = 8;
886 break;
887 case 'x':
888 case 'X':
889 base = 16;
890 break;
891 }
892
893 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200894 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200895 res = _PyLong_FormatBytesWriter(&writer, res,
896 v, base, alternate);
897 if (res == NULL)
898 goto error;
899 continue;
900 }
901
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300902 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200903 if (!temp)
904 goto error;
905 assert(PyUnicode_IS_ASCII(temp));
906 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
907 len = PyUnicode_GET_LENGTH(temp);
908 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 if (flags & F_ZERO)
910 fill = '0';
911 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 case 'e':
914 case 'E':
915 case 'f':
916 case 'F':
917 case 'g':
918 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200919 if (width == -1 && prec == -1
920 && !(flags & (F_SIGN | F_BLANK)))
921 {
922 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200923 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200924 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200925 if (res == NULL)
926 goto error;
927 continue;
928 }
929
Victor Stinnerad771582015-10-09 12:38:53 +0200930 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 goto error;
932 pbuf = PyBytes_AS_STRING(temp);
933 len = PyBytes_GET_SIZE(temp);
934 sign = 1;
935 if (flags & F_ZERO)
936 fill = '0';
937 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200938
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200940 pbuf = &onechar;
941 len = byte_converter(v, &onechar);
942 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200944 if (width == -1) {
945 /* Fast path */
946 *res++ = onechar;
947 continue;
948 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800949 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 default:
952 PyErr_Format(PyExc_ValueError,
953 "unsupported format character '%c' (0x%x) "
954 "at index %zd",
955 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200956 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 goto error;
958 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 if (sign) {
961 if (*pbuf == '-' || *pbuf == '+') {
962 sign = *pbuf++;
963 len--;
964 }
965 else if (flags & F_SIGN)
966 sign = '+';
967 else if (flags & F_BLANK)
968 sign = ' ';
969 else
970 sign = 0;
971 }
972 if (width < len)
973 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974
975 alloc = width;
976 if (sign != 0 && len == width)
977 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200978 /* 2: size preallocated for %s */
979 if (alloc > 2) {
980 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981 if (res == NULL)
982 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984#ifdef Py_DEBUG
985 before = res;
986#endif
987
988 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (sign) {
990 if (fill != ' ')
991 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 if (width > len)
993 width--;
994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* Write the numeric prefix for "x", "X" and "o" formats
997 if the alternate form is used.
998 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200999 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 assert(pbuf[0] == '0');
1001 assert(pbuf[1] == c);
1002 if (fill != ' ') {
1003 *res++ = *pbuf++;
1004 *res++ = *pbuf++;
1005 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 width -= 2;
1007 if (width < 0)
1008 width = 0;
1009 len -= 2;
1010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001013 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014 memset(res, fill, width - len);
1015 res += (width - len);
1016 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* If padding with spaces: write sign if needed and/or numeric
1020 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (fill == ' ') {
1022 if (sign)
1023 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001024 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 assert(pbuf[0] == '0');
1026 assert(pbuf[1] == c);
1027 *res++ = *pbuf++;
1028 *res++ = *pbuf++;
1029 }
1030 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
1032 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001033 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
1036 /* Pad right with the fill character if needed */
1037 if (width > len) {
1038 memset(res, ' ', width - len);
1039 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001041
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001042 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 PyErr_SetString(PyExc_TypeError,
1044 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
1046 goto error;
1047 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
1050#ifdef Py_DEBUG
1051 /* check that we computed the exact size for this write */
1052 assert((res - before) == alloc);
1053#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055
1056 /* If overallocation was disabled, ensure that it was the last
1057 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001058 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 if (argidx < arglen && !dict) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "not all arguments converted during bytes formatting");
1064 goto error;
1065 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071
1072 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001073 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001074 if (args_owned) {
1075 Py_DECREF(args);
1076 }
1077 return NULL;
1078}
1079
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001080/* Unescape a backslash-escaped string. If unicode is non-zero,
1081 the string is a u-literal. If recode_encoding is non-zero,
1082 the string is UTF-8 encoded and should be re-encoded in the
1083 specified encoding. */
1084
Victor Stinner2ec80632015-10-14 13:32:13 +02001085static char *
1086_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1087 const char *errors, const char *recode_encoding,
1088 _PyBytesWriter *writer, char *p)
1089{
1090 PyObject *u, *w;
1091 const char* t;
1092
1093 t = *s;
1094 /* Decode non-ASCII bytes as UTF-8. */
1095 while (t < end && (*t & 0x80))
1096 t++;
1097 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1098 if (u == NULL)
1099 return NULL;
1100
1101 /* Recode them in target encoding. */
1102 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1103 Py_DECREF(u);
1104 if (w == NULL)
1105 return NULL;
1106 assert(PyBytes_Check(w));
1107
1108 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001109 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001110 p = _PyBytesWriter_WriteBytes(writer, p,
1111 PyBytes_AS_STRING(w),
1112 PyBytes_GET_SIZE(w));
1113 Py_DECREF(w);
1114 if (p == NULL)
1115 return NULL;
1116
1117 *s = t;
1118 return p;
1119}
1120
Eric V. Smith42454af2016-10-31 09:22:08 -04001121PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 Py_ssize_t len,
1123 const char *errors,
1124 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001125 const char *recode_encoding,
1126 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 _PyBytesWriter writer;
1132
1133 _PyBytesWriter_Init(&writer);
1134
1135 p = _PyBytesWriter_Alloc(&writer, len);
1136 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001138 writer.overallocate = 1;
1139
Eric V. Smith42454af2016-10-31 09:22:08 -04001140 *first_invalid_escape = NULL;
1141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 end = s + len;
1143 while (s < end) {
1144 if (*s != '\\') {
1145 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001146 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 *p++ = *s++;
1148 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001149 else {
1150 /* non-ASCII character and need to recode */
1151 p = _PyBytes_DecodeEscapeRecode(&s, end,
1152 errors, recode_encoding,
1153 &writer, p);
1154 if (p == NULL)
1155 goto failed;
1156 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 continue;
1158 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001161 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 PyErr_SetString(PyExc_ValueError,
1163 "Trailing \\ in string");
1164 goto failed;
1165 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 switch (*s++) {
1168 /* XXX This assumes ASCII! */
1169 case '\n': break;
1170 case '\\': *p++ = '\\'; break;
1171 case '\'': *p++ = '\''; break;
1172 case '\"': *p++ = '\"'; break;
1173 case 'b': *p++ = '\b'; break;
1174 case 'f': *p++ = '\014'; break; /* FF */
1175 case 't': *p++ = '\t'; break;
1176 case 'n': *p++ = '\n'; break;
1177 case 'r': *p++ = '\r'; break;
1178 case 'v': *p++ = '\013'; break; /* VT */
1179 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1180 case '0': case '1': case '2': case '3':
1181 case '4': case '5': case '6': case '7':
1182 c = s[-1] - '0';
1183 if (s < end && '0' <= *s && *s <= '7') {
1184 c = (c<<3) + *s++ - '0';
1185 if (s < end && '0' <= *s && *s <= '7')
1186 c = (c<<3) + *s++ - '0';
1187 }
1188 *p++ = c;
1189 break;
1190 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001191 if (s+1 < end) {
1192 int digit1, digit2;
1193 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1194 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1195 if (digit1 < 16 && digit2 < 16) {
1196 *p++ = (unsigned char)((digit1 << 4) + digit2);
1197 s += 2;
1198 break;
1199 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001201 /* invalid hexadecimal digits */
1202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001204 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001205 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001206 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 goto failed;
1208 }
1209 if (strcmp(errors, "replace") == 0) {
1210 *p++ = '?';
1211 } else if (strcmp(errors, "ignore") == 0)
1212 /* do nothing */;
1213 else {
1214 PyErr_Format(PyExc_ValueError,
1215 "decoding error; unknown "
1216 "error handling code: %.400s",
1217 errors);
1218 goto failed;
1219 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001220 /* skip \x */
1221 if (s < end && Py_ISXDIGIT(s[0]))
1222 s++; /* and a hexdigit */
1223 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001226 if (*first_invalid_escape == NULL) {
1227 *first_invalid_escape = s-1; /* Back up one char, since we've
1228 already incremented s. */
1229 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001231 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001232 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 UTF-8 bytes may follow. */
1234 }
1235 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001236
1237 return _PyBytesWriter_Finish(&writer, p);
1238
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001240 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242}
1243
Eric V. Smith42454af2016-10-31 09:22:08 -04001244PyObject *PyBytes_DecodeEscape(const char *s,
1245 Py_ssize_t len,
1246 const char *errors,
1247 Py_ssize_t unicode,
1248 const char *recode_encoding)
1249{
1250 const char* first_invalid_escape;
1251 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1252 recode_encoding,
1253 &first_invalid_escape);
1254 if (result == NULL)
1255 return NULL;
1256 if (first_invalid_escape != NULL) {
1257 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1258 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001259 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001260 Py_DECREF(result);
1261 return NULL;
1262 }
1263 }
1264 return result;
1265
1266}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267/* -------------------------------------------------------------------- */
1268/* object api */
1269
1270Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001271PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 if (!PyBytes_Check(op)) {
1274 PyErr_Format(PyExc_TypeError,
1275 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1276 return -1;
1277 }
1278 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279}
1280
1281char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001282PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 if (!PyBytes_Check(op)) {
1285 PyErr_Format(PyExc_TypeError,
1286 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1287 return NULL;
1288 }
1289 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290}
1291
1292int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001293PyBytes_AsStringAndSize(PyObject *obj,
1294 char **s,
1295 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 if (s == NULL) {
1298 PyErr_BadInternalCall();
1299 return -1;
1300 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 if (!PyBytes_Check(obj)) {
1303 PyErr_Format(PyExc_TypeError,
1304 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1305 return -1;
1306 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 *s = PyBytes_AS_STRING(obj);
1309 if (len != NULL)
1310 *len = PyBytes_GET_SIZE(obj);
1311 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001312 PyErr_SetString(PyExc_ValueError,
1313 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 return -1;
1315 }
1316 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001317}
Neal Norwitz6968b052007-02-27 19:02:19 +00001318
1319/* -------------------------------------------------------------------- */
1320/* Methods */
1321
Eric Smith0923d1d2009-04-16 20:16:10 +00001322#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001323
1324#include "stringlib/fastsearch.h"
1325#include "stringlib/count.h"
1326#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001327#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001328#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001329#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001330#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001331
Eric Smith0f78bff2009-11-30 01:01:42 +00001332#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001333
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334PyObject *
1335PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001336{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001337 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001339 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 unsigned char quote, *s, *p;
1342
1343 /* Compute size of output string */
1344 squotes = dquotes = 0;
1345 newsize = 3; /* b'' */
1346 s = (unsigned char*)op->ob_sval;
1347 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001348 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001350 case '\'': squotes++; break;
1351 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001353 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 default:
1355 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001357 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001358 if (newsize > PY_SSIZE_T_MAX - incr)
1359 goto overflow;
1360 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001361 }
1362 quote = '\'';
1363 if (smartquotes && squotes && !dquotes)
1364 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001365 if (squotes && quote == '\'') {
1366 if (newsize > PY_SSIZE_T_MAX - squotes)
1367 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001368 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370
1371 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (v == NULL) {
1373 return NULL;
1374 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001375 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001377 *p++ = 'b', *p++ = quote;
1378 for (i = 0; i < length; i++) {
1379 unsigned char c = op->ob_sval[i];
1380 if (c == quote || c == '\\')
1381 *p++ = '\\', *p++ = c;
1382 else if (c == '\t')
1383 *p++ = '\\', *p++ = 't';
1384 else if (c == '\n')
1385 *p++ = '\\', *p++ = 'n';
1386 else if (c == '\r')
1387 *p++ = '\\', *p++ = 'r';
1388 else if (c < ' ' || c >= 0x7f) {
1389 *p++ = '\\';
1390 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001391 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1392 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394 else
1395 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001397 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001398 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001400
1401 overflow:
1402 PyErr_SetString(PyExc_OverflowError,
1403 "bytes object is too large to make repr");
1404 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001405}
1406
Neal Norwitz6968b052007-02-27 19:02:19 +00001407static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001408bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001411}
1412
Neal Norwitz6968b052007-02-27 19:02:19 +00001413static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001414bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001415{
Victor Stinner331a6a52019-05-27 16:39:22 +02001416 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001417 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001419 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001421 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 }
1423 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001424}
1425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001427bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430}
Neal Norwitz6968b052007-02-27 19:02:19 +00001431
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432/* This is also used by PyBytes_Concat() */
1433static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001434bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 Py_buffer va, vb;
1437 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 va.len = -1;
1440 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001441 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1442 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001444 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 goto done;
1446 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 /* Optimize end cases */
1449 if (va.len == 0 && PyBytes_CheckExact(b)) {
1450 result = b;
1451 Py_INCREF(result);
1452 goto done;
1453 }
1454 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1455 result = a;
1456 Py_INCREF(result);
1457 goto done;
1458 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001460 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 PyErr_NoMemory();
1462 goto done;
1463 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001464
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001465 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 if (result != NULL) {
1467 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1468 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1469 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001470
1471 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 if (va.len != -1)
1473 PyBuffer_Release(&va);
1474 if (vb.len != -1)
1475 PyBuffer_Release(&vb);
1476 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477}
Neal Norwitz6968b052007-02-27 19:02:19 +00001478
1479static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001480bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001481{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001482 Py_ssize_t i;
1483 Py_ssize_t j;
1484 Py_ssize_t size;
1485 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 size_t nbytes;
1487 if (n < 0)
1488 n = 0;
1489 /* watch out for overflows: the size can overflow int,
1490 * and the # of bytes needed can overflow size_t
1491 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001492 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 PyErr_SetString(PyExc_OverflowError,
1494 "repeated bytes are too long");
1495 return NULL;
1496 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001497 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1499 Py_INCREF(a);
1500 return (PyObject *)a;
1501 }
1502 nbytes = (size_t)size;
1503 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1504 PyErr_SetString(PyExc_OverflowError,
1505 "repeated bytes are too long");
1506 return NULL;
1507 }
1508 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1509 if (op == NULL)
1510 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001511 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 op->ob_shash = -1;
1513 op->ob_sval[size] = '\0';
1514 if (Py_SIZE(a) == 1 && n > 0) {
1515 memset(op->ob_sval, a->ob_sval[0] , n);
1516 return (PyObject *) op;
1517 }
1518 i = 0;
1519 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001520 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 i = Py_SIZE(a);
1522 }
1523 while (i < size) {
1524 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001525 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 i += j;
1527 }
1528 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001529}
1530
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001531static int
1532bytes_contains(PyObject *self, PyObject *arg)
1533{
1534 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1535}
1536
Neal Norwitz6968b052007-02-27 19:02:19 +00001537static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001538bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 if (i < 0 || i >= Py_SIZE(a)) {
1541 PyErr_SetString(PyExc_IndexError, "index out of range");
1542 return NULL;
1543 }
1544 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001545}
1546
Benjamin Peterson621b4302016-09-09 13:54:34 -07001547static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001548bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1549{
1550 int cmp;
1551 Py_ssize_t len;
1552
1553 len = Py_SIZE(a);
1554 if (Py_SIZE(b) != len)
1555 return 0;
1556
1557 if (a->ob_sval[0] != b->ob_sval[0])
1558 return 0;
1559
1560 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1561 return (cmp == 0);
1562}
1563
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001565bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 int c;
1568 Py_ssize_t len_a, len_b;
1569 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001570 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 /* Make sure both arguments are strings. */
1573 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001574 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001575 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001576 rc = PyObject_IsInstance((PyObject*)a,
1577 (PyObject*)&PyUnicode_Type);
1578 if (!rc)
1579 rc = PyObject_IsInstance((PyObject*)b,
1580 (PyObject*)&PyUnicode_Type);
1581 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001583 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001584 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001585 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001586 return NULL;
1587 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001588 else {
1589 rc = PyObject_IsInstance((PyObject*)a,
1590 (PyObject*)&PyLong_Type);
1591 if (!rc)
1592 rc = PyObject_IsInstance((PyObject*)b,
1593 (PyObject*)&PyLong_Type);
1594 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001595 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001596 if (rc) {
1597 if (PyErr_WarnEx(PyExc_BytesWarning,
1598 "Comparison between bytes and int", 1))
1599 return NULL;
1600 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001601 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 }
stratakise8b19652017-11-02 11:32:54 +01001603 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 case Py_EQ:
1608 case Py_LE:
1609 case Py_GE:
1610 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001611 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001612 case Py_NE:
1613 case Py_LT:
1614 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001615 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001616 default:
1617 PyErr_BadArgument();
1618 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 }
1620 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001621 else if (op == Py_EQ || op == Py_NE) {
1622 int eq = bytes_compare_eq(a, b);
1623 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001624 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625 }
1626 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001627 len_a = Py_SIZE(a);
1628 len_b = Py_SIZE(b);
1629 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001630 if (min_len > 0) {
1631 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001632 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001633 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001635 else
1636 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001637 if (c != 0)
1638 Py_RETURN_RICHCOMPARE(c, 0, op);
1639 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001641}
1642
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001643static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001644bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001645{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001646 if (a->ob_shash == -1) {
1647 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001648 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001649 }
1650 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001651}
1652
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 if (PyIndex_Check(item)) {
1657 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1658 if (i == -1 && PyErr_Occurred())
1659 return NULL;
1660 if (i < 0)
1661 i += PyBytes_GET_SIZE(self);
1662 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1663 PyErr_SetString(PyExc_IndexError,
1664 "index out of range");
1665 return NULL;
1666 }
1667 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1668 }
1669 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001670 Py_ssize_t start, stop, step, slicelength, i;
1671 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 char* source_buf;
1673 char* result_buf;
1674 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001675
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001676 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 return NULL;
1678 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001679 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1680 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 if (slicelength <= 0) {
1683 return PyBytes_FromStringAndSize("", 0);
1684 }
1685 else if (start == 0 && step == 1 &&
1686 slicelength == PyBytes_GET_SIZE(self) &&
1687 PyBytes_CheckExact(self)) {
1688 Py_INCREF(self);
1689 return (PyObject *)self;
1690 }
1691 else if (step == 1) {
1692 return PyBytes_FromStringAndSize(
1693 PyBytes_AS_STRING(self) + start,
1694 slicelength);
1695 }
1696 else {
1697 source_buf = PyBytes_AS_STRING(self);
1698 result = PyBytes_FromStringAndSize(NULL, slicelength);
1699 if (result == NULL)
1700 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 result_buf = PyBytes_AS_STRING(result);
1703 for (cur = start, i = 0; i < slicelength;
1704 cur += step, i++) {
1705 result_buf[i] = source_buf[cur];
1706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return result;
1709 }
1710 }
1711 else {
1712 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001713 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 Py_TYPE(item)->tp_name);
1715 return NULL;
1716 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717}
1718
1719static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001720bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1723 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724}
1725
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001726static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 (lenfunc)bytes_length, /*sq_length*/
1728 (binaryfunc)bytes_concat, /*sq_concat*/
1729 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1730 (ssizeargfunc)bytes_item, /*sq_item*/
1731 0, /*sq_slice*/
1732 0, /*sq_ass_item*/
1733 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001734 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735};
1736
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001737static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 (lenfunc)bytes_length,
1739 (binaryfunc)bytes_subscript,
1740 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741};
1742
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001743static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 (getbufferproc)bytes_buffer_getbuffer,
1745 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746};
1747
1748
/* Values of the `striptype` argument taken by do_xstrip(), do_strip() and
   do_argstrip(): which end(s) of the bytes object get trimmed. */
#define LEFTSTRIP 0     /* trim the leading end only */
#define RIGHTSTRIP 1    /* trim the trailing end only */
#define BOTHSTRIP 2     /* trim both ends */
1752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753/*[clinic input]
1754bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756 sep: object = None
1757 The delimiter according which to split the bytes.
1758 None (the default value) means split on ASCII whitespace characters
1759 (space, tab, return, newline, formfeed, vertical tab).
1760 maxsplit: Py_ssize_t = -1
1761 Maximum number of splits to do.
1762 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764Return a list of the sections in the bytes, using sep as the delimiter.
1765[clinic start generated code]*/
1766
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001768bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1769/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770{
1771 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 const char *s = PyBytes_AS_STRING(self), *sub;
1773 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001774 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 if (maxsplit < 0)
1777 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 return NULL;
1782 sub = vsub.buf;
1783 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1786 PyBuffer_Release(&vsub);
1787 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001788}
1789
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790/*[clinic input]
1791bytes.partition
1792
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001793 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794 /
1795
1796Partition the bytes into three parts using the given separator.
1797
1798This will search for the separator sep in the bytes. If the separator is found,
1799returns a 3-tuple containing the part before the separator, the separator
1800itself, and the part after it.
1801
1802If the separator is not found, returns a 3-tuple containing the original bytes
1803object and two empty bytes objects.
1804[clinic start generated code]*/
1805
Neal Norwitz6968b052007-02-27 19:02:19 +00001806static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001807bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001808/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001809{
Neal Norwitz6968b052007-02-27 19:02:19 +00001810 return stringlib_partition(
1811 (PyObject*) self,
1812 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001813 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001814 );
1815}
1816
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817/*[clinic input]
1818bytes.rpartition
1819
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001820 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821 /
1822
1823Partition the bytes into three parts using the given separator.
1824
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001825This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001826the separator is found, returns a 3-tuple containing the part before the
1827separator, the separator itself, and the part after it.
1828
1829If the separator is not found, returns a 3-tuple containing two empty bytes
1830objects and the original bytes object.
1831[clinic start generated code]*/
1832
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001833static PyObject *
1834bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001835/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001836{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 return stringlib_rpartition(
1838 (PyObject*) self,
1839 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001840 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001842}
1843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844/*[clinic input]
1845bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001846
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001847Return a list of the sections in the bytes, using sep as the delimiter.
1848
1849Splitting is done starting at the end of the bytes and working to the front.
1850[clinic start generated code]*/
1851
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001852static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001853bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1854/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001855{
1856 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 const char *s = PyBytes_AS_STRING(self), *sub;
1858 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001859 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 if (maxsplit < 0)
1862 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001863 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001865 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 return NULL;
1867 sub = vsub.buf;
1868 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1871 PyBuffer_Release(&vsub);
1872 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001873}
1874
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001875
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001876/*[clinic input]
1877bytes.join
1878
1879 iterable_of_bytes: object
1880 /
1881
1882Concatenate any number of bytes objects.
1883
1884The bytes whose method is called is inserted in between each pair.
1885
1886The result is returned as a new bytes object.
1887
1888Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1889[clinic start generated code]*/
1890
Neal Norwitz6968b052007-02-27 19:02:19 +00001891static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001892bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1893/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001894{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001895 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001896}
1897
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898PyObject *
1899_PyBytes_Join(PyObject *sep, PyObject *x)
1900{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001901 assert(sep != NULL && PyBytes_Check(sep));
1902 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001903 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904}
1905
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001906static PyObject *
1907bytes_find(PyBytesObject *self, PyObject *args)
1908{
1909 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1910}
1911
1912static PyObject *
1913bytes_index(PyBytesObject *self, PyObject *args)
1914{
1915 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1916}
1917
1918
1919static PyObject *
1920bytes_rfind(PyBytesObject *self, PyObject *args)
1921{
1922 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1923}
1924
1925
1926static PyObject *
1927bytes_rindex(PyBytesObject *self, PyObject *args)
1928{
1929 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1930}
1931
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
1933Py_LOCAL_INLINE(PyObject *)
1934do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 Py_buffer vsep;
1937 char *s = PyBytes_AS_STRING(self);
1938 Py_ssize_t len = PyBytes_GET_SIZE(self);
1939 char *sep;
1940 Py_ssize_t seplen;
1941 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001943 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 return NULL;
1945 sep = vsep.buf;
1946 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 i = 0;
1949 if (striptype != RIGHTSTRIP) {
1950 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1951 i++;
1952 }
1953 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 j = len;
1956 if (striptype != LEFTSTRIP) {
1957 do {
1958 j--;
1959 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1960 j++;
1961 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1966 Py_INCREF(self);
1967 return (PyObject*)self;
1968 }
1969 else
1970 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001971}
1972
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
1974Py_LOCAL_INLINE(PyObject *)
1975do_strip(PyBytesObject *self, int striptype)
1976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 char *s = PyBytes_AS_STRING(self);
1978 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 i = 0;
1981 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001982 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 i++;
1984 }
1985 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 j = len;
1988 if (striptype != LEFTSTRIP) {
1989 do {
1990 j--;
David Malcolm96960882010-11-05 17:23:41 +00001991 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 j++;
1993 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1996 Py_INCREF(self);
1997 return (PyObject*)self;
1998 }
1999 else
2000 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001}
2002
2003
2004Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007 if (bytes != NULL && bytes != Py_None) {
2008 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 }
2010 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011}
2012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013/*[clinic input]
2014bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002016 bytes: object = None
2017 /
2018
2019Strip leading and trailing bytes contained in the argument.
2020
2021If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2022[clinic start generated code]*/
2023
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002024static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002026/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002029}
2030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031/*[clinic input]
2032bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002034 bytes: object = None
2035 /
2036
2037Strip leading bytes contained in the argument.
2038
2039If the argument is omitted or None, strip leading ASCII whitespace.
2040[clinic start generated code]*/
2041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042static PyObject *
2043bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002044/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045{
2046 return do_argstrip(self, LEFTSTRIP, bytes);
2047}
2048
2049/*[clinic input]
2050bytes.rstrip
2051
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002052 bytes: object = None
2053 /
2054
2055Strip trailing bytes contained in the argument.
2056
2057If the argument is omitted or None, strip trailing ASCII whitespace.
2058[clinic start generated code]*/
2059
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002060static PyObject *
2061bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002062/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002063{
2064 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002065}
Neal Norwitz6968b052007-02-27 19:02:19 +00002066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002068static PyObject *
2069bytes_count(PyBytesObject *self, PyObject *args)
2070{
2071 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2072}
2073
2074
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002075/*[clinic input]
2076bytes.translate
2077
Victor Stinner049e5092014-08-17 22:20:00 +02002078 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002080 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002081 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002082
2083Return a copy with each character mapped by the given translation table.
2084
Martin Panter1b6c6da2016-08-27 08:35:02 +00002085All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086The remaining characters are mapped through the given translation table.
2087[clinic start generated code]*/
2088
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002090bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002091 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002092/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002094 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002095 Py_buffer table_view = {NULL, NULL};
2096 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002098 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 Py_ssize_t inlen, tablen, dellen = 0;
2102 PyObject *result;
2103 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002105 if (PyBytes_Check(table)) {
2106 table_chars = PyBytes_AS_STRING(table);
2107 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002109 else if (table == Py_None) {
2110 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 tablen = 256;
2112 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002113 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002114 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002115 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002116 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002117 tablen = table_view.len;
2118 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 if (tablen != 256) {
2121 PyErr_SetString(PyExc_ValueError,
2122 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002123 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 return NULL;
2125 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127 if (deletechars != NULL) {
2128 if (PyBytes_Check(deletechars)) {
2129 del_table_chars = PyBytes_AS_STRING(deletechars);
2130 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002131 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002132 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002133 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002134 PyBuffer_Release(&table_view);
2135 return NULL;
2136 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002137 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002138 dellen = del_table_view.len;
2139 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 }
2141 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 dellen = 0;
2144 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 inlen = PyBytes_GET_SIZE(input_obj);
2147 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002148 if (result == NULL) {
2149 PyBuffer_Release(&del_table_view);
2150 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002152 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002153 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 /* If no deletions are required, use faster code */
2158 for (i = inlen; --i >= 0; ) {
2159 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002160 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 changed = 1;
2162 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002163 if (!changed && PyBytes_CheckExact(input_obj)) {
2164 Py_INCREF(input_obj);
2165 Py_DECREF(result);
2166 result = input_obj;
2167 }
2168 PyBuffer_Release(&del_table_view);
2169 PyBuffer_Release(&table_view);
2170 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 for (i = 0; i < 256; i++)
2175 trans_table[i] = Py_CHARMASK(i);
2176 } else {
2177 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002180 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002184 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 for (i = inlen; --i >= 0; ) {
2187 c = Py_CHARMASK(*input++);
2188 if (trans_table[c] != -1)
2189 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2190 continue;
2191 changed = 1;
2192 }
2193 if (!changed && PyBytes_CheckExact(input_obj)) {
2194 Py_DECREF(result);
2195 Py_INCREF(input_obj);
2196 return input_obj;
2197 }
2198 /* Fix the size of the resulting string */
2199 if (inlen > 0)
2200 _PyBytes_Resize(&result, output - output_start);
2201 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202}
2203
2204
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205/*[clinic input]
2206
2207@staticmethod
2208bytes.maketrans
2209
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002210 frm: Py_buffer
2211 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002212 /
2213
2214Return a translation table useable for the bytes or bytearray translate method.
2215
2216The returned table will be one where each byte in frm is mapped to the byte at
2217the same position in to.
2218
2219The bytes objects frm and to must be of the same length.
2220[clinic start generated code]*/
2221
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002223bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002224/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002225{
2226 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002227}
2228
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002229
2230/*[clinic input]
2231bytes.replace
2232
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002233 old: Py_buffer
2234 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235 count: Py_ssize_t = -1
2236 Maximum number of occurrences to replace.
2237 -1 (the default value) means replace all occurrences.
2238 /
2239
2240Return a copy with all occurrences of substring old replaced by new.
2241
2242If the optional argument count is given, only the first count occurrences are
2243replaced.
2244[clinic start generated code]*/
2245
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002246static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002247bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002248 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002249/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002250{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002251 return stringlib_replace((PyObject *)self,
2252 (const char *)old->buf, old->len,
2253 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254}
2255
2256/** End DALKE **/
2257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002258
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002259static PyObject *
2260bytes_startswith(PyBytesObject *self, PyObject *args)
2261{
2262 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2263}
2264
2265static PyObject *
2266bytes_endswith(PyBytesObject *self, PyObject *args)
2267{
2268 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2269}
2270
2271
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002272/*[clinic input]
2273bytes.decode
2274
2275 encoding: str(c_default="NULL") = 'utf-8'
2276 The encoding with which to decode the bytes.
2277 errors: str(c_default="NULL") = 'strict'
2278 The error handling scheme to use for the handling of decoding errors.
2279 The default is 'strict' meaning that decoding errors raise a
2280 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2281 as well as any other name registered with codecs.register_error that
2282 can handle UnicodeDecodeErrors.
2283
2284Decode the bytes using the codec registered for encoding.
2285[clinic start generated code]*/
2286
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002287static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002288bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002289 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002290/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002291{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002292 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002293}
2294
Guido van Rossum20188312006-05-05 15:15:40 +00002295
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002296/*[clinic input]
2297bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002298
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002299 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300
2301Return a list of the lines in the bytes, breaking at line boundaries.
2302
2303Line breaks are not included in the resulting list unless keepends is given and
2304true.
2305[clinic start generated code]*/
2306
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002307static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002308bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002309/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002310{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002311 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002312 (PyObject*) self, PyBytes_AS_STRING(self),
2313 PyBytes_GET_SIZE(self), keepends
2314 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002315}
2316
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317/*[clinic input]
2318@classmethod
2319bytes.fromhex
2320
2321 string: unicode
2322 /
2323
2324Create a bytes object from a string of hexadecimal numbers.
2325
2326Spaces between two numbers are accepted.
2327Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2328[clinic start generated code]*/
2329
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002331bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002332/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002333{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002334 PyObject *result = _PyBytes_FromHex(string, 0);
2335 if (type != &PyBytes_Type && result != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002336 Py_SETREF(result, _PyObject_CallOneArg((PyObject *)type, result));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002337 }
2338 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002339}
2340
2341PyObject*
2342_PyBytes_FromHex(PyObject *string, int use_bytearray)
2343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002345 Py_ssize_t hexlen, invalid_char;
2346 unsigned int top, bot;
2347 Py_UCS1 *str, *end;
2348 _PyBytesWriter writer;
2349
2350 _PyBytesWriter_Init(&writer);
2351 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002353 assert(PyUnicode_Check(string));
2354 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002357
Victor Stinner2bf89932015-10-14 11:25:33 +02002358 if (!PyUnicode_IS_ASCII(string)) {
2359 void *data = PyUnicode_DATA(string);
2360 unsigned int kind = PyUnicode_KIND(string);
2361 Py_ssize_t i;
2362
2363 /* search for the first non-ASCII character */
2364 for (i = 0; i < hexlen; i++) {
2365 if (PyUnicode_READ(kind, data, i) >= 128)
2366 break;
2367 }
2368 invalid_char = i;
2369 goto error;
2370 }
2371
2372 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2373 str = PyUnicode_1BYTE_DATA(string);
2374
2375 /* This overestimates if there are spaces */
2376 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2377 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002379
2380 end = str + hexlen;
2381 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002383 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002384 do {
2385 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002386 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002387 if (str >= end)
2388 break;
2389 }
2390
2391 top = _PyLong_DigitValue[*str];
2392 if (top >= 16) {
2393 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 goto error;
2395 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002396 str++;
2397
2398 bot = _PyLong_DigitValue[*str];
2399 if (bot >= 16) {
2400 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2401 goto error;
2402 }
2403 str++;
2404
2405 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002407
2408 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409
2410 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002411 PyErr_Format(PyExc_ValueError,
2412 "non-hexadecimal number found in "
2413 "fromhex() arg at position %zd", invalid_char);
2414 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416}
2417
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002418/*[clinic input]
2419bytes.hex
2420
2421 sep: object = NULL
2422 An optional single character or byte to separate hex bytes.
2423 bytes_per_sep: int = 1
2424 How many bytes between separators. Positive values count from the
2425 right, negative values count from the left.
2426
2427Create a str of hexadecimal numbers from a bytes object.
2428
2429Example:
2430>>> value = b'\xb9\x01\xef'
2431>>> value.hex()
2432'b901ef'
2433>>> value.hex(':')
2434'b9:01:ef'
2435>>> value.hex(':', 2)
2436'b9:01ef'
2437>>> value.hex(':', -2)
2438'b901:ef'
2439[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002440
2441static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002442bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2443/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002444{
2445 char* argbuf = PyBytes_AS_STRING(self);
2446 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002447 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002448}
2449
2450static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302451bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002452{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002454}
2455
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002456
2457static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002458bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302460 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002462 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002463 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002464 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002465 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002466 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002467 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002468 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002469 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002470 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002471 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002472 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002473 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302474 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302476 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302478 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002479 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302480 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302482 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302484 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302486 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302488 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002490 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002491 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302492 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002493 BYTES_LSTRIP_METHODDEF
2494 BYTES_MAKETRANS_METHODDEF
2495 BYTES_PARTITION_METHODDEF
2496 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002497 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2498 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002499 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002500 BYTES_RPARTITION_METHODDEF
2501 BYTES_RSPLIT_METHODDEF
2502 BYTES_RSTRIP_METHODDEF
2503 BYTES_SPLIT_METHODDEF
2504 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002505 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002506 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002507 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302508 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302510 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002511 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302512 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002513 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002515};
2516
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002517static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002518bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002519{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002520 if (!PyBytes_Check(self)) {
2521 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002522 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002523 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002524 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002525}
2526
2527static PyNumberMethods bytes_as_number = {
2528 0, /*nb_add*/
2529 0, /*nb_subtract*/
2530 0, /*nb_multiply*/
2531 bytes_mod, /*nb_remainder*/
2532};
2533
2534static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002535bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536
2537static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002538bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 PyObject *x = NULL;
2541 const char *encoding = NULL;
2542 const char *errors = NULL;
2543 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002544 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 Py_ssize_t size;
2546 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002547 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002550 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2552 &encoding, &errors))
2553 return NULL;
2554 if (x == NULL) {
2555 if (encoding != NULL || errors != NULL) {
2556 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002557 encoding != NULL ?
2558 "encoding without a string argument" :
2559 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 return NULL;
2561 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002562 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002564
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002565 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002567 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002569 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 return NULL;
2571 }
2572 new = PyUnicode_AsEncodedString(x, encoding, errors);
2573 if (new == NULL)
2574 return NULL;
2575 assert(PyBytes_Check(new));
2576 return new;
2577 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002578
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002579 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002580 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002581 PyUnicode_Check(x) ?
2582 "string argument without an encoding" :
2583 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002584 return NULL;
2585 }
2586
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002587 /* We'd like to call PyObject_Bytes here, but we need to check for an
2588 integer argument before deferring to PyBytes_FromObject, something
2589 PyObject_Bytes doesn't do. */
2590 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2591 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002592 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002593 Py_DECREF(func);
2594 if (new == NULL)
2595 return NULL;
2596 if (!PyBytes_Check(new)) {
2597 PyErr_Format(PyExc_TypeError,
2598 "__bytes__ returned non-bytes (type %.200s)",
2599 Py_TYPE(new)->tp_name);
2600 Py_DECREF(new);
2601 return NULL;
2602 }
2603 return new;
2604 }
2605 else if (PyErr_Occurred())
2606 return NULL;
2607
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002608 if (PyUnicode_Check(x)) {
2609 PyErr_SetString(PyExc_TypeError,
2610 "string argument without an encoding");
2611 return NULL;
2612 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002614 if (PyIndex_Check(x)) {
2615 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2616 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002617 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002618 return NULL;
2619 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002620 }
INADA Naokia634e232017-01-06 17:32:01 +09002621 else {
2622 if (size < 0) {
2623 PyErr_SetString(PyExc_ValueError, "negative count");
2624 return NULL;
2625 }
2626 new = _PyBytes_FromSize(size, 1);
2627 if (new == NULL)
2628 return NULL;
2629 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002630 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002631 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002633 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002634}
2635
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002636static PyObject*
2637_PyBytes_FromBuffer(PyObject *x)
2638{
2639 PyObject *new;
2640 Py_buffer view;
2641
2642 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2643 return NULL;
2644
2645 new = PyBytes_FromStringAndSize(NULL, view.len);
2646 if (!new)
2647 goto fail;
2648 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2649 &view, view.len, 'C') < 0)
2650 goto fail;
2651 PyBuffer_Release(&view);
2652 return new;
2653
2654fail:
2655 Py_XDECREF(new);
2656 PyBuffer_Release(&view);
2657 return NULL;
2658}
2659
2660static PyObject*
2661_PyBytes_FromList(PyObject *x)
2662{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002663 Py_ssize_t i, size = PyList_GET_SIZE(x);
2664 Py_ssize_t value;
2665 char *str;
2666 PyObject *item;
2667 _PyBytesWriter writer;
2668
2669 _PyBytesWriter_Init(&writer);
2670 str = _PyBytesWriter_Alloc(&writer, size);
2671 if (str == NULL)
2672 return NULL;
2673 writer.overallocate = 1;
2674 size = writer.allocated;
2675
2676 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2677 item = PyList_GET_ITEM(x, i);
2678 Py_INCREF(item);
2679 value = PyNumber_AsSsize_t(item, NULL);
2680 Py_DECREF(item);
2681 if (value == -1 && PyErr_Occurred())
2682 goto error;
2683
2684 if (value < 0 || value >= 256) {
2685 PyErr_SetString(PyExc_ValueError,
2686 "bytes must be in range(0, 256)");
2687 goto error;
2688 }
2689
2690 if (i >= size) {
2691 str = _PyBytesWriter_Resize(&writer, str, size+1);
2692 if (str == NULL)
2693 return NULL;
2694 size = writer.allocated;
2695 }
2696 *str++ = (char) value;
2697 }
2698 return _PyBytesWriter_Finish(&writer, str);
2699
2700 error:
2701 _PyBytesWriter_Dealloc(&writer);
2702 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002703}
2704
2705static PyObject*
2706_PyBytes_FromTuple(PyObject *x)
2707{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002708 PyObject *bytes;
2709 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2710 Py_ssize_t value;
2711 char *str;
2712 PyObject *item;
2713
2714 bytes = PyBytes_FromStringAndSize(NULL, size);
2715 if (bytes == NULL)
2716 return NULL;
2717 str = ((PyBytesObject *)bytes)->ob_sval;
2718
2719 for (i = 0; i < size; i++) {
2720 item = PyTuple_GET_ITEM(x, i);
2721 value = PyNumber_AsSsize_t(item, NULL);
2722 if (value == -1 && PyErr_Occurred())
2723 goto error;
2724
2725 if (value < 0 || value >= 256) {
2726 PyErr_SetString(PyExc_ValueError,
2727 "bytes must be in range(0, 256)");
2728 goto error;
2729 }
2730 *str++ = (char) value;
2731 }
2732 return bytes;
2733
2734 error:
2735 Py_DECREF(bytes);
2736 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002737}
2738
2739static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002740_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002741{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002742 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002744 _PyBytesWriter writer;
2745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002747 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 if (size == -1 && PyErr_Occurred())
2749 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002750
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002751 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002752 str = _PyBytesWriter_Alloc(&writer, size);
2753 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002755 writer.overallocate = 1;
2756 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 /* Run the iterator to exhaustion */
2759 for (i = 0; ; i++) {
2760 PyObject *item;
2761 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002763 /* Get the next item */
2764 item = PyIter_Next(it);
2765 if (item == NULL) {
2766 if (PyErr_Occurred())
2767 goto error;
2768 break;
2769 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002771 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002772 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002773 Py_DECREF(item);
2774 if (value == -1 && PyErr_Occurred())
2775 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 /* Range check */
2778 if (value < 0 || value >= 256) {
2779 PyErr_SetString(PyExc_ValueError,
2780 "bytes must be in range(0, 256)");
2781 goto error;
2782 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 /* Append the byte */
2785 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002786 str = _PyBytesWriter_Resize(&writer, str, size+1);
2787 if (str == NULL)
2788 return NULL;
2789 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002791 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002792 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002793
2794 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795
2796 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002797 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799}
2800
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002801PyObject *
2802PyBytes_FromObject(PyObject *x)
2803{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002804 PyObject *it, *result;
2805
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002806 if (x == NULL) {
2807 PyErr_BadInternalCall();
2808 return NULL;
2809 }
2810
2811 if (PyBytes_CheckExact(x)) {
2812 Py_INCREF(x);
2813 return x;
2814 }
2815
2816 /* Use the modern buffer interface */
2817 if (PyObject_CheckBuffer(x))
2818 return _PyBytes_FromBuffer(x);
2819
2820 if (PyList_CheckExact(x))
2821 return _PyBytes_FromList(x);
2822
2823 if (PyTuple_CheckExact(x))
2824 return _PyBytes_FromTuple(x);
2825
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002826 if (!PyUnicode_Check(x)) {
2827 it = PyObject_GetIter(x);
2828 if (it != NULL) {
2829 result = _PyBytes_FromIterator(it, x);
2830 Py_DECREF(it);
2831 return result;
2832 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002833 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2834 return NULL;
2835 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002836 }
2837
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002838 PyErr_Format(PyExc_TypeError,
2839 "cannot convert '%.200s' object to bytes",
2840 x->ob_type->tp_name);
2841 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002842}
2843
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002845bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 PyObject *tmp, *pnew;
2848 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002850 assert(PyType_IsSubtype(type, &PyBytes_Type));
2851 tmp = bytes_new(&PyBytes_Type, args, kwds);
2852 if (tmp == NULL)
2853 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002854 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 n = PyBytes_GET_SIZE(tmp);
2856 pnew = type->tp_alloc(type, n);
2857 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002858 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 PyBytes_AS_STRING(tmp), n+1);
2860 ((PyBytesObject *)pnew)->ob_shash =
2861 ((PyBytesObject *)tmp)->ob_shash;
2862 }
2863 Py_DECREF(tmp);
2864 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865}
2866
/* Docstring of the bytes type (used as tp_doc in PyBytes_Type): lists the
   accepted constructor call forms and the kinds of source object.  The text
   is a runtime string built with backslash-newline continuations, so the
   whitespace at the start of each continued line is part of the string. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002867PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002868"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002870bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002871bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2872bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002873\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002875 - an iterable yielding integers in range(256)\n\
2876 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002877 - any object implementing the buffer API.\n\
2878 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002879
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002880static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002881
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2884 "bytes",
2885 PyBytesObject_SIZE,
2886 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002887 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002888 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 0, /* tp_getattr */
2890 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002891 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002893 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 &bytes_as_sequence, /* tp_as_sequence */
2895 &bytes_as_mapping, /* tp_as_mapping */
2896 (hashfunc)bytes_hash, /* tp_hash */
2897 0, /* tp_call */
2898 bytes_str, /* tp_str */
2899 PyObject_GenericGetAttr, /* tp_getattro */
2900 0, /* tp_setattro */
2901 &bytes_as_buffer, /* tp_as_buffer */
2902 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2903 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2904 bytes_doc, /* tp_doc */
2905 0, /* tp_traverse */
2906 0, /* tp_clear */
2907 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2908 0, /* tp_weaklistoffset */
2909 bytes_iter, /* tp_iter */
2910 0, /* tp_iternext */
2911 bytes_methods, /* tp_methods */
2912 0, /* tp_members */
2913 0, /* tp_getset */
2914 &PyBaseObject_Type, /* tp_base */
2915 0, /* tp_dict */
2916 0, /* tp_descr_get */
2917 0, /* tp_descr_set */
2918 0, /* tp_dictoffset */
2919 0, /* tp_init */
2920 0, /* tp_alloc */
2921 bytes_new, /* tp_new */
2922 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002923};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002924
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002926PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 assert(pv != NULL);
2929 if (*pv == NULL)
2930 return;
2931 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002932 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 return;
2934 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002935
2936 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2937 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002938 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002939 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002940
Antoine Pitrou161d6952014-05-01 14:36:20 +02002941 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002942 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002943 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2944 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2945 Py_CLEAR(*pv);
2946 return;
2947 }
2948
2949 oldsize = PyBytes_GET_SIZE(*pv);
2950 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2951 PyErr_NoMemory();
2952 goto error;
2953 }
2954 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2955 goto error;
2956
2957 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2958 PyBuffer_Release(&wb);
2959 return;
2960
2961 error:
2962 PyBuffer_Release(&wb);
2963 Py_CLEAR(*pv);
2964 return;
2965 }
2966
2967 else {
2968 /* Multiple references, need to create new object */
2969 PyObject *v;
2970 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002971 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973}
2974
2975void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002976PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 PyBytes_Concat(pv, w);
2979 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980}
2981
2982
Ethan Furmanb95b5612015-01-23 20:05:18 -08002983/* The following function breaks the notion that bytes are immutable:
2984 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002986 as creating a new bytes object and destroying the old one, only
2987 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002989 Note that if there's not enough memory to resize the bytes object, the
2990 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991 memory" exception is set, and -1 is returned. Else (on success) 0 is
2992 returned, and the value in *pv may or may not be the same as on input.
2993 As always, an extra byte is allocated for a trailing \0 byte (newsize
2994 does *not* include that), and a trailing \0 byte is stored.
2995*/
2996
2997int
2998_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2999{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003000 PyObject *v;
3001 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003003 if (!PyBytes_Check(v) || newsize < 0) {
3004 goto error;
3005 }
3006 if (Py_SIZE(v) == newsize) {
3007 /* return early if newsize equals to v->ob_size */
3008 return 0;
3009 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003010 if (Py_SIZE(v) == 0) {
3011 if (newsize == 0) {
3012 return 0;
3013 }
3014 *pv = _PyBytes_FromSize(newsize, 0);
3015 Py_DECREF(v);
3016 return (*pv == NULL) ? -1 : 0;
3017 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003018 if (Py_REFCNT(v) != 1) {
3019 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003021 if (newsize == 0) {
3022 *pv = _PyBytes_FromSize(0, 0);
3023 Py_DECREF(v);
3024 return (*pv == NULL) ? -1 : 0;
3025 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 /* XXX UNREF/NEWREF interface should be more symmetrical */
3027 _Py_DEC_REFTOTAL;
3028 _Py_ForgetReference(v);
3029 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003030 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 if (*pv == NULL) {
3032 PyObject_Del(v);
3033 PyErr_NoMemory();
3034 return -1;
3035 }
3036 _Py_NewReference(*pv);
3037 sv = (PyBytesObject *) *pv;
3038 Py_SIZE(sv) = newsize;
3039 sv->ob_sval[newsize] = '\0';
3040 sv->ob_shash = -1; /* invalidate cached hash value */
3041 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003042error:
3043 *pv = 0;
3044 Py_DECREF(v);
3045 PyErr_BadInternalCall();
3046 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003047}
3048
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049void
3050PyBytes_Fini(void)
3051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003053 for (i = 0; i < UCHAR_MAX + 1; i++)
3054 Py_CLEAR(characters[i]);
3055 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003056}
3057
Benjamin Peterson4116f362008-05-27 00:36:20 +00003058/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003059
3060typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 PyObject_HEAD
3062 Py_ssize_t it_index;
3063 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065
3066static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003067striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 _PyObject_GC_UNTRACK(it);
3070 Py_XDECREF(it->it_seq);
3071 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003072}
3073
3074static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 Py_VISIT(it->it_seq);
3078 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079}
3080
3081static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 PyBytesObject *seq;
3085 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 assert(it != NULL);
3088 seq = it->it_seq;
3089 if (seq == NULL)
3090 return NULL;
3091 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3094 item = PyLong_FromLong(
3095 (unsigned char)seq->ob_sval[it->it_index]);
3096 if (item != NULL)
3097 ++it->it_index;
3098 return item;
3099 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003102 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104}
3105
3106static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303107striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003108{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 Py_ssize_t len = 0;
3110 if (it->it_seq)
3111 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3112 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113}
3114
3115PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003117
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003118static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303119striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003120{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003121 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003122 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003123 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003124 it->it_seq, it->it_index);
3125 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003126 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003127 }
3128}
3129
3130PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3131
3132static PyObject *
3133striter_setstate(striterobject *it, PyObject *state)
3134{
3135 Py_ssize_t index = PyLong_AsSsize_t(state);
3136 if (index == -1 && PyErr_Occurred())
3137 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003138 if (it->it_seq != NULL) {
3139 if (index < 0)
3140 index = 0;
3141 else if (index > PyBytes_GET_SIZE(it->it_seq))
3142 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3143 it->it_index = index;
3144 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003145 Py_RETURN_NONE;
3146}
3147
3148PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003150static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003151 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3152 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003153 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3154 reduce_doc},
3155 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3156 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003157 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003158};
3159
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003160PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3162 "bytes_iterator", /* tp_name */
3163 sizeof(striterobject), /* tp_basicsize */
3164 0, /* tp_itemsize */
3165 /* methods */
3166 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003167 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003168 0, /* tp_getattr */
3169 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003170 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003171 0, /* tp_repr */
3172 0, /* tp_as_number */
3173 0, /* tp_as_sequence */
3174 0, /* tp_as_mapping */
3175 0, /* tp_hash */
3176 0, /* tp_call */
3177 0, /* tp_str */
3178 PyObject_GenericGetAttr, /* tp_getattro */
3179 0, /* tp_setattro */
3180 0, /* tp_as_buffer */
3181 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3182 0, /* tp_doc */
3183 (traverseproc)striter_traverse, /* tp_traverse */
3184 0, /* tp_clear */
3185 0, /* tp_richcompare */
3186 0, /* tp_weaklistoffset */
3187 PyObject_SelfIter, /* tp_iter */
3188 (iternextfunc)striter_next, /* tp_iternext */
3189 striter_methods, /* tp_methods */
3190 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003191};
3192
3193static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003194bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003196 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003198 if (!PyBytes_Check(seq)) {
3199 PyErr_BadInternalCall();
3200 return NULL;
3201 }
3202 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3203 if (it == NULL)
3204 return NULL;
3205 it->it_index = 0;
3206 Py_INCREF(seq);
3207 it->it_seq = (PyBytesObject *)seq;
3208 _PyObject_GC_TRACK(it);
3209 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003210}
Victor Stinner00165072015-10-09 01:53:21 +02003211
3212
3213/* _PyBytesWriter API */
3214
/* OVERALLOCATE_FACTOR is the divisor used when a writer overallocates:
   a buffer of N bytes is grown by an extra N / OVERALLOCATE_FACTOR bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% (N / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (N / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3222
3223void
3224_PyBytesWriter_Init(_PyBytesWriter *writer)
3225{
Victor Stinner661aacc2015-10-14 09:41:48 +02003226 /* Set all attributes before small_buffer to 0 */
3227 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003228#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003229 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003230#endif
3231}
3232
3233void
3234_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3235{
3236 Py_CLEAR(writer->buffer);
3237}
3238
3239Py_LOCAL_INLINE(char*)
3240_PyBytesWriter_AsString(_PyBytesWriter *writer)
3241{
Victor Stinner661aacc2015-10-14 09:41:48 +02003242 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003243 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003244 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003245 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003246 else if (writer->use_bytearray) {
3247 assert(writer->buffer != NULL);
3248 return PyByteArray_AS_STRING(writer->buffer);
3249 }
3250 else {
3251 assert(writer->buffer != NULL);
3252 return PyBytes_AS_STRING(writer->buffer);
3253 }
Victor Stinner00165072015-10-09 01:53:21 +02003254}
3255
3256Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003257_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003258{
3259 char *start = _PyBytesWriter_AsString(writer);
3260 assert(str != NULL);
3261 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003262 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003263 return str - start;
3264}
3265
3266Py_LOCAL_INLINE(void)
3267_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3268{
3269#ifdef Py_DEBUG
3270 char *start, *end;
3271
Victor Stinner661aacc2015-10-14 09:41:48 +02003272 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003273 assert(writer->buffer == NULL);
3274 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003275 else {
3276 assert(writer->buffer != NULL);
3277 if (writer->use_bytearray)
3278 assert(PyByteArray_CheckExact(writer->buffer));
3279 else
3280 assert(PyBytes_CheckExact(writer->buffer));
3281 assert(Py_REFCNT(writer->buffer) == 1);
3282 }
Victor Stinner00165072015-10-09 01:53:21 +02003283
Victor Stinner661aacc2015-10-14 09:41:48 +02003284 if (writer->use_bytearray) {
3285 /* bytearray has its own overallocation algorithm,
3286 writer overallocation must be disabled */
3287 assert(!writer->overallocate);
3288 }
3289
3290 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003291 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003292 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003293 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003294 assert(start[writer->allocated] == 0);
3295
3296 end = start + writer->allocated;
3297 assert(str != NULL);
3298 assert(start <= str && str <= end);
3299#endif
3300}
3301
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003302void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003303_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003304{
3305 Py_ssize_t allocated, pos;
3306
3307 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003308 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003309
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003310 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003311 if (writer->overallocate
3312 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3313 /* overallocate to limit the number of realloc() */
3314 allocated += allocated / OVERALLOCATE_FACTOR;
3315 }
3316
Victor Stinner2bf89932015-10-14 11:25:33 +02003317 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003318 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003319 if (writer->use_bytearray) {
3320 if (PyByteArray_Resize(writer->buffer, allocated))
3321 goto error;
3322 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3323 but we cannot use ob_alloc because bytes may need to be moved
3324 to use the whole buffer. bytearray uses an internal optimization
3325 to avoid moving or copying bytes when bytes are removed at the
3326 beginning (ex: del bytearray[:1]). */
3327 }
3328 else {
3329 if (_PyBytes_Resize(&writer->buffer, allocated))
3330 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003331 }
3332 }
3333 else {
3334 /* convert from stack buffer to bytes object buffer */
3335 assert(writer->buffer == NULL);
3336
Victor Stinner661aacc2015-10-14 09:41:48 +02003337 if (writer->use_bytearray)
3338 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3339 else
3340 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003341 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003342 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003343
3344 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003345 char *dest;
3346 if (writer->use_bytearray)
3347 dest = PyByteArray_AS_STRING(writer->buffer);
3348 else
3349 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003350 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003352 pos);
3353 }
3354
Victor Stinnerb3653a32015-10-09 03:38:24 +02003355 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003356#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003357 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003358#endif
Victor Stinner00165072015-10-09 01:53:21 +02003359 }
3360 writer->allocated = allocated;
3361
3362 str = _PyBytesWriter_AsString(writer) + pos;
3363 _PyBytesWriter_CheckConsistency(writer, str);
3364 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003365
3366error:
3367 _PyBytesWriter_Dealloc(writer);
3368 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003369}
3370
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003371void*
3372_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3373{
3374 Py_ssize_t new_min_size;
3375
3376 _PyBytesWriter_CheckConsistency(writer, str);
3377 assert(size >= 0);
3378
3379 if (size == 0) {
3380 /* nothing to do */
3381 return str;
3382 }
3383
3384 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3385 PyErr_NoMemory();
3386 _PyBytesWriter_Dealloc(writer);
3387 return NULL;
3388 }
3389 new_min_size = writer->min_size + size;
3390
3391 if (new_min_size > writer->allocated)
3392 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3393
3394 writer->min_size = new_min_size;
3395 return str;
3396}
3397
Victor Stinner00165072015-10-09 01:53:21 +02003398/* Allocate the buffer to write size bytes.
3399 Return the pointer to the beginning of buffer data.
3400 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003401void*
Victor Stinner00165072015-10-09 01:53:21 +02003402_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3403{
3404 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003405 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003406 assert(size >= 0);
3407
Victor Stinnerb3653a32015-10-09 03:38:24 +02003408 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003409#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003410 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003411 /* In debug mode, don't use the full small buffer because it is less
3412 efficient than bytes and bytearray objects to detect buffer underflow
3413 and buffer overflow. Use 10 bytes of the small buffer to test also
3414 code using the smaller buffer in debug mode.
3415
3416 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3417 in debug mode to also be able to detect stack overflow when running
3418 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3419 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3420 stack overflow. */
3421 writer->allocated = Py_MIN(writer->allocated, 10);
3422 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3423 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003424 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003425#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003426 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003427#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003428 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003429}
3430
3431PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003432_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003433{
Victor Stinner2bf89932015-10-14 11:25:33 +02003434 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003435 PyObject *result;
3436
3437 _PyBytesWriter_CheckConsistency(writer, str);
3438
Victor Stinner2bf89932015-10-14 11:25:33 +02003439 size = _PyBytesWriter_GetSize(writer, str);
3440 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003441 Py_CLEAR(writer->buffer);
3442 /* Get the empty byte string singleton */
3443 result = PyBytes_FromStringAndSize(NULL, 0);
3444 }
3445 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003446 if (writer->use_bytearray) {
3447 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3448 }
3449 else {
3450 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3451 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003452 }
3453 else {
3454 result = writer->buffer;
3455 writer->buffer = NULL;
3456
Victor Stinner2bf89932015-10-14 11:25:33 +02003457 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003458 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003459 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003460 Py_DECREF(result);
3461 return NULL;
3462 }
3463 }
3464 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003465 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003466 assert(result == NULL);
3467 return NULL;
3468 }
Victor Stinner00165072015-10-09 01:53:21 +02003469 }
3470 }
Victor Stinner00165072015-10-09 01:53:21 +02003471 }
Victor Stinner00165072015-10-09 01:53:21 +02003472 return result;
3473}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003474
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003475void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003476_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003477 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003478{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003479 char *str = (char *)ptr;
3480
Victor Stinnerce179bf2015-10-09 12:57:22 +02003481 str = _PyBytesWriter_Prepare(writer, str, size);
3482 if (str == NULL)
3483 return NULL;
3484
Christian Heimesf051e432016-09-13 20:22:02 +02003485 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003486 str += size;
3487
3488 return str;
3489}