blob: feeabcb8b4c7ad330947ca46a50b955ba8d8236f [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
/* Lazily populated cache of one-byte bytes objects, indexed by byte value.
   Entries are filled in by PyBytes_FromStringAndSize() and
   PyBytes_FromString() the first time a given one-byte value is created. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first zero-length bytes object
   is created, after which that object is shared. */
26static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string.  There is no separate `size' parameter: the size of the new object
   is the length of that string, not counting the null terminator.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
Victor Stinner03dab782015-10-14 00:21:35 +0200324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Victor Stinner03dab782015-10-14 00:21:35 +0200341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360
Victor Stinner03dab782015-10-14 00:21:35 +0200361#undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
404/* Format codes
405 * F_LJUST '-'
406 * F_SIGN '+'
407 * F_BLANK ' '
408 * F_ALT '#'
409 * F_ZERO '0'
410 */
411#define F_LJUST (1<<0)
412#define F_SIGN (1<<1)
413#define F_BLANK (1<<2)
414#define F_ALT (1<<3)
415#define F_ZERO (1<<4)
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800670
Ethan Furmanb95b5612015-01-23 20:05:18 -0800671 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200672 if (*fmt == '%') {
673 *res++ = '%';
674 fmt++;
675 fmtcnt--;
676 continue;
677 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800678 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200679 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800680 Py_ssize_t keylen;
681 PyObject *key;
682 int pcount = 1;
683
684 if (dict == NULL) {
685 PyErr_SetString(PyExc_TypeError,
686 "format requires a mapping");
687 goto error;
688 }
689 ++fmt;
690 --fmtcnt;
691 keystart = fmt;
692 /* Skip over balanced parentheses */
693 while (pcount > 0 && --fmtcnt >= 0) {
694 if (*fmt == ')')
695 --pcount;
696 else if (*fmt == '(')
697 ++pcount;
698 fmt++;
699 }
700 keylen = fmt - keystart - 1;
701 if (fmtcnt < 0 || pcount > 0) {
702 PyErr_SetString(PyExc_ValueError,
703 "incomplete format key");
704 goto error;
705 }
706 key = PyBytes_FromStringAndSize(keystart,
707 keylen);
708 if (key == NULL)
709 goto error;
710 if (args_owned) {
711 Py_DECREF(args);
712 args_owned = 0;
713 }
714 args = PyObject_GetItem(dict, key);
715 Py_DECREF(key);
716 if (args == NULL) {
717 goto error;
718 }
719 args_owned = 1;
720 arglen = -1;
721 argidx = -2;
722 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200723
724 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800725 while (--fmtcnt >= 0) {
726 switch (c = *fmt++) {
727 case '-': flags |= F_LJUST; continue;
728 case '+': flags |= F_SIGN; continue;
729 case ' ': flags |= F_BLANK; continue;
730 case '#': flags |= F_ALT; continue;
731 case '0': flags |= F_ZERO; continue;
732 }
733 break;
734 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200735
736 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800737 if (c == '*') {
738 v = getnextarg(args, arglen, &argidx);
739 if (v == NULL)
740 goto error;
741 if (!PyLong_Check(v)) {
742 PyErr_SetString(PyExc_TypeError,
743 "* wants int");
744 goto error;
745 }
746 width = PyLong_AsSsize_t(v);
747 if (width == -1 && PyErr_Occurred())
748 goto error;
749 if (width < 0) {
750 flags |= F_LJUST;
751 width = -width;
752 }
753 if (--fmtcnt >= 0)
754 c = *fmt++;
755 }
756 else if (c >= 0 && isdigit(c)) {
757 width = c - '0';
758 while (--fmtcnt >= 0) {
759 c = Py_CHARMASK(*fmt++);
760 if (!isdigit(c))
761 break;
762 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763 PyErr_SetString(
764 PyExc_ValueError,
765 "width too big");
766 goto error;
767 }
768 width = width*10 + (c - '0');
769 }
770 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200771
772 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800773 if (c == '.') {
774 prec = 0;
775 if (--fmtcnt >= 0)
776 c = *fmt++;
777 if (c == '*') {
778 v = getnextarg(args, arglen, &argidx);
779 if (v == NULL)
780 goto error;
781 if (!PyLong_Check(v)) {
782 PyErr_SetString(
783 PyExc_TypeError,
784 "* wants int");
785 goto error;
786 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200787 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800788 if (prec == -1 && PyErr_Occurred())
789 goto error;
790 if (prec < 0)
791 prec = 0;
792 if (--fmtcnt >= 0)
793 c = *fmt++;
794 }
795 else if (c >= 0 && isdigit(c)) {
796 prec = c - '0';
797 while (--fmtcnt >= 0) {
798 c = Py_CHARMASK(*fmt++);
799 if (!isdigit(c))
800 break;
801 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802 PyErr_SetString(
803 PyExc_ValueError,
804 "prec too big");
805 goto error;
806 }
807 prec = prec*10 + (c - '0');
808 }
809 }
810 } /* prec */
811 if (fmtcnt >= 0) {
812 if (c == 'h' || c == 'l' || c == 'L') {
813 if (--fmtcnt >= 0)
814 c = *fmt++;
815 }
816 }
817 if (fmtcnt < 0) {
818 PyErr_SetString(PyExc_ValueError,
819 "incomplete format");
820 goto error;
821 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200822 v = getnextarg(args, arglen, &argidx);
823 if (v == NULL)
824 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200825
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300826 if (fmtcnt == 0) {
827 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828 writer.overallocate = 0;
829 }
830
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 sign = 0;
832 fill = ' ';
833 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700834 case 'r':
835 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200837 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 if (temp == NULL)
839 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 assert(PyUnicode_IS_ASCII(temp));
841 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 if (prec >= 0 && len > prec)
844 len = prec;
845 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200846
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 case 's':
848 // %s is only for 2/3 code; 3 only code should use %b
849 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200850 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800851 if (temp == NULL)
852 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800853 if (prec >= 0 && len > prec)
854 len = prec;
855 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200856
Ethan Furmanb95b5612015-01-23 20:05:18 -0800857 case 'i':
858 case 'd':
859 case 'u':
860 case 'o':
861 case 'x':
862 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200864 && width == -1 && prec == -1
865 && !(flags & (F_SIGN | F_BLANK))
866 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200867 {
868 /* Fast path */
869 int alternate = flags & F_ALT;
870 int base;
871
872 switch(c)
873 {
874 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700875 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200876 case 'd':
877 case 'i':
878 case 'u':
879 base = 10;
880 break;
881 case 'o':
882 base = 8;
883 break;
884 case 'x':
885 case 'X':
886 base = 16;
887 break;
888 }
889
890 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200891 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200892 res = _PyLong_FormatBytesWriter(&writer, res,
893 v, base, alternate);
894 if (res == NULL)
895 goto error;
896 continue;
897 }
898
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300899 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200900 if (!temp)
901 goto error;
902 assert(PyUnicode_IS_ASCII(temp));
903 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904 len = PyUnicode_GET_LENGTH(temp);
905 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800906 if (flags & F_ZERO)
907 fill = '0';
908 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200909
Ethan Furmanb95b5612015-01-23 20:05:18 -0800910 case 'e':
911 case 'E':
912 case 'f':
913 case 'F':
914 case 'g':
915 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916 if (width == -1 && prec == -1
917 && !(flags & (F_SIGN | F_BLANK)))
918 {
919 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200920 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200921 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200922 if (res == NULL)
923 goto error;
924 continue;
925 }
926
Victor Stinnerad771582015-10-09 12:38:53 +0200927 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 goto error;
929 pbuf = PyBytes_AS_STRING(temp);
930 len = PyBytes_GET_SIZE(temp);
931 sign = 1;
932 if (flags & F_ZERO)
933 fill = '0';
934 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200935
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200937 pbuf = &onechar;
938 len = byte_converter(v, &onechar);
939 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200941 if (width == -1) {
942 /* Fast path */
943 *res++ = onechar;
944 continue;
945 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800946 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200947
Ethan Furmanb95b5612015-01-23 20:05:18 -0800948 default:
949 PyErr_Format(PyExc_ValueError,
950 "unsupported format character '%c' (0x%x) "
951 "at index %zd",
952 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200953 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800954 goto error;
955 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200956
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 if (sign) {
958 if (*pbuf == '-' || *pbuf == '+') {
959 sign = *pbuf++;
960 len--;
961 }
962 else if (flags & F_SIGN)
963 sign = '+';
964 else if (flags & F_BLANK)
965 sign = ' ';
966 else
967 sign = 0;
968 }
969 if (width < len)
970 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200971
972 alloc = width;
973 if (sign != 0 && len == width)
974 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200975 /* 2: size preallocated for %s */
976 if (alloc > 2) {
977 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978 if (res == NULL)
979 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800980 }
Victor Stinnerf82ce5b2019-10-15 03:06:16 +0200981#ifndef NDEBUG
982 char *before = res;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200983#endif
984
985 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 if (sign) {
987 if (fill != ' ')
988 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (width > len)
990 width--;
991 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992
993 /* Write the numeric prefix for "x", "X" and "o" formats
994 if the alternate form is used.
995 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200996 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800997 assert(pbuf[0] == '0');
998 assert(pbuf[1] == c);
999 if (fill != ' ') {
1000 *res++ = *pbuf++;
1001 *res++ = *pbuf++;
1002 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001003 width -= 2;
1004 if (width < 0)
1005 width = 0;
1006 len -= 2;
1007 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001008
1009 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011 memset(res, fill, width - len);
1012 res += (width - len);
1013 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015
1016 /* If padding with spaces: write sign if needed and/or numeric
1017 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 if (fill == ' ') {
1019 if (sign)
1020 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001021 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 assert(pbuf[0] == '0');
1023 assert(pbuf[1] == c);
1024 *res++ = *pbuf++;
1025 *res++ = *pbuf++;
1026 }
1027 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001030 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
1033 /* Pad right with the fill character if needed */
1034 if (width > len) {
1035 memset(res, ' ', width - len);
1036 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001039 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 Py_XDECREF(temp);
1043 goto error;
1044 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001046
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02001047#ifndef NDEBUG
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 /* check that we computed the exact size for this write */
1049 assert((res - before) == alloc);
1050#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001051 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052
1053 /* If overallocation was disabled, ensure that it was the last
1054 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001055 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001056 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001057
Ethan Furmanb95b5612015-01-23 20:05:18 -08001058 if (argidx < arglen && !dict) {
1059 PyErr_SetString(PyExc_TypeError,
1060 "not all arguments converted during bytes formatting");
1061 goto error;
1062 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001063
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064 if (args_owned) {
1065 Py_DECREF(args);
1066 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001067 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068
1069 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071 if (args_owned) {
1072 Py_DECREF(args);
1073 }
1074 return NULL;
1075}
1076
/* _PyBytes_DecodeEscape() unescapes a backslash-escaped string.  If
   recode_encoding is non-NULL, the string is UTF-8 encoded and its
   non-ASCII bytes are re-encoded in the specified encoding (this is done
   by the helper _PyBytes_DecodeEscapeRecode() defined first).  The unicode
   argument (non-zero meaning "the string is a u-literal") is accepted but
   is not consulted by the decoder. */
1081
Victor Stinner2ec80632015-10-14 13:32:13 +02001082static char *
1083_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1084 const char *errors, const char *recode_encoding,
1085 _PyBytesWriter *writer, char *p)
1086{
1087 PyObject *u, *w;
1088 const char* t;
1089
1090 t = *s;
1091 /* Decode non-ASCII bytes as UTF-8. */
1092 while (t < end && (*t & 0x80))
1093 t++;
1094 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1095 if (u == NULL)
1096 return NULL;
1097
1098 /* Recode them in target encoding. */
1099 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1100 Py_DECREF(u);
1101 if (w == NULL)
1102 return NULL;
1103 assert(PyBytes_Check(w));
1104
1105 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001106 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001107 p = _PyBytesWriter_WriteBytes(writer, p,
1108 PyBytes_AS_STRING(w),
1109 PyBytes_GET_SIZE(w));
1110 Py_DECREF(w);
1111 if (p == NULL)
1112 return NULL;
1113
1114 *s = t;
1115 return p;
1116}
1117
Eric V. Smith42454af2016-10-31 09:22:08 -04001118PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 Py_ssize_t len,
1120 const char *errors,
1121 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001122 const char *recode_encoding,
1123 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001126 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001128 _PyBytesWriter writer;
1129
1130 _PyBytesWriter_Init(&writer);
1131
1132 p = _PyBytesWriter_Alloc(&writer, len);
1133 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001135 writer.overallocate = 1;
1136
Eric V. Smith42454af2016-10-31 09:22:08 -04001137 *first_invalid_escape = NULL;
1138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 end = s + len;
1140 while (s < end) {
1141 if (*s != '\\') {
1142 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001143 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 *p++ = *s++;
1145 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001146 else {
1147 /* non-ASCII character and need to recode */
1148 p = _PyBytes_DecodeEscapeRecode(&s, end,
1149 errors, recode_encoding,
1150 &writer, p);
1151 if (p == NULL)
1152 goto failed;
1153 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 continue;
1155 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001158 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 PyErr_SetString(PyExc_ValueError,
1160 "Trailing \\ in string");
1161 goto failed;
1162 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 switch (*s++) {
1165 /* XXX This assumes ASCII! */
1166 case '\n': break;
1167 case '\\': *p++ = '\\'; break;
1168 case '\'': *p++ = '\''; break;
1169 case '\"': *p++ = '\"'; break;
1170 case 'b': *p++ = '\b'; break;
1171 case 'f': *p++ = '\014'; break; /* FF */
1172 case 't': *p++ = '\t'; break;
1173 case 'n': *p++ = '\n'; break;
1174 case 'r': *p++ = '\r'; break;
1175 case 'v': *p++ = '\013'; break; /* VT */
1176 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1177 case '0': case '1': case '2': case '3':
1178 case '4': case '5': case '6': case '7':
1179 c = s[-1] - '0';
1180 if (s < end && '0' <= *s && *s <= '7') {
1181 c = (c<<3) + *s++ - '0';
1182 if (s < end && '0' <= *s && *s <= '7')
1183 c = (c<<3) + *s++ - '0';
1184 }
1185 *p++ = c;
1186 break;
1187 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001188 if (s+1 < end) {
1189 int digit1, digit2;
1190 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1191 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1192 if (digit1 < 16 && digit2 < 16) {
1193 *p++ = (unsigned char)((digit1 << 4) + digit2);
1194 s += 2;
1195 break;
1196 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001198 /* invalid hexadecimal digits */
1199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001201 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001202 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001203 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 goto failed;
1205 }
1206 if (strcmp(errors, "replace") == 0) {
1207 *p++ = '?';
1208 } else if (strcmp(errors, "ignore") == 0)
1209 /* do nothing */;
1210 else {
1211 PyErr_Format(PyExc_ValueError,
1212 "decoding error; unknown "
1213 "error handling code: %.400s",
1214 errors);
1215 goto failed;
1216 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001217 /* skip \x */
1218 if (s < end && Py_ISXDIGIT(s[0]))
1219 s++; /* and a hexdigit */
1220 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001223 if (*first_invalid_escape == NULL) {
1224 *first_invalid_escape = s-1; /* Back up one char, since we've
1225 already incremented s. */
1226 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001228 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001229 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 UTF-8 bytes may follow. */
1231 }
1232 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001233
1234 return _PyBytesWriter_Finish(&writer, p);
1235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001237 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239}
1240
Eric V. Smith42454af2016-10-31 09:22:08 -04001241PyObject *PyBytes_DecodeEscape(const char *s,
1242 Py_ssize_t len,
1243 const char *errors,
1244 Py_ssize_t unicode,
1245 const char *recode_encoding)
1246{
1247 const char* first_invalid_escape;
1248 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249 recode_encoding,
1250 &first_invalid_escape);
1251 if (result == NULL)
1252 return NULL;
1253 if (first_invalid_escape != NULL) {
1254 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1255 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001256 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001257 Py_DECREF(result);
1258 return NULL;
1259 }
1260 }
1261 return result;
1262
1263}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* -------------------------------------------------------------------- */
1265/* object api */
1266
1267Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001268PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (!PyBytes_Check(op)) {
1271 PyErr_Format(PyExc_TypeError,
1272 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273 return -1;
1274 }
1275 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276}
1277
1278char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 if (!PyBytes_Check(op)) {
1282 PyErr_Format(PyExc_TypeError,
1283 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284 return NULL;
1285 }
1286 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287}
1288
1289int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001290PyBytes_AsStringAndSize(PyObject *obj,
1291 char **s,
1292 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 if (s == NULL) {
1295 PyErr_BadInternalCall();
1296 return -1;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 if (!PyBytes_Check(obj)) {
1300 PyErr_Format(PyExc_TypeError,
1301 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1302 return -1;
1303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 *s = PyBytes_AS_STRING(obj);
1306 if (len != NULL)
1307 *len = PyBytes_GET_SIZE(obj);
1308 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001309 PyErr_SetString(PyExc_ValueError,
1310 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 return -1;
1312 }
1313 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314}
Neal Norwitz6968b052007-02-27 19:02:19 +00001315
1316/* -------------------------------------------------------------------- */
1317/* Methods */
1318
Eric Smith0923d1d2009-04-16 20:16:10 +00001319#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001320
1321#include "stringlib/fastsearch.h"
1322#include "stringlib/count.h"
1323#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001324#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001325#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001326#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001327#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001328
Eric Smith0f78bff2009-11-30 01:01:42 +00001329#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001330
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331PyObject *
1332PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001333{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001334 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001335 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001336 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 unsigned char quote, *s, *p;
1339
1340 /* Compute size of output string */
1341 squotes = dquotes = 0;
1342 newsize = 3; /* b'' */
1343 s = (unsigned char*)op->ob_sval;
1344 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001345 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001347 case '\'': squotes++; break;
1348 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001350 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 default:
1352 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001353 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001355 if (newsize > PY_SSIZE_T_MAX - incr)
1356 goto overflow;
1357 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 }
1359 quote = '\'';
1360 if (smartquotes && squotes && !dquotes)
1361 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001362 if (squotes && quote == '\'') {
1363 if (newsize > PY_SSIZE_T_MAX - squotes)
1364 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001367
1368 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 if (v == NULL) {
1370 return NULL;
1371 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001372 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374 *p++ = 'b', *p++ = quote;
1375 for (i = 0; i < length; i++) {
1376 unsigned char c = op->ob_sval[i];
1377 if (c == quote || c == '\\')
1378 *p++ = '\\', *p++ = c;
1379 else if (c == '\t')
1380 *p++ = '\\', *p++ = 't';
1381 else if (c == '\n')
1382 *p++ = '\\', *p++ = 'n';
1383 else if (c == '\r')
1384 *p++ = '\\', *p++ = 'r';
1385 else if (c < ' ' || c >= 0x7f) {
1386 *p++ = '\\';
1387 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001388 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1389 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391 else
1392 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001395 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001396 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001397
1398 overflow:
1399 PyErr_SetString(PyExc_OverflowError,
1400 "bytes object is too large to make repr");
1401 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001402}
1403
Neal Norwitz6968b052007-02-27 19:02:19 +00001404static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001405bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001408}
1409
Neal Norwitz6968b052007-02-27 19:02:19 +00001410static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001411bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001412{
Victor Stinner331a6a52019-05-27 16:39:22 +02001413 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001414 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001416 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001418 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 }
1420 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001421}
1422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001424bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427}
Neal Norwitz6968b052007-02-27 19:02:19 +00001428
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429/* This is also used by PyBytes_Concat() */
1430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001431bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 Py_buffer va, vb;
1434 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 va.len = -1;
1437 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001438 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1439 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001441 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 goto done;
1443 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 /* Optimize end cases */
1446 if (va.len == 0 && PyBytes_CheckExact(b)) {
1447 result = b;
1448 Py_INCREF(result);
1449 goto done;
1450 }
1451 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1452 result = a;
1453 Py_INCREF(result);
1454 goto done;
1455 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001457 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 PyErr_NoMemory();
1459 goto done;
1460 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001462 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 if (result != NULL) {
1464 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1465 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1466 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
1468 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (va.len != -1)
1470 PyBuffer_Release(&va);
1471 if (vb.len != -1)
1472 PyBuffer_Release(&vb);
1473 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474}
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
1476static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001477bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001478{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479 Py_ssize_t i;
1480 Py_ssize_t j;
1481 Py_ssize_t size;
1482 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 size_t nbytes;
1484 if (n < 0)
1485 n = 0;
1486 /* watch out for overflows: the size can overflow int,
1487 * and the # of bytes needed can overflow size_t
1488 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001489 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 PyErr_SetString(PyExc_OverflowError,
1491 "repeated bytes are too long");
1492 return NULL;
1493 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001494 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496 Py_INCREF(a);
1497 return (PyObject *)a;
1498 }
1499 nbytes = (size_t)size;
1500 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501 PyErr_SetString(PyExc_OverflowError,
1502 "repeated bytes are too long");
1503 return NULL;
1504 }
1505 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506 if (op == NULL)
1507 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001508 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 op->ob_shash = -1;
1510 op->ob_sval[size] = '\0';
1511 if (Py_SIZE(a) == 1 && n > 0) {
1512 memset(op->ob_sval, a->ob_sval[0] , n);
1513 return (PyObject *) op;
1514 }
1515 i = 0;
1516 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001517 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 i = Py_SIZE(a);
1519 }
1520 while (i < size) {
1521 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001522 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 i += j;
1524 }
1525 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001526}
1527
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001528static int
1529bytes_contains(PyObject *self, PyObject *arg)
1530{
1531 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532}
1533
Neal Norwitz6968b052007-02-27 19:02:19 +00001534static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001535bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 if (i < 0 || i >= Py_SIZE(a)) {
1538 PyErr_SetString(PyExc_IndexError, "index out of range");
1539 return NULL;
1540 }
1541 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001542}
1543
Benjamin Peterson621b4302016-09-09 13:54:34 -07001544static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001545bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546{
1547 int cmp;
1548 Py_ssize_t len;
1549
1550 len = Py_SIZE(a);
1551 if (Py_SIZE(b) != len)
1552 return 0;
1553
1554 if (a->ob_sval[0] != b->ob_sval[0])
1555 return 0;
1556
1557 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558 return (cmp == 0);
1559}
1560
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001562bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 int c;
1565 Py_ssize_t len_a, len_b;
1566 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001567 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 /* Make sure both arguments are strings. */
1570 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinner331a6a52019-05-27 16:39:22 +02001571 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
Victor Stinnerc96be812019-05-14 17:34:56 +02001572 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001573 rc = PyObject_IsInstance((PyObject*)a,
1574 (PyObject*)&PyUnicode_Type);
1575 if (!rc)
1576 rc = PyObject_IsInstance((PyObject*)b,
1577 (PyObject*)&PyUnicode_Type);
1578 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001580 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001581 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001582 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001583 return NULL;
1584 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001585 else {
1586 rc = PyObject_IsInstance((PyObject*)a,
1587 (PyObject*)&PyLong_Type);
1588 if (!rc)
1589 rc = PyObject_IsInstance((PyObject*)b,
1590 (PyObject*)&PyLong_Type);
1591 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001592 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001593 if (rc) {
1594 if (PyErr_WarnEx(PyExc_BytesWarning,
1595 "Comparison between bytes and int", 1))
1596 return NULL;
1597 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001598 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 }
stratakise8b19652017-11-02 11:32:54 +01001600 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001602 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001604 case Py_EQ:
1605 case Py_LE:
1606 case Py_GE:
1607 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001608 Py_RETURN_TRUE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001609 case Py_NE:
1610 case Py_LT:
1611 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001612 Py_RETURN_FALSE;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001613 default:
1614 PyErr_BadArgument();
1615 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 }
1617 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001618 else if (op == Py_EQ || op == Py_NE) {
1619 int eq = bytes_compare_eq(a, b);
1620 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001621 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001624 len_a = Py_SIZE(a);
1625 len_b = Py_SIZE(b);
1626 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001627 if (min_len > 0) {
1628 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001629 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001630 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001632 else
1633 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001634 if (c != 0)
1635 Py_RETURN_RICHCOMPARE(c, 0, op);
1636 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001640static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001642{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001643 if (a->ob_shash == -1) {
1644 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001645 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001646 }
1647 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001648}
1649
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001651bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 if (PyIndex_Check(item)) {
1654 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1655 if (i == -1 && PyErr_Occurred())
1656 return NULL;
1657 if (i < 0)
1658 i += PyBytes_GET_SIZE(self);
1659 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1660 PyErr_SetString(PyExc_IndexError,
1661 "index out of range");
1662 return NULL;
1663 }
1664 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1665 }
1666 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001667 Py_ssize_t start, stop, step, slicelength, i;
1668 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 char* source_buf;
1670 char* result_buf;
1671 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001672
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001673 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 return NULL;
1675 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001676 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1677 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 if (slicelength <= 0) {
1680 return PyBytes_FromStringAndSize("", 0);
1681 }
1682 else if (start == 0 && step == 1 &&
1683 slicelength == PyBytes_GET_SIZE(self) &&
1684 PyBytes_CheckExact(self)) {
1685 Py_INCREF(self);
1686 return (PyObject *)self;
1687 }
1688 else if (step == 1) {
1689 return PyBytes_FromStringAndSize(
1690 PyBytes_AS_STRING(self) + start,
1691 slicelength);
1692 }
1693 else {
1694 source_buf = PyBytes_AS_STRING(self);
1695 result = PyBytes_FromStringAndSize(NULL, slicelength);
1696 if (result == NULL)
1697 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 result_buf = PyBytes_AS_STRING(result);
1700 for (cur = start, i = 0; i < slicelength;
1701 cur += step, i++) {
1702 result_buf[i] = source_buf[cur];
1703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 return result;
1706 }
1707 }
1708 else {
1709 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001710 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 Py_TYPE(item)->tp_name);
1712 return NULL;
1713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714}
1715
1716static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001717bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721}
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length, /*sq_length*/
1725 (binaryfunc)bytes_concat, /*sq_concat*/
1726 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727 (ssizeargfunc)bytes_item, /*sq_item*/
1728 0, /*sq_slice*/
1729 0, /*sq_ass_item*/
1730 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001731 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001734static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 (lenfunc)bytes_length,
1736 (binaryfunc)bytes_subscript,
1737 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738};
1739
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001740static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 (getbufferproc)bytes_buffer_getbuffer,
1742 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743};
1744
1745
/* Values for the "striptype" argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object get stripped. */
#define LEFTSTRIP 0     /* strip leading bytes only (lstrip) */
#define RIGHTSTRIP 1    /* strip trailing bytes only (rstrip) */
#define BOTHSTRIP 2     /* strip both ends (strip) */
1749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750/*[clinic input]
1751bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753 sep: object = None
1754 The delimiter according which to split the bytes.
1755 None (the default value) means split on ASCII whitespace characters
1756 (space, tab, return, newline, formfeed, vertical tab).
1757 maxsplit: Py_ssize_t = -1
1758 Maximum number of splits to do.
1759 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761Return a list of the sections in the bytes, using sep as the delimiter.
1762[clinic start generated code]*/
1763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001765bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767{
1768 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 const char *s = PyBytes_AS_STRING(self), *sub;
1770 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 if (maxsplit < 0)
1774 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return NULL;
1779 sub = vsub.buf;
1780 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783 PyBuffer_Release(&vsub);
1784 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001785}
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787/*[clinic input]
1788bytes.partition
1789
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 /
1792
1793Partition the bytes into three parts using the given separator.
1794
1795This will search for the separator sep in the bytes. If the separator is found,
1796returns a 3-tuple containing the part before the separator, the separator
1797itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing the original bytes
1800object and two empty bytes objects.
1801[clinic start generated code]*/
1802
Neal Norwitz6968b052007-02-27 19:02:19 +00001803static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001804bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001805/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001806{
Neal Norwitz6968b052007-02-27 19:02:19 +00001807 return stringlib_partition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001811 );
1812}
1813
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001814/*[clinic input]
1815bytes.rpartition
1816
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001817 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818 /
1819
1820Partition the bytes into three parts using the given separator.
1821
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001822This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001823the separator is found, returns a 3-tuple containing the part before the
1824separator, the separator itself, and the part after it.
1825
1826If the separator is not found, returns a 3-tuple containing two empty bytes
1827objects and the original bytes object.
1828[clinic start generated code]*/
1829
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001830static PyObject *
1831bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001832/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001833{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return stringlib_rpartition(
1835 (PyObject*) self,
1836 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001837 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001839}
1840
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001841/*[clinic input]
1842bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846Splitting is done starting at the end of the bytes and working to the front.
1847[clinic start generated code]*/
1848
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001849static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001850bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001852{
1853 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 const char *s = PyBytes_AS_STRING(self), *sub;
1855 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001856 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 if (maxsplit < 0)
1859 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001860 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001862 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 return NULL;
1864 sub = vsub.buf;
1865 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868 PyBuffer_Release(&vsub);
1869 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001873/*[clinic input]
1874bytes.join
1875
1876 iterable_of_bytes: object
1877 /
1878
1879Concatenate any number of bytes objects.
1880
1881The bytes whose method is called is inserted in between each pair.
1882
1883The result is returned as a new bytes object.
1884
1885Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886[clinic start generated code]*/
1887
Neal Norwitz6968b052007-02-27 19:02:19 +00001888static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001889bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001891{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001892 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001893}
1894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895PyObject *
1896_PyBytes_Join(PyObject *sep, PyObject *x)
1897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 assert(sep != NULL && PyBytes_Check(sep));
1899 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001900 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901}
1902
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001903static PyObject *
1904bytes_find(PyBytesObject *self, PyObject *args)
1905{
1906 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907}
1908
1909static PyObject *
1910bytes_index(PyBytesObject *self, PyObject *args)
1911{
1912 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913}
1914
1915
1916static PyObject *
1917bytes_rfind(PyBytesObject *self, PyObject *args)
1918{
1919 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922
1923static PyObject *
1924bytes_rindex(PyBytesObject *self, PyObject *args)
1925{
1926 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927}
1928
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
1930Py_LOCAL_INLINE(PyObject *)
1931do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 Py_buffer vsep;
1934 char *s = PyBytes_AS_STRING(self);
1935 Py_ssize_t len = PyBytes_GET_SIZE(self);
1936 char *sep;
1937 Py_ssize_t seplen;
1938 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001940 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 return NULL;
1942 sep = vsep.buf;
1943 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 i = 0;
1946 if (striptype != RIGHTSTRIP) {
1947 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948 i++;
1949 }
1950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 j = len;
1953 if (striptype != LEFTSTRIP) {
1954 do {
1955 j--;
1956 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957 j++;
1958 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963 Py_INCREF(self);
1964 return (PyObject*)self;
1965 }
1966 else
1967 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001968}
1969
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
1971Py_LOCAL_INLINE(PyObject *)
1972do_strip(PyBytesObject *self, int striptype)
1973{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 char *s = PyBytes_AS_STRING(self);
1975 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001979 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 i++;
1981 }
1982 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
David Malcolm96960882010-11-05 17:23:41 +00001988 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 j++;
1990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998}
1999
2000
2001Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002002do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003{
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002004 if (bytes != Py_None) {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 }
2007 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008}
2009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010/*[clinic input]
2011bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002013 bytes: object = None
2014 /
2015
2016Strip leading and trailing bytes contained in the argument.
2017
2018If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019[clinic start generated code]*/
2020
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002021static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002023/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002024{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026}
2027
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028/*[clinic input]
2029bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002031 bytes: object = None
2032 /
2033
2034Strip leading bytes contained in the argument.
2035
2036If the argument is omitted or None, strip leading ASCII whitespace.
2037[clinic start generated code]*/
2038
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039static PyObject *
2040bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002041/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042{
2043 return do_argstrip(self, LEFTSTRIP, bytes);
2044}
2045
2046/*[clinic input]
2047bytes.rstrip
2048
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 bytes: object = None
2050 /
2051
2052Strip trailing bytes contained in the argument.
2053
2054If the argument is omitted or None, strip trailing ASCII whitespace.
2055[clinic start generated code]*/
2056
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057static PyObject *
2058bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002059/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002060{
2061 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002062}
Neal Norwitz6968b052007-02-27 19:02:19 +00002063
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002065static PyObject *
2066bytes_count(PyBytesObject *self, PyObject *args)
2067{
2068 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069}
2070
2071
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072/*[clinic input]
2073bytes.translate
2074
Victor Stinner049e5092014-08-17 22:20:00 +02002075 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002077 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002078 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079
2080Return a copy with each character mapped by the given translation table.
2081
Martin Panter1b6c6da2016-08-27 08:35:02 +00002082All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083The remaining characters are mapped through the given translation table.
2084[clinic start generated code]*/
2085
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002086static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002087bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002088 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002089/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002091 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002092 Py_buffer table_view = {NULL, NULL};
2093 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002094 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002095 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002097 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 Py_ssize_t inlen, tablen, dellen = 0;
2099 PyObject *result;
2100 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102 if (PyBytes_Check(table)) {
2103 table_chars = PyBytes_AS_STRING(table);
2104 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002106 else if (table == Py_None) {
2107 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 tablen = 256;
2109 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002111 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002112 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002113 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 tablen = table_view.len;
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 if (tablen != 256) {
2118 PyErr_SetString(PyExc_ValueError,
2119 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return NULL;
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124 if (deletechars != NULL) {
2125 if (PyBytes_Check(deletechars)) {
2126 del_table_chars = PyBytes_AS_STRING(deletechars);
2127 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002129 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002130 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002131 PyBuffer_Release(&table_view);
2132 return NULL;
2133 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002134 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 dellen = del_table_view.len;
2136 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 }
2138 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 dellen = 0;
2141 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 inlen = PyBytes_GET_SIZE(input_obj);
2144 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002145 if (result == NULL) {
2146 PyBuffer_Release(&del_table_view);
2147 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002150 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 /* If no deletions are required, use faster code */
2155 for (i = inlen; --i >= 0; ) {
2156 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 changed = 1;
2159 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 if (!changed && PyBytes_CheckExact(input_obj)) {
2161 Py_INCREF(input_obj);
2162 Py_DECREF(result);
2163 result = input_obj;
2164 }
2165 PyBuffer_Release(&del_table_view);
2166 PyBuffer_Release(&table_view);
2167 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 for (i = 0; i < 256; i++)
2172 trans_table[i] = Py_CHARMASK(i);
2173 } else {
2174 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002175 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002177 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002181 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 for (i = inlen; --i >= 0; ) {
2184 c = Py_CHARMASK(*input++);
2185 if (trans_table[c] != -1)
2186 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187 continue;
2188 changed = 1;
2189 }
2190 if (!changed && PyBytes_CheckExact(input_obj)) {
2191 Py_DECREF(result);
2192 Py_INCREF(input_obj);
2193 return input_obj;
2194 }
2195 /* Fix the size of the resulting string */
2196 if (inlen > 0)
2197 _PyBytes_Resize(&result, output - output_start);
2198 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199}
2200
2201
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002202/*[clinic input]
2203
2204@staticmethod
2205bytes.maketrans
2206
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002207 frm: Py_buffer
2208 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209 /
2210
2211Return a translation table useable for the bytes or bytearray translate method.
2212
2213The returned table will be one where each byte in frm is mapped to the byte at
2214the same position in to.
2215
2216The bytes objects frm and to must be of the same length.
2217[clinic start generated code]*/
2218
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002220bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002221/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222{
2223 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002224}
2225
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226
2227/*[clinic input]
2228bytes.replace
2229
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002230 old: Py_buffer
2231 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232 count: Py_ssize_t = -1
2233 Maximum number of occurrences to replace.
2234 -1 (the default value) means replace all occurrences.
2235 /
2236
2237Return a copy with all occurrences of substring old replaced by new.
2238
2239If the optional argument count is given, only the first count occurrences are
2240replaced.
2241[clinic start generated code]*/
2242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002244bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002245 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002246/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002248 return stringlib_replace((PyObject *)self,
2249 (const char *)old->buf, old->len,
2250 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251}
2252
2253/** End DALKE **/
2254
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002256static PyObject *
2257bytes_startswith(PyBytesObject *self, PyObject *args)
2258{
2259 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260}
2261
2262static PyObject *
2263bytes_endswith(PyBytesObject *self, PyObject *args)
2264{
2265 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266}
2267
2268
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002269/*[clinic input]
2270bytes.decode
2271
2272 encoding: str(c_default="NULL") = 'utf-8'
2273 The encoding with which to decode the bytes.
2274 errors: str(c_default="NULL") = 'strict'
2275 The error handling scheme to use for the handling of decoding errors.
2276 The default is 'strict' meaning that decoding errors raise a
2277 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278 as well as any other name registered with codecs.register_error that
2279 can handle UnicodeDecodeErrors.
2280
2281Decode the bytes using the codec registered for encoding.
2282[clinic start generated code]*/
2283
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002284static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002285bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002286 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002287/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002288{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002289 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002290}
2291
Guido van Rossum20188312006-05-05 15:15:40 +00002292
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002293/*[clinic input]
2294bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002295
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002296 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297
2298Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300Line breaks are not included in the resulting list unless keepends is given and
2301true.
2302[clinic start generated code]*/
2303
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002305bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002306/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002307{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002309 (PyObject*) self, PyBytes_AS_STRING(self),
2310 PyBytes_GET_SIZE(self), keepends
2311 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312}
2313
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002314/*[clinic input]
2315@classmethod
2316bytes.fromhex
2317
2318 string: unicode
2319 /
2320
2321Create a bytes object from a string of hexadecimal numbers.
2322
2323Spaces between two numbers are accepted.
2324Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325[clinic start generated code]*/
2326
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002328bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002329/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002331 PyObject *result = _PyBytes_FromHex(string, 0);
2332 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002333 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002335 }
2336 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002337}
2338
2339PyObject*
2340_PyBytes_FromHex(PyObject *string, int use_bytearray)
2341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002343 Py_ssize_t hexlen, invalid_char;
2344 unsigned int top, bot;
2345 Py_UCS1 *str, *end;
2346 _PyBytesWriter writer;
2347
2348 _PyBytesWriter_Init(&writer);
2349 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002351 assert(PyUnicode_Check(string));
2352 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002354 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002355
Victor Stinner2bf89932015-10-14 11:25:33 +02002356 if (!PyUnicode_IS_ASCII(string)) {
2357 void *data = PyUnicode_DATA(string);
2358 unsigned int kind = PyUnicode_KIND(string);
2359 Py_ssize_t i;
2360
2361 /* search for the first non-ASCII character */
2362 for (i = 0; i < hexlen; i++) {
2363 if (PyUnicode_READ(kind, data, i) >= 128)
2364 break;
2365 }
2366 invalid_char = i;
2367 goto error;
2368 }
2369
2370 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371 str = PyUnicode_1BYTE_DATA(string);
2372
2373 /* This overestimates if there are spaces */
2374 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002377
2378 end = str + hexlen;
2379 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002381 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002382 do {
2383 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002384 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002385 if (str >= end)
2386 break;
2387 }
2388
2389 top = _PyLong_DigitValue[*str];
2390 if (top >= 16) {
2391 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 goto error;
2393 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002394 str++;
2395
2396 bot = _PyLong_DigitValue[*str];
2397 if (bot >= 16) {
2398 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399 goto error;
2400 }
2401 str++;
2402
2403 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002405
2406 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002407
2408 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002409 PyErr_Format(PyExc_ValueError,
2410 "non-hexadecimal number found in "
2411 "fromhex() arg at position %zd", invalid_char);
2412 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002414}
2415
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002416/*[clinic input]
2417bytes.hex
2418
2419 sep: object = NULL
2420 An optional single character or byte to separate hex bytes.
2421 bytes_per_sep: int = 1
2422 How many bytes between separators. Positive values count from the
2423 right, negative values count from the left.
2424
2425Create a str of hexadecimal numbers from a bytes object.
2426
2427Example:
2428>>> value = b'\xb9\x01\xef'
2429>>> value.hex()
2430'b901ef'
2431>>> value.hex(':')
2432'b9:01:ef'
2433>>> value.hex(':', 2)
2434'b9:01ef'
2435>>> value.hex(':', -2)
2436'b901:ef'
2437[clinic start generated code]*/
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002438
2439static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002440bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2441/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002442{
2443 char* argbuf = PyBytes_AS_STRING(self);
2444 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002445 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002446}
2447
2448static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302449bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002452}
2453
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002454
2455static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002456bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002457 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302458 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002460 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002461 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002462 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002463 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002464 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002465 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002466 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002467 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002468 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002469 BYTES_FROMHEX_METHODDEF
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07002470 BYTES_HEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002471 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302472 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302474 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302476 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002477 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302478 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302480 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302482 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302484 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302486 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002488 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002489 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302490 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002491 BYTES_LSTRIP_METHODDEF
2492 BYTES_MAKETRANS_METHODDEF
2493 BYTES_PARTITION_METHODDEF
2494 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002495 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2496 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002497 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002498 BYTES_RPARTITION_METHODDEF
2499 BYTES_RSPLIT_METHODDEF
2500 BYTES_RSTRIP_METHODDEF
2501 BYTES_SPLIT_METHODDEF
2502 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002503 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002504 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002505 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302506 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302508 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002509 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302510 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002511 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002512 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002513};
2514
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002516bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002517{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002518 if (!PyBytes_Check(self)) {
2519 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002520 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002521 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002522 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002523}
2524
2525static PyNumberMethods bytes_as_number = {
2526 0, /*nb_add*/
2527 0, /*nb_subtract*/
2528 0, /*nb_multiply*/
2529 bytes_mod, /*nb_remainder*/
2530};
2531
2532static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002533bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
2535static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002536bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 PyObject *x = NULL;
2539 const char *encoding = NULL;
2540 const char *errors = NULL;
2541 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002542 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 Py_ssize_t size;
2544 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002545 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002548 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2550 &encoding, &errors))
2551 return NULL;
2552 if (x == NULL) {
2553 if (encoding != NULL || errors != NULL) {
2554 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002555 encoding != NULL ?
2556 "encoding without a string argument" :
2557 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 return NULL;
2559 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002560 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002563 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002565 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002567 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 return NULL;
2569 }
2570 new = PyUnicode_AsEncodedString(x, encoding, errors);
2571 if (new == NULL)
2572 return NULL;
2573 assert(PyBytes_Check(new));
2574 return new;
2575 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002576
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002577 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002578 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002579 PyUnicode_Check(x) ?
2580 "string argument without an encoding" :
2581 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002582 return NULL;
2583 }
2584
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002585 /* We'd like to call PyObject_Bytes here, but we need to check for an
2586 integer argument before deferring to PyBytes_FromObject, something
2587 PyObject_Bytes doesn't do. */
2588 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2589 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002590 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002591 Py_DECREF(func);
2592 if (new == NULL)
2593 return NULL;
2594 if (!PyBytes_Check(new)) {
2595 PyErr_Format(PyExc_TypeError,
2596 "__bytes__ returned non-bytes (type %.200s)",
2597 Py_TYPE(new)->tp_name);
2598 Py_DECREF(new);
2599 return NULL;
2600 }
2601 return new;
2602 }
2603 else if (PyErr_Occurred())
2604 return NULL;
2605
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002606 if (PyUnicode_Check(x)) {
2607 PyErr_SetString(PyExc_TypeError,
2608 "string argument without an encoding");
2609 return NULL;
2610 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002612 if (PyIndex_Check(x)) {
2613 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2614 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002615 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002616 return NULL;
2617 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002618 }
INADA Naokia634e232017-01-06 17:32:01 +09002619 else {
2620 if (size < 0) {
2621 PyErr_SetString(PyExc_ValueError, "negative count");
2622 return NULL;
2623 }
2624 new = _PyBytes_FromSize(size, 1);
2625 if (new == NULL)
2626 return NULL;
2627 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002628 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002630
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002631 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002632}
2633
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002634static PyObject*
2635_PyBytes_FromBuffer(PyObject *x)
2636{
2637 PyObject *new;
2638 Py_buffer view;
2639
2640 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2641 return NULL;
2642
2643 new = PyBytes_FromStringAndSize(NULL, view.len);
2644 if (!new)
2645 goto fail;
2646 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2647 &view, view.len, 'C') < 0)
2648 goto fail;
2649 PyBuffer_Release(&view);
2650 return new;
2651
2652fail:
2653 Py_XDECREF(new);
2654 PyBuffer_Release(&view);
2655 return NULL;
2656}
2657
2658static PyObject*
2659_PyBytes_FromList(PyObject *x)
2660{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002661 Py_ssize_t i, size = PyList_GET_SIZE(x);
2662 Py_ssize_t value;
2663 char *str;
2664 PyObject *item;
2665 _PyBytesWriter writer;
2666
2667 _PyBytesWriter_Init(&writer);
2668 str = _PyBytesWriter_Alloc(&writer, size);
2669 if (str == NULL)
2670 return NULL;
2671 writer.overallocate = 1;
2672 size = writer.allocated;
2673
2674 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2675 item = PyList_GET_ITEM(x, i);
2676 Py_INCREF(item);
2677 value = PyNumber_AsSsize_t(item, NULL);
2678 Py_DECREF(item);
2679 if (value == -1 && PyErr_Occurred())
2680 goto error;
2681
2682 if (value < 0 || value >= 256) {
2683 PyErr_SetString(PyExc_ValueError,
2684 "bytes must be in range(0, 256)");
2685 goto error;
2686 }
2687
2688 if (i >= size) {
2689 str = _PyBytesWriter_Resize(&writer, str, size+1);
2690 if (str == NULL)
2691 return NULL;
2692 size = writer.allocated;
2693 }
2694 *str++ = (char) value;
2695 }
2696 return _PyBytesWriter_Finish(&writer, str);
2697
2698 error:
2699 _PyBytesWriter_Dealloc(&writer);
2700 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002701}
2702
2703static PyObject*
2704_PyBytes_FromTuple(PyObject *x)
2705{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002706 PyObject *bytes;
2707 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2708 Py_ssize_t value;
2709 char *str;
2710 PyObject *item;
2711
2712 bytes = PyBytes_FromStringAndSize(NULL, size);
2713 if (bytes == NULL)
2714 return NULL;
2715 str = ((PyBytesObject *)bytes)->ob_sval;
2716
2717 for (i = 0; i < size; i++) {
2718 item = PyTuple_GET_ITEM(x, i);
2719 value = PyNumber_AsSsize_t(item, NULL);
2720 if (value == -1 && PyErr_Occurred())
2721 goto error;
2722
2723 if (value < 0 || value >= 256) {
2724 PyErr_SetString(PyExc_ValueError,
2725 "bytes must be in range(0, 256)");
2726 goto error;
2727 }
2728 *str++ = (char) value;
2729 }
2730 return bytes;
2731
2732 error:
2733 Py_DECREF(bytes);
2734 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002735}
2736
2737static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002738_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002739{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002740 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002741 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002742 _PyBytesWriter writer;
2743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002744 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002745 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 if (size == -1 && PyErr_Occurred())
2747 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002748
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002749 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002750 str = _PyBytesWriter_Alloc(&writer, size);
2751 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002753 writer.overallocate = 1;
2754 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 /* Run the iterator to exhaustion */
2757 for (i = 0; ; i++) {
2758 PyObject *item;
2759 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 /* Get the next item */
2762 item = PyIter_Next(it);
2763 if (item == NULL) {
2764 if (PyErr_Occurred())
2765 goto error;
2766 break;
2767 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002770 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002771 Py_DECREF(item);
2772 if (value == -1 && PyErr_Occurred())
2773 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 /* Range check */
2776 if (value < 0 || value >= 256) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "bytes must be in range(0, 256)");
2779 goto error;
2780 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 /* Append the byte */
2783 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002784 str = _PyBytesWriter_Resize(&writer, str, size+1);
2785 if (str == NULL)
2786 return NULL;
2787 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002789 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002791
2792 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793
2794 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002795 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797}
2798
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002799PyObject *
2800PyBytes_FromObject(PyObject *x)
2801{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002802 PyObject *it, *result;
2803
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002804 if (x == NULL) {
2805 PyErr_BadInternalCall();
2806 return NULL;
2807 }
2808
2809 if (PyBytes_CheckExact(x)) {
2810 Py_INCREF(x);
2811 return x;
2812 }
2813
2814 /* Use the modern buffer interface */
2815 if (PyObject_CheckBuffer(x))
2816 return _PyBytes_FromBuffer(x);
2817
2818 if (PyList_CheckExact(x))
2819 return _PyBytes_FromList(x);
2820
2821 if (PyTuple_CheckExact(x))
2822 return _PyBytes_FromTuple(x);
2823
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002824 if (!PyUnicode_Check(x)) {
2825 it = PyObject_GetIter(x);
2826 if (it != NULL) {
2827 result = _PyBytes_FromIterator(it, x);
2828 Py_DECREF(it);
2829 return result;
2830 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002831 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2832 return NULL;
2833 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002834 }
2835
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002836 PyErr_Format(PyExc_TypeError,
2837 "cannot convert '%.200s' object to bytes",
2838 x->ob_type->tp_name);
2839 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002840}
2841
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002842static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002843bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 PyObject *tmp, *pnew;
2846 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 assert(PyType_IsSubtype(type, &PyBytes_Type));
2849 tmp = bytes_new(&PyBytes_Type, args, kwds);
2850 if (tmp == NULL)
2851 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002852 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 n = PyBytes_GET_SIZE(tmp);
2854 pnew = type->tp_alloc(type, n);
2855 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002856 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002857 PyBytes_AS_STRING(tmp), n+1);
2858 ((PyBytesObject *)pnew)->ob_shash =
2859 ((PyBytesObject *)tmp)->ob_shash;
2860 }
2861 Py_DECREF(tmp);
2862 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002863}
2864
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002865PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002866"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002868bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002869bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2870bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002871\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002872Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002873 - an iterable yielding integers in range(256)\n\
2874 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002875 - any object implementing the buffer API.\n\
2876 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002877
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002878static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002879
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002880PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2882 "bytes",
2883 PyBytesObject_SIZE,
2884 sizeof(char),
Inada Naoki7d408692019-05-29 17:23:27 +09002885 0, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002886 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 0, /* tp_getattr */
2888 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002889 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002891 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 &bytes_as_sequence, /* tp_as_sequence */
2893 &bytes_as_mapping, /* tp_as_mapping */
2894 (hashfunc)bytes_hash, /* tp_hash */
2895 0, /* tp_call */
2896 bytes_str, /* tp_str */
2897 PyObject_GenericGetAttr, /* tp_getattro */
2898 0, /* tp_setattro */
2899 &bytes_as_buffer, /* tp_as_buffer */
2900 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2901 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2902 bytes_doc, /* tp_doc */
2903 0, /* tp_traverse */
2904 0, /* tp_clear */
2905 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2906 0, /* tp_weaklistoffset */
2907 bytes_iter, /* tp_iter */
2908 0, /* tp_iternext */
2909 bytes_methods, /* tp_methods */
2910 0, /* tp_members */
2911 0, /* tp_getset */
2912 &PyBaseObject_Type, /* tp_base */
2913 0, /* tp_dict */
2914 0, /* tp_descr_get */
2915 0, /* tp_descr_set */
2916 0, /* tp_dictoffset */
2917 0, /* tp_init */
2918 0, /* tp_alloc */
2919 bytes_new, /* tp_new */
2920 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002921};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002922
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002923void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002924PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 assert(pv != NULL);
2927 if (*pv == NULL)
2928 return;
2929 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002930 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 return;
2932 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002933
2934 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2935 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002936 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002937 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002938
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002939 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002940 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2941 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2942 Py_CLEAR(*pv);
2943 return;
2944 }
2945
2946 oldsize = PyBytes_GET_SIZE(*pv);
2947 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2948 PyErr_NoMemory();
2949 goto error;
2950 }
2951 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2952 goto error;
2953
2954 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2955 PyBuffer_Release(&wb);
2956 return;
2957
2958 error:
2959 PyBuffer_Release(&wb);
2960 Py_CLEAR(*pv);
2961 return;
2962 }
2963
2964 else {
2965 /* Multiple references, need to create new object */
2966 PyObject *v;
2967 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002968 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002969 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970}
2971
2972void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002973PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002974{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 PyBytes_Concat(pv, w);
2976 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977}
2978
2979
Ethan Furmanb95b5612015-01-23 20:05:18 -08002980/* The following function breaks the notion that bytes are immutable:
2981 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002982 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002983 as creating a new bytes object and destroying the old one, only
2984 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002986 Note that if there's not enough memory to resize the bytes object, the
2987 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988 memory" exception is set, and -1 is returned. Else (on success) 0 is
2989 returned, and the value in *pv may or may not be the same as on input.
2990 As always, an extra byte is allocated for a trailing \0 byte (newsize
2991 does *not* include that), and a trailing \0 byte is stored.
2992*/
2993
2994int
2995_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2996{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002997 PyObject *v;
2998 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002999 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003000 if (!PyBytes_Check(v) || newsize < 0) {
3001 goto error;
3002 }
3003 if (Py_SIZE(v) == newsize) {
3004 /* return early if newsize equals to v->ob_size */
3005 return 0;
3006 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003007 if (Py_SIZE(v) == 0) {
3008 if (newsize == 0) {
3009 return 0;
3010 }
3011 *pv = _PyBytes_FromSize(newsize, 0);
3012 Py_DECREF(v);
3013 return (*pv == NULL) ? -1 : 0;
3014 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003015 if (Py_REFCNT(v) != 1) {
3016 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003018 if (newsize == 0) {
3019 *pv = _PyBytes_FromSize(0, 0);
3020 Py_DECREF(v);
3021 return (*pv == NULL) ? -1 : 0;
3022 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 /* XXX UNREF/NEWREF interface should be more symmetrical */
3024 _Py_DEC_REFTOTAL;
3025 _Py_ForgetReference(v);
3026 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003027 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 if (*pv == NULL) {
3029 PyObject_Del(v);
3030 PyErr_NoMemory();
3031 return -1;
3032 }
3033 _Py_NewReference(*pv);
3034 sv = (PyBytesObject *) *pv;
3035 Py_SIZE(sv) = newsize;
3036 sv->ob_sval[newsize] = '\0';
3037 sv->ob_shash = -1; /* invalidate cached hash value */
3038 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003039error:
3040 *pv = 0;
3041 Py_DECREF(v);
3042 PyErr_BadInternalCall();
3043 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044}
3045
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046void
3047PyBytes_Fini(void)
3048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003050 for (i = 0; i < UCHAR_MAX + 1; i++)
3051 Py_CLEAR(characters[i]);
3052 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003053}
3054
Benjamin Peterson4116f362008-05-27 00:36:20 +00003055/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056
3057typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 PyObject_HEAD
3059 Py_ssize_t it_index;
3060 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062
3063static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 _PyObject_GC_UNTRACK(it);
3067 Py_XDECREF(it->it_seq);
3068 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003069}
3070
3071static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 Py_VISIT(it->it_seq);
3075 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076}
3077
3078static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003079striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003080{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 PyBytesObject *seq;
3082 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 assert(it != NULL);
3085 seq = it->it_seq;
3086 if (seq == NULL)
3087 return NULL;
3088 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3091 item = PyLong_FromLong(
3092 (unsigned char)seq->ob_sval[it->it_index]);
3093 if (item != NULL)
3094 ++it->it_index;
3095 return item;
3096 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003099 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101}
3102
3103static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303104striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003105{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003106 Py_ssize_t len = 0;
3107 if (it->it_seq)
3108 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3109 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003110}
3111
3112PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003113 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003114
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003115static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303116striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003117{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003118 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003119 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003120 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003121 it->it_seq, it->it_index);
3122 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003123 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003124 }
3125}
3126
3127PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3128
3129static PyObject *
3130striter_setstate(striterobject *it, PyObject *state)
3131{
3132 Py_ssize_t index = PyLong_AsSsize_t(state);
3133 if (index == -1 && PyErr_Occurred())
3134 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003135 if (it->it_seq != NULL) {
3136 if (index < 0)
3137 index = 0;
3138 else if (index > PyBytes_GET_SIZE(it->it_seq))
3139 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3140 it->it_index = index;
3141 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003142 Py_RETURN_NONE;
3143}
3144
3145PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3146
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003147static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003148 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3149 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003150 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3151 reduce_doc},
3152 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3153 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003155};
3156
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003157PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3159 "bytes_iterator", /* tp_name */
3160 sizeof(striterobject), /* tp_basicsize */
3161 0, /* tp_itemsize */
3162 /* methods */
3163 (destructor)striter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003164 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 0, /* tp_getattr */
3166 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003167 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003168 0, /* tp_repr */
3169 0, /* tp_as_number */
3170 0, /* tp_as_sequence */
3171 0, /* tp_as_mapping */
3172 0, /* tp_hash */
3173 0, /* tp_call */
3174 0, /* tp_str */
3175 PyObject_GenericGetAttr, /* tp_getattro */
3176 0, /* tp_setattro */
3177 0, /* tp_as_buffer */
3178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3179 0, /* tp_doc */
3180 (traverseproc)striter_traverse, /* tp_traverse */
3181 0, /* tp_clear */
3182 0, /* tp_richcompare */
3183 0, /* tp_weaklistoffset */
3184 PyObject_SelfIter, /* tp_iter */
3185 (iternextfunc)striter_next, /* tp_iternext */
3186 striter_methods, /* tp_methods */
3187 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003188};
3189
3190static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003191bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003193 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 if (!PyBytes_Check(seq)) {
3196 PyErr_BadInternalCall();
3197 return NULL;
3198 }
3199 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3200 if (it == NULL)
3201 return NULL;
3202 it->it_index = 0;
3203 Py_INCREF(seq);
3204 it->it_seq = (PyBytesObject *)seq;
3205 _PyObject_GC_TRACK(it);
3206 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003207}
Victor Stinner00165072015-10-09 01:53:21 +02003208
3209
3210/* _PyBytesWriter API */
3211
/* A growing writer buffer is enlarged by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3219
3220void
3221_PyBytesWriter_Init(_PyBytesWriter *writer)
3222{
Victor Stinner661aacc2015-10-14 09:41:48 +02003223 /* Set all attributes before small_buffer to 0 */
3224 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003225#ifndef NDEBUG
3226 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3227 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003228#endif
3229}
3230
3231void
3232_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3233{
3234 Py_CLEAR(writer->buffer);
3235}
3236
3237Py_LOCAL_INLINE(char*)
3238_PyBytesWriter_AsString(_PyBytesWriter *writer)
3239{
Victor Stinner661aacc2015-10-14 09:41:48 +02003240 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003241 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003242 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003243 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003244 else if (writer->use_bytearray) {
3245 assert(writer->buffer != NULL);
3246 return PyByteArray_AS_STRING(writer->buffer);
3247 }
3248 else {
3249 assert(writer->buffer != NULL);
3250 return PyBytes_AS_STRING(writer->buffer);
3251 }
Victor Stinner00165072015-10-09 01:53:21 +02003252}
3253
3254Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003255_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003256{
3257 char *start = _PyBytesWriter_AsString(writer);
3258 assert(str != NULL);
3259 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003260 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003261 return str - start;
3262}
3263
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003264#ifndef NDEBUG
3265Py_LOCAL_INLINE(int)
Victor Stinner00165072015-10-09 01:53:21 +02003266_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3267{
Victor Stinner00165072015-10-09 01:53:21 +02003268 char *start, *end;
3269
Victor Stinner661aacc2015-10-14 09:41:48 +02003270 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003271 assert(writer->buffer == NULL);
3272 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003273 else {
3274 assert(writer->buffer != NULL);
3275 if (writer->use_bytearray)
3276 assert(PyByteArray_CheckExact(writer->buffer));
3277 else
3278 assert(PyBytes_CheckExact(writer->buffer));
3279 assert(Py_REFCNT(writer->buffer) == 1);
3280 }
Victor Stinner00165072015-10-09 01:53:21 +02003281
Victor Stinner661aacc2015-10-14 09:41:48 +02003282 if (writer->use_bytearray) {
3283 /* bytearray has its own overallocation algorithm,
3284 writer overallocation must be disabled */
3285 assert(!writer->overallocate);
3286 }
3287
3288 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003289 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003290 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003291 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003292 assert(start[writer->allocated] == 0);
3293
3294 end = start + writer->allocated;
3295 assert(str != NULL);
3296 assert(start <= str && str <= end);
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003297 return 1;
Victor Stinner00165072015-10-09 01:53:21 +02003298}
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003299#endif
Victor Stinner00165072015-10-09 01:53:21 +02003300
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003301void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003302_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003303{
3304 Py_ssize_t allocated, pos;
3305
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003306 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003307 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003308
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003309 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003310 if (writer->overallocate
3311 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3312 /* overallocate to limit the number of realloc() */
3313 allocated += allocated / OVERALLOCATE_FACTOR;
3314 }
3315
Victor Stinner2bf89932015-10-14 11:25:33 +02003316 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003317 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003318 if (writer->use_bytearray) {
3319 if (PyByteArray_Resize(writer->buffer, allocated))
3320 goto error;
3321 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3322 but we cannot use ob_alloc because bytes may need to be moved
3323 to use the whole buffer. bytearray uses an internal optimization
3324 to avoid moving or copying bytes when bytes are removed at the
3325 beginning (ex: del bytearray[:1]). */
3326 }
3327 else {
3328 if (_PyBytes_Resize(&writer->buffer, allocated))
3329 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003330 }
3331 }
3332 else {
3333 /* convert from stack buffer to bytes object buffer */
3334 assert(writer->buffer == NULL);
3335
Victor Stinner661aacc2015-10-14 09:41:48 +02003336 if (writer->use_bytearray)
3337 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3338 else
3339 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003340 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003341 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003342
3343 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003344 char *dest;
3345 if (writer->use_bytearray)
3346 dest = PyByteArray_AS_STRING(writer->buffer);
3347 else
3348 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003349 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003350 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003351 pos);
3352 }
3353
Victor Stinnerb3653a32015-10-09 03:38:24 +02003354 writer->use_small_buffer = 0;
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003355#ifndef NDEBUG
3356 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3357 sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003358#endif
Victor Stinner00165072015-10-09 01:53:21 +02003359 }
3360 writer->allocated = allocated;
3361
3362 str = _PyBytesWriter_AsString(writer) + pos;
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003363 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003364 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003365
3366error:
3367 _PyBytesWriter_Dealloc(writer);
3368 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003369}
3370
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003371void*
3372_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3373{
3374 Py_ssize_t new_min_size;
3375
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003376 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003377 assert(size >= 0);
3378
3379 if (size == 0) {
3380 /* nothing to do */
3381 return str;
3382 }
3383
3384 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3385 PyErr_NoMemory();
3386 _PyBytesWriter_Dealloc(writer);
3387 return NULL;
3388 }
3389 new_min_size = writer->min_size + size;
3390
3391 if (new_min_size > writer->allocated)
3392 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3393
3394 writer->min_size = new_min_size;
3395 return str;
3396}
3397
Victor Stinner00165072015-10-09 01:53:21 +02003398/* Allocate the buffer to write size bytes.
3399 Return the pointer to the beginning of buffer data.
3400 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003401void*
Victor Stinner00165072015-10-09 01:53:21 +02003402_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3403{
3404 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003405 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003406 assert(size >= 0);
3407
Victor Stinnerb3653a32015-10-09 03:38:24 +02003408 writer->use_small_buffer = 1;
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003409#ifndef NDEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003410 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003411 /* In debug mode, don't use the full small buffer because it is less
3412 efficient than bytes and bytearray objects to detect buffer underflow
3413 and buffer overflow. Use 10 bytes of the small buffer to test also
3414 code using the smaller buffer in debug mode.
3415
3416 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3417 in debug mode to also be able to detect stack overflow when running
3418 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3419 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3420 stack overflow. */
3421 writer->allocated = Py_MIN(writer->allocated, 10);
3422 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3423 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003424 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003425#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003426 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003427#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003428 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003429}
3430
3431PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003432_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003433{
Victor Stinner2bf89932015-10-14 11:25:33 +02003434 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003435 PyObject *result;
3436
Victor Stinnerf82ce5b2019-10-15 03:06:16 +02003437 assert(_PyBytesWriter_CheckConsistency(writer, str));
Victor Stinner00165072015-10-09 01:53:21 +02003438
Victor Stinner2bf89932015-10-14 11:25:33 +02003439 size = _PyBytesWriter_GetSize(writer, str);
3440 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003441 Py_CLEAR(writer->buffer);
3442 /* Get the empty byte string singleton */
3443 result = PyBytes_FromStringAndSize(NULL, 0);
3444 }
3445 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003446 if (writer->use_bytearray) {
3447 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3448 }
3449 else {
3450 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3451 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003452 }
3453 else {
3454 result = writer->buffer;
3455 writer->buffer = NULL;
3456
Victor Stinner2bf89932015-10-14 11:25:33 +02003457 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003458 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003459 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003460 Py_DECREF(result);
3461 return NULL;
3462 }
3463 }
3464 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003465 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003466 assert(result == NULL);
3467 return NULL;
3468 }
Victor Stinner00165072015-10-09 01:53:21 +02003469 }
3470 }
Victor Stinner00165072015-10-09 01:53:21 +02003471 }
Victor Stinner00165072015-10-09 01:53:21 +02003472 return result;
3473}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003474
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003475void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003476_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003477 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003478{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003479 char *str = (char *)ptr;
3480
Victor Stinnerce179bf2015-10-09 12:57:22 +02003481 str = _PyBytesWriter_Prepare(writer, str, size);
3482 if (str == NULL)
3483 return NULL;
3484
Christian Heimesf051e432016-09-13 20:22:02 +02003485 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003486 str += size;
3487
3488 return str;
3489}