blob: ebbdb7c3c1642886fa958b7cdd7aac2dc576e246 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
#ifdef COUNT_ALLOCS
/* Counters bumped each time the cached empty object / a cached one-byte
   object is handed out instead of allocating a new object. */
Py_ssize_t _Py_null_strings, _Py_one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value.  An entry is
   filled in the first time a one-byte string with that value is created
   from data. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty bytes object; stays NULL until the first empty byte
   string is created. */
static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the resulting object is the length of that string as
   reported by strlen(str).

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Serhiy Storchakad586ccb2019-01-12 10:30:35 +0200315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
Victor Stinner03dab782015-10-14 00:21:35 +0200324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Victor Stinner03dab782015-10-14 00:21:35 +0200341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360
Victor Stinner03dab782015-10-14 00:21:35 +0200361#undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368}
369
370PyObject *
371PyBytes_FromFormat(const char *format, ...)
372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 PyObject* ret;
374 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000378#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000384}
385
Ethan Furmanb95b5612015-01-23 20:05:18 -0800386/* Helpers for formatstring */
387
388Py_LOCAL_INLINE(PyObject *)
389getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390{
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402}
403
404/* Format codes
405 * F_LJUST '-'
406 * F_SIGN '+'
407 * F_BLANK ' '
408 * F_ALT '#'
409 * F_ZERO '0'
410 */
411#define F_LJUST (1<<0)
412#define F_SIGN (1<<1)
413#define F_BLANK (1<<2)
414#define F_ALT (1<<3)
415#define F_ZERO (1<<4)
416
417/* Returns a new reference to a PyBytes object, or NULL on failure. */
418
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419static char*
420formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200421 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800422{
423 char *p;
424 PyObject *result;
425 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200426 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443
444 len = strlen(p);
445 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200449 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800456 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200457 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600458 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800459}
460
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300461static PyObject *
462formatlong(PyObject *v, int flags, int prec, int type)
463{
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493}
494
495static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800501 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800505 }
506 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300531 *p = (char)ival;
532 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300534 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538}
539
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800540static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800546 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100563 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 return result;
577 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591 Py_TYPE(v)->tp_name);
592 return NULL;
593}
594
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596
597PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200598_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600{
Victor Stinner772b2b02015-10-14 09:56:53 +0200601 const char *fmt;
602 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200604 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800605 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800606 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200607 _PyBytesWriter writer;
608
Victor Stinner772b2b02015-10-14 09:56:53 +0200609 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800610 PyErr_BadInternalCall();
611 return NULL;
612 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 fmt = format;
614 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
616 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200617 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200622 if (!use_bytearray)
623 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200624
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200641 Py_ssize_t len;
642 char *pos;
643
Xiang Zhangb76ad512017-03-06 17:17:05 +0800644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 if (pos != NULL)
646 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200647 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800648 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 assert(len != 0);
650
Christian Heimesf051e432016-09-13 20:22:02 +0200651 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800663 PyObject *v = NULL;
664 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200665 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200669 Py_ssize_t alloc;
670#ifdef Py_DEBUG
671 char *before;
672#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800673
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200675 if (*fmt == '%') {
676 *res++ = '%';
677 fmt++;
678 fmtcnt--;
679 continue;
680 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800681 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200682 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800683 Py_ssize_t keylen;
684 PyObject *key;
685 int pcount = 1;
686
687 if (dict == NULL) {
688 PyErr_SetString(PyExc_TypeError,
689 "format requires a mapping");
690 goto error;
691 }
692 ++fmt;
693 --fmtcnt;
694 keystart = fmt;
695 /* Skip over balanced parentheses */
696 while (pcount > 0 && --fmtcnt >= 0) {
697 if (*fmt == ')')
698 --pcount;
699 else if (*fmt == '(')
700 ++pcount;
701 fmt++;
702 }
703 keylen = fmt - keystart - 1;
704 if (fmtcnt < 0 || pcount > 0) {
705 PyErr_SetString(PyExc_ValueError,
706 "incomplete format key");
707 goto error;
708 }
709 key = PyBytes_FromStringAndSize(keystart,
710 keylen);
711 if (key == NULL)
712 goto error;
713 if (args_owned) {
714 Py_DECREF(args);
715 args_owned = 0;
716 }
717 args = PyObject_GetItem(dict, key);
718 Py_DECREF(key);
719 if (args == NULL) {
720 goto error;
721 }
722 args_owned = 1;
723 arglen = -1;
724 argidx = -2;
725 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200726
727 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800728 while (--fmtcnt >= 0) {
729 switch (c = *fmt++) {
730 case '-': flags |= F_LJUST; continue;
731 case '+': flags |= F_SIGN; continue;
732 case ' ': flags |= F_BLANK; continue;
733 case '#': flags |= F_ALT; continue;
734 case '0': flags |= F_ZERO; continue;
735 }
736 break;
737 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200738
739 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800740 if (c == '*') {
741 v = getnextarg(args, arglen, &argidx);
742 if (v == NULL)
743 goto error;
744 if (!PyLong_Check(v)) {
745 PyErr_SetString(PyExc_TypeError,
746 "* wants int");
747 goto error;
748 }
749 width = PyLong_AsSsize_t(v);
750 if (width == -1 && PyErr_Occurred())
751 goto error;
752 if (width < 0) {
753 flags |= F_LJUST;
754 width = -width;
755 }
756 if (--fmtcnt >= 0)
757 c = *fmt++;
758 }
759 else if (c >= 0 && isdigit(c)) {
760 width = c - '0';
761 while (--fmtcnt >= 0) {
762 c = Py_CHARMASK(*fmt++);
763 if (!isdigit(c))
764 break;
765 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
766 PyErr_SetString(
767 PyExc_ValueError,
768 "width too big");
769 goto error;
770 }
771 width = width*10 + (c - '0');
772 }
773 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200774
775 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800776 if (c == '.') {
777 prec = 0;
778 if (--fmtcnt >= 0)
779 c = *fmt++;
780 if (c == '*') {
781 v = getnextarg(args, arglen, &argidx);
782 if (v == NULL)
783 goto error;
784 if (!PyLong_Check(v)) {
785 PyErr_SetString(
786 PyExc_TypeError,
787 "* wants int");
788 goto error;
789 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200790 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800791 if (prec == -1 && PyErr_Occurred())
792 goto error;
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 else if (c >= 0 && isdigit(c)) {
799 prec = c - '0';
800 while (--fmtcnt >= 0) {
801 c = Py_CHARMASK(*fmt++);
802 if (!isdigit(c))
803 break;
804 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
805 PyErr_SetString(
806 PyExc_ValueError,
807 "prec too big");
808 goto error;
809 }
810 prec = prec*10 + (c - '0');
811 }
812 }
813 } /* prec */
814 if (fmtcnt >= 0) {
815 if (c == 'h' || c == 'l' || c == 'L') {
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 PyErr_SetString(PyExc_ValueError,
822 "incomplete format");
823 goto error;
824 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200828
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300829 if (fmtcnt == 0) {
830 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200831 writer.overallocate = 0;
832 }
833
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 sign = 0;
835 fill = ' ';
836 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700837 case 'r':
838 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200840 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 if (temp == NULL)
842 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200843 assert(PyUnicode_IS_ASCII(temp));
844 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
845 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 if (prec >= 0 && len > prec)
847 len = prec;
848 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200849
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 case 's':
851 // %s is only for 2/3 code; 3 only code should use %b
852 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200853 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (temp == NULL)
855 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800856 if (prec >= 0 && len > prec)
857 len = prec;
858 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200859
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 case 'i':
861 case 'd':
862 case 'u':
863 case 'o':
864 case 'x':
865 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200866 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200867 && width == -1 && prec == -1
868 && !(flags & (F_SIGN | F_BLANK))
869 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200870 {
871 /* Fast path */
872 int alternate = flags & F_ALT;
873 int base;
874
875 switch(c)
876 {
877 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700878 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200879 case 'd':
880 case 'i':
881 case 'u':
882 base = 10;
883 break;
884 case 'o':
885 base = 8;
886 break;
887 case 'x':
888 case 'X':
889 base = 16;
890 break;
891 }
892
893 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200894 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200895 res = _PyLong_FormatBytesWriter(&writer, res,
896 v, base, alternate);
897 if (res == NULL)
898 goto error;
899 continue;
900 }
901
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300902 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200903 if (!temp)
904 goto error;
905 assert(PyUnicode_IS_ASCII(temp));
906 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
907 len = PyUnicode_GET_LENGTH(temp);
908 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 if (flags & F_ZERO)
910 fill = '0';
911 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912
Ethan Furmanb95b5612015-01-23 20:05:18 -0800913 case 'e':
914 case 'E':
915 case 'f':
916 case 'F':
917 case 'g':
918 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200919 if (width == -1 && prec == -1
920 && !(flags & (F_SIGN | F_BLANK)))
921 {
922 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200923 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200924 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200925 if (res == NULL)
926 goto error;
927 continue;
928 }
929
Victor Stinnerad771582015-10-09 12:38:53 +0200930 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 goto error;
932 pbuf = PyBytes_AS_STRING(temp);
933 len = PyBytes_GET_SIZE(temp);
934 sign = 1;
935 if (flags & F_ZERO)
936 fill = '0';
937 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200938
Ethan Furmanb95b5612015-01-23 20:05:18 -0800939 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200940 pbuf = &onechar;
941 len = byte_converter(v, &onechar);
942 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200944 if (width == -1) {
945 /* Fast path */
946 *res++ = onechar;
947 continue;
948 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800949 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 default:
952 PyErr_Format(PyExc_ValueError,
953 "unsupported format character '%c' (0x%x) "
954 "at index %zd",
955 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200956 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800957 goto error;
958 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 if (sign) {
961 if (*pbuf == '-' || *pbuf == '+') {
962 sign = *pbuf++;
963 len--;
964 }
965 else if (flags & F_SIGN)
966 sign = '+';
967 else if (flags & F_BLANK)
968 sign = ' ';
969 else
970 sign = 0;
971 }
972 if (width < len)
973 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974
975 alloc = width;
976 if (sign != 0 && len == width)
977 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200978 /* 2: size preallocated for %s */
979 if (alloc > 2) {
980 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981 if (res == NULL)
982 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984#ifdef Py_DEBUG
985 before = res;
986#endif
987
988 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800989 if (sign) {
990 if (fill != ' ')
991 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 if (width > len)
993 width--;
994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* Write the numeric prefix for "x", "X" and "o" formats
997 if the alternate form is used.
998 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200999 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 assert(pbuf[0] == '0');
1001 assert(pbuf[1] == c);
1002 if (fill != ' ') {
1003 *res++ = *pbuf++;
1004 *res++ = *pbuf++;
1005 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 width -= 2;
1007 if (width < 0)
1008 width = 0;
1009 len -= 2;
1010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001013 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014 memset(res, fill, width - len);
1015 res += (width - len);
1016 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001017 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001018
1019 /* If padding with spaces: write sign if needed and/or numeric
1020 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (fill == ' ') {
1022 if (sign)
1023 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001024 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 assert(pbuf[0] == '0');
1026 assert(pbuf[1] == c);
1027 *res++ = *pbuf++;
1028 *res++ = *pbuf++;
1029 }
1030 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001031
1032 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001033 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
1036 /* Pad right with the fill character if needed */
1037 if (width > len) {
1038 memset(res, ' ', width - len);
1039 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001041
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001042 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001043 PyErr_SetString(PyExc_TypeError,
1044 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 Py_XDECREF(temp);
1046 goto error;
1047 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
1050#ifdef Py_DEBUG
1051 /* check that we computed the exact size for this write */
1052 assert((res - before) == alloc);
1053#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001055
1056 /* If overallocation was disabled, ensure that it was the last
1057 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001058 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001059 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 if (argidx < arglen && !dict) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "not all arguments converted during bytes formatting");
1064 goto error;
1065 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001070 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001071
1072 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001073 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001074 if (args_owned) {
1075 Py_DECREF(args);
1076 }
1077 return NULL;
1078}
1079
1080/* =-= */
1081
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001082static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001083bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001084{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001086}
1087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001088/* Unescape a backslash-escaped string. If unicode is non-zero,
1089 the string is a u-literal. If recode_encoding is non-zero,
1090 the string is UTF-8 encoded and should be re-encoded in the
1091 specified encoding. */
1092
Victor Stinner2ec80632015-10-14 13:32:13 +02001093static char *
1094_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1095 const char *errors, const char *recode_encoding,
1096 _PyBytesWriter *writer, char *p)
1097{
1098 PyObject *u, *w;
1099 const char* t;
1100
1101 t = *s;
1102 /* Decode non-ASCII bytes as UTF-8. */
1103 while (t < end && (*t & 0x80))
1104 t++;
1105 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1106 if (u == NULL)
1107 return NULL;
1108
1109 /* Recode them in target encoding. */
1110 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1111 Py_DECREF(u);
1112 if (w == NULL)
1113 return NULL;
1114 assert(PyBytes_Check(w));
1115
1116 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001117 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001118 p = _PyBytesWriter_WriteBytes(writer, p,
1119 PyBytes_AS_STRING(w),
1120 PyBytes_GET_SIZE(w));
1121 Py_DECREF(w);
1122 if (p == NULL)
1123 return NULL;
1124
1125 *s = t;
1126 return p;
1127}
1128
Eric V. Smith42454af2016-10-31 09:22:08 -04001129PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 Py_ssize_t len,
1131 const char *errors,
1132 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001133 const char *recode_encoding,
1134 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001137 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001139 _PyBytesWriter writer;
1140
1141 _PyBytesWriter_Init(&writer);
1142
1143 p = _PyBytesWriter_Alloc(&writer, len);
1144 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001146 writer.overallocate = 1;
1147
Eric V. Smith42454af2016-10-31 09:22:08 -04001148 *first_invalid_escape = NULL;
1149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 end = s + len;
1151 while (s < end) {
1152 if (*s != '\\') {
1153 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001154 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 *p++ = *s++;
1156 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001157 else {
1158 /* non-ASCII character and need to recode */
1159 p = _PyBytes_DecodeEscapeRecode(&s, end,
1160 errors, recode_encoding,
1161 &writer, p);
1162 if (p == NULL)
1163 goto failed;
1164 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 continue;
1166 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001169 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 PyErr_SetString(PyExc_ValueError,
1171 "Trailing \\ in string");
1172 goto failed;
1173 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 switch (*s++) {
1176 /* XXX This assumes ASCII! */
1177 case '\n': break;
1178 case '\\': *p++ = '\\'; break;
1179 case '\'': *p++ = '\''; break;
1180 case '\"': *p++ = '\"'; break;
1181 case 'b': *p++ = '\b'; break;
1182 case 'f': *p++ = '\014'; break; /* FF */
1183 case 't': *p++ = '\t'; break;
1184 case 'n': *p++ = '\n'; break;
1185 case 'r': *p++ = '\r'; break;
1186 case 'v': *p++ = '\013'; break; /* VT */
1187 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1188 case '0': case '1': case '2': case '3':
1189 case '4': case '5': case '6': case '7':
1190 c = s[-1] - '0';
1191 if (s < end && '0' <= *s && *s <= '7') {
1192 c = (c<<3) + *s++ - '0';
1193 if (s < end && '0' <= *s && *s <= '7')
1194 c = (c<<3) + *s++ - '0';
1195 }
1196 *p++ = c;
1197 break;
1198 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001199 if (s+1 < end) {
1200 int digit1, digit2;
1201 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1202 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1203 if (digit1 < 16 && digit2 < 16) {
1204 *p++ = (unsigned char)((digit1 << 4) + digit2);
1205 s += 2;
1206 break;
1207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001209 /* invalid hexadecimal digits */
1210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001212 PyErr_Format(PyExc_ValueError,
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +02001213 "invalid \\x escape at position %zd",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001214 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 goto failed;
1216 }
1217 if (strcmp(errors, "replace") == 0) {
1218 *p++ = '?';
1219 } else if (strcmp(errors, "ignore") == 0)
1220 /* do nothing */;
1221 else {
1222 PyErr_Format(PyExc_ValueError,
1223 "decoding error; unknown "
1224 "error handling code: %.400s",
1225 errors);
1226 goto failed;
1227 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001228 /* skip \x */
1229 if (s < end && Py_ISXDIGIT(s[0]))
1230 s++; /* and a hexdigit */
1231 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001232
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001234 if (*first_invalid_escape == NULL) {
1235 *first_invalid_escape = s-1; /* Back up one char, since we've
1236 already incremented s. */
1237 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001239 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001240 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 UTF-8 bytes may follow. */
1242 }
1243 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001244
1245 return _PyBytesWriter_Finish(&writer, p);
1246
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001248 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250}
1251
Eric V. Smith42454af2016-10-31 09:22:08 -04001252PyObject *PyBytes_DecodeEscape(const char *s,
1253 Py_ssize_t len,
1254 const char *errors,
1255 Py_ssize_t unicode,
1256 const char *recode_encoding)
1257{
1258 const char* first_invalid_escape;
1259 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1260 recode_encoding,
1261 &first_invalid_escape);
1262 if (result == NULL)
1263 return NULL;
1264 if (first_invalid_escape != NULL) {
1265 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1266 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001267 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001268 Py_DECREF(result);
1269 return NULL;
1270 }
1271 }
1272 return result;
1273
1274}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275/* -------------------------------------------------------------------- */
1276/* object api */
1277
1278Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 if (!PyBytes_Check(op)) {
1282 PyErr_Format(PyExc_TypeError,
1283 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284 return -1;
1285 }
1286 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287}
1288
1289char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001290PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 if (!PyBytes_Check(op)) {
1293 PyErr_Format(PyExc_TypeError,
1294 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1295 return NULL;
1296 }
1297 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298}
1299
1300int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001301PyBytes_AsStringAndSize(PyObject *obj,
1302 char **s,
1303 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 if (s == NULL) {
1306 PyErr_BadInternalCall();
1307 return -1;
1308 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 if (!PyBytes_Check(obj)) {
1311 PyErr_Format(PyExc_TypeError,
1312 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1313 return -1;
1314 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 *s = PyBytes_AS_STRING(obj);
1317 if (len != NULL)
1318 *len = PyBytes_GET_SIZE(obj);
1319 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001320 PyErr_SetString(PyExc_ValueError,
1321 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 return -1;
1323 }
1324 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325}
Neal Norwitz6968b052007-02-27 19:02:19 +00001326
1327/* -------------------------------------------------------------------- */
1328/* Methods */
1329
Eric Smith0923d1d2009-04-16 20:16:10 +00001330#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001331
1332#include "stringlib/fastsearch.h"
1333#include "stringlib/count.h"
1334#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001335#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001336#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001337#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001338#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001339
Eric Smith0f78bff2009-11-30 01:01:42 +00001340#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342PyObject *
1343PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001344{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001345 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001346 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001347 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001349 unsigned char quote, *s, *p;
1350
1351 /* Compute size of output string */
1352 squotes = dquotes = 0;
1353 newsize = 3; /* b'' */
1354 s = (unsigned char*)op->ob_sval;
1355 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001357 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001358 case '\'': squotes++; break;
1359 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001360 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001361 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362 default:
1363 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001364 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001366 if (newsize > PY_SSIZE_T_MAX - incr)
1367 goto overflow;
1368 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 }
1370 quote = '\'';
1371 if (smartquotes && squotes && !dquotes)
1372 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001373 if (squotes && quote == '\'') {
1374 if (newsize > PY_SSIZE_T_MAX - squotes)
1375 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378
1379 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 if (v == NULL) {
1381 return NULL;
1382 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001385 *p++ = 'b', *p++ = quote;
1386 for (i = 0; i < length; i++) {
1387 unsigned char c = op->ob_sval[i];
1388 if (c == quote || c == '\\')
1389 *p++ = '\\', *p++ = c;
1390 else if (c == '\t')
1391 *p++ = '\\', *p++ = 't';
1392 else if (c == '\n')
1393 *p++ = '\\', *p++ = 'n';
1394 else if (c == '\r')
1395 *p++ = '\\', *p++ = 'r';
1396 else if (c < ' ' || c >= 0x7f) {
1397 *p++ = '\\';
1398 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001399 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1400 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001402 else
1403 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001405 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001406 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001407 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001408
1409 overflow:
1410 PyErr_SetString(PyExc_OverflowError,
1411 "bytes object is too large to make repr");
1412 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001413}
1414
Neal Norwitz6968b052007-02-27 19:02:19 +00001415static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001416bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001419}
1420
Neal Norwitz6968b052007-02-27 19:02:19 +00001421static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001422bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001423{
Victor Stinnerc96be812019-05-14 17:34:56 +02001424 _PyCoreConfig *config = &_PyInterpreterState_GET_UNSAFE()->core_config;
1425 if (config->bytes_warning) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001427 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001429 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 }
1431 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001432}
1433
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001435bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438}
Neal Norwitz6968b052007-02-27 19:02:19 +00001439
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440/* This is also used by PyBytes_Concat() */
1441static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001442bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 Py_buffer va, vb;
1445 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 va.len = -1;
1448 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001449 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1450 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001452 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 goto done;
1454 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 /* Optimize end cases */
1457 if (va.len == 0 && PyBytes_CheckExact(b)) {
1458 result = b;
1459 Py_INCREF(result);
1460 goto done;
1461 }
1462 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1463 result = a;
1464 Py_INCREF(result);
1465 goto done;
1466 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001468 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 PyErr_NoMemory();
1470 goto done;
1471 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001472
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001473 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (result != NULL) {
1475 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1476 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1477 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478
1479 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 if (va.len != -1)
1481 PyBuffer_Release(&va);
1482 if (vb.len != -1)
1483 PyBuffer_Release(&vb);
1484 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485}
Neal Norwitz6968b052007-02-27 19:02:19 +00001486
1487static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001488bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001489{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001490 Py_ssize_t i;
1491 Py_ssize_t j;
1492 Py_ssize_t size;
1493 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 size_t nbytes;
1495 if (n < 0)
1496 n = 0;
1497 /* watch out for overflows: the size can overflow int,
1498 * and the # of bytes needed can overflow size_t
1499 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001500 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 PyErr_SetString(PyExc_OverflowError,
1502 "repeated bytes are too long");
1503 return NULL;
1504 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001505 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1507 Py_INCREF(a);
1508 return (PyObject *)a;
1509 }
1510 nbytes = (size_t)size;
1511 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1512 PyErr_SetString(PyExc_OverflowError,
1513 "repeated bytes are too long");
1514 return NULL;
1515 }
1516 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1517 if (op == NULL)
1518 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001519 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 op->ob_shash = -1;
1521 op->ob_sval[size] = '\0';
1522 if (Py_SIZE(a) == 1 && n > 0) {
1523 memset(op->ob_sval, a->ob_sval[0] , n);
1524 return (PyObject *) op;
1525 }
1526 i = 0;
1527 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001528 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 i = Py_SIZE(a);
1530 }
1531 while (i < size) {
1532 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001533 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 i += j;
1535 }
1536 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001537}
1538
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001539static int
1540bytes_contains(PyObject *self, PyObject *arg)
1541{
1542 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1543}
1544
Neal Norwitz6968b052007-02-27 19:02:19 +00001545static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001546bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001547{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (i < 0 || i >= Py_SIZE(a)) {
1549 PyErr_SetString(PyExc_IndexError, "index out of range");
1550 return NULL;
1551 }
1552 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001553}
1554
Benjamin Peterson621b4302016-09-09 13:54:34 -07001555static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001556bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1557{
1558 int cmp;
1559 Py_ssize_t len;
1560
1561 len = Py_SIZE(a);
1562 if (Py_SIZE(b) != len)
1563 return 0;
1564
1565 if (a->ob_sval[0] != b->ob_sval[0])
1566 return 0;
1567
1568 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1569 return (cmp == 0);
1570}
1571
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001572static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001573bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 int c;
1576 Py_ssize_t len_a, len_b;
1577 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001578 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 /* Make sure both arguments are strings. */
1581 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Victor Stinnerc96be812019-05-14 17:34:56 +02001582 _PyCoreConfig *config = &_PyInterpreterState_GET_UNSAFE()->core_config;
1583 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001584 rc = PyObject_IsInstance((PyObject*)a,
1585 (PyObject*)&PyUnicode_Type);
1586 if (!rc)
1587 rc = PyObject_IsInstance((PyObject*)b,
1588 (PyObject*)&PyUnicode_Type);
1589 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001591 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001592 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001593 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001594 return NULL;
1595 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001596 else {
1597 rc = PyObject_IsInstance((PyObject*)a,
1598 (PyObject*)&PyLong_Type);
1599 if (!rc)
1600 rc = PyObject_IsInstance((PyObject*)b,
1601 (PyObject*)&PyLong_Type);
1602 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001603 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001604 if (rc) {
1605 if (PyErr_WarnEx(PyExc_BytesWarning,
1606 "Comparison between bytes and int", 1))
1607 return NULL;
1608 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001609 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 }
stratakise8b19652017-11-02 11:32:54 +01001611 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001613 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001615 case Py_EQ:
1616 case Py_LE:
1617 case Py_GE:
1618 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001619 Py_RETURN_TRUE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001620 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001621 case Py_NE:
1622 case Py_LT:
1623 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001624 Py_RETURN_FALSE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001626 default:
1627 PyErr_BadArgument();
1628 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 }
1630 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001631 else if (op == Py_EQ || op == Py_NE) {
1632 int eq = bytes_compare_eq(a, b);
1633 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001634 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001635 }
1636 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001637 len_a = Py_SIZE(a);
1638 len_b = Py_SIZE(b);
1639 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001640 if (min_len > 0) {
1641 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001642 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001643 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001645 else
1646 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001647 if (c != 0)
1648 Py_RETURN_RICHCOMPARE(c, 0, op);
1649 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001651}
1652
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001653static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001655{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001656 if (a->ob_shash == -1) {
1657 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001658 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001659 }
1660 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001661}
1662
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001664bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 if (PyIndex_Check(item)) {
1667 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1668 if (i == -1 && PyErr_Occurred())
1669 return NULL;
1670 if (i < 0)
1671 i += PyBytes_GET_SIZE(self);
1672 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1673 PyErr_SetString(PyExc_IndexError,
1674 "index out of range");
1675 return NULL;
1676 }
1677 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1678 }
1679 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001680 Py_ssize_t start, stop, step, slicelength, i;
1681 size_t cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 char* source_buf;
1683 char* result_buf;
1684 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001685
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001686 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 return NULL;
1688 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001689 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1690 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 if (slicelength <= 0) {
1693 return PyBytes_FromStringAndSize("", 0);
1694 }
1695 else if (start == 0 && step == 1 &&
1696 slicelength == PyBytes_GET_SIZE(self) &&
1697 PyBytes_CheckExact(self)) {
1698 Py_INCREF(self);
1699 return (PyObject *)self;
1700 }
1701 else if (step == 1) {
1702 return PyBytes_FromStringAndSize(
1703 PyBytes_AS_STRING(self) + start,
1704 slicelength);
1705 }
1706 else {
1707 source_buf = PyBytes_AS_STRING(self);
1708 result = PyBytes_FromStringAndSize(NULL, slicelength);
1709 if (result == NULL)
1710 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 result_buf = PyBytes_AS_STRING(result);
1713 for (cur = start, i = 0; i < slicelength;
1714 cur += step, i++) {
1715 result_buf[i] = source_buf[cur];
1716 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 return result;
1719 }
1720 }
1721 else {
1722 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001723 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 Py_TYPE(item)->tp_name);
1725 return NULL;
1726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727}
1728
1729static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001730bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1733 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734}
1735
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001736static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 (lenfunc)bytes_length, /*sq_length*/
1738 (binaryfunc)bytes_concat, /*sq_concat*/
1739 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1740 (ssizeargfunc)bytes_item, /*sq_item*/
1741 0, /*sq_slice*/
1742 0, /*sq_ass_item*/
1743 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001744 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745};
1746
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001747static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 (lenfunc)bytes_length,
1749 (binaryfunc)bytes_subscript,
1750 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751};
1752
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001753static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 (getbufferproc)bytes_buffer_getbuffer,
1755 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756};
1757
1758
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1762
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763/*[clinic input]
1764bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001766 sep: object = None
1767 The delimiter according which to split the bytes.
1768 None (the default value) means split on ASCII whitespace characters
1769 (space, tab, return, newline, formfeed, vertical tab).
1770 maxsplit: Py_ssize_t = -1
1771 Maximum number of splits to do.
1772 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001774Return a list of the sections in the bytes, using sep as the delimiter.
1775[clinic start generated code]*/
1776
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001778bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1779/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001780{
1781 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 const char *s = PyBytes_AS_STRING(self), *sub;
1783 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 if (maxsplit < 0)
1787 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 return NULL;
1792 sub = vsub.buf;
1793 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1796 PyBuffer_Release(&vsub);
1797 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001798}
1799
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800/*[clinic input]
1801bytes.partition
1802
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 /
1805
1806Partition the bytes into three parts using the given separator.
1807
1808This will search for the separator sep in the bytes. If the separator is found,
1809returns a 3-tuple containing the part before the separator, the separator
1810itself, and the part after it.
1811
1812If the separator is not found, returns a 3-tuple containing the original bytes
1813object and two empty bytes objects.
1814[clinic start generated code]*/
1815
Neal Norwitz6968b052007-02-27 19:02:19 +00001816static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001817bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001818/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001819{
Neal Norwitz6968b052007-02-27 19:02:19 +00001820 return stringlib_partition(
1821 (PyObject*) self,
1822 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001823 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001824 );
1825}
1826
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001827/*[clinic input]
1828bytes.rpartition
1829
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001830 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001831 /
1832
1833Partition the bytes into three parts using the given separator.
1834
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001835This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001836the separator is found, returns a 3-tuple containing the part before the
1837separator, the separator itself, and the part after it.
1838
1839If the separator is not found, returns a 3-tuple containing two empty bytes
1840objects and the original bytes object.
1841[clinic start generated code]*/
1842
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001843static PyObject *
1844bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001845/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001846{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return stringlib_rpartition(
1848 (PyObject*) self,
1849 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001850 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001852}
1853
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854/*[clinic input]
1855bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001856
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001857Return a list of the sections in the bytes, using sep as the delimiter.
1858
1859Splitting is done starting at the end of the bytes and working to the front.
1860[clinic start generated code]*/
1861
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001863bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1864/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001865{
1866 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 const char *s = PyBytes_AS_STRING(self), *sub;
1868 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001869 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 if (maxsplit < 0)
1872 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001873 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001875 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 return NULL;
1877 sub = vsub.buf;
1878 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1881 PyBuffer_Release(&vsub);
1882 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001883}
1884
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001886/*[clinic input]
1887bytes.join
1888
1889 iterable_of_bytes: object
1890 /
1891
1892Concatenate any number of bytes objects.
1893
1894The bytes whose method is called is inserted in between each pair.
1895
1896The result is returned as a new bytes object.
1897
1898Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1899[clinic start generated code]*/
1900
Neal Norwitz6968b052007-02-27 19:02:19 +00001901static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001902bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1903/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001904{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001905 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001906}
1907
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908PyObject *
1909_PyBytes_Join(PyObject *sep, PyObject *x)
1910{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 assert(sep != NULL && PyBytes_Check(sep));
1912 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001913 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914}
1915
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001916static PyObject *
1917bytes_find(PyBytesObject *self, PyObject *args)
1918{
1919 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922static PyObject *
1923bytes_index(PyBytesObject *self, PyObject *args)
1924{
1925 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1926}
1927
1928
1929static PyObject *
1930bytes_rfind(PyBytesObject *self, PyObject *args)
1931{
1932 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1933}
1934
1935
1936static PyObject *
1937bytes_rindex(PyBytesObject *self, PyObject *args)
1938{
1939 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1940}
1941
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
1943Py_LOCAL_INLINE(PyObject *)
1944do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 Py_buffer vsep;
1947 char *s = PyBytes_AS_STRING(self);
1948 Py_ssize_t len = PyBytes_GET_SIZE(self);
1949 char *sep;
1950 Py_ssize_t seplen;
1951 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001953 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 return NULL;
1955 sep = vsep.buf;
1956 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 i = 0;
1959 if (striptype != RIGHTSTRIP) {
1960 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1961 i++;
1962 }
1963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 j = len;
1966 if (striptype != LEFTSTRIP) {
1967 do {
1968 j--;
1969 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1970 j++;
1971 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1976 Py_INCREF(self);
1977 return (PyObject*)self;
1978 }
1979 else
1980 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001981}
1982
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
1984Py_LOCAL_INLINE(PyObject *)
1985do_strip(PyBytesObject *self, int striptype)
1986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 char *s = PyBytes_AS_STRING(self);
1988 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 i = 0;
1991 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001992 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 i++;
1994 }
1995 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 j = len;
1998 if (striptype != LEFTSTRIP) {
1999 do {
2000 j--;
David Malcolm96960882010-11-05 17:23:41 +00002001 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 j++;
2003 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2006 Py_INCREF(self);
2007 return (PyObject*)self;
2008 }
2009 else
2010 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011}
2012
2013
2014Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017 if (bytes != NULL && bytes != Py_None) {
2018 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 }
2020 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021}
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023/*[clinic input]
2024bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 bytes: object = None
2027 /
2028
2029Strip leading and trailing bytes contained in the argument.
2030
2031If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2032[clinic start generated code]*/
2033
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002034static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002036/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002037{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002039}
2040
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041/*[clinic input]
2042bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044 bytes: object = None
2045 /
2046
2047Strip leading bytes contained in the argument.
2048
2049If the argument is omitted or None, strip leading ASCII whitespace.
2050[clinic start generated code]*/
2051
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002052static PyObject *
2053bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002054/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002055{
2056 return do_argstrip(self, LEFTSTRIP, bytes);
2057}
2058
2059/*[clinic input]
2060bytes.rstrip
2061
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002062 bytes: object = None
2063 /
2064
2065Strip trailing bytes contained in the argument.
2066
2067If the argument is omitted or None, strip trailing ASCII whitespace.
2068[clinic start generated code]*/
2069
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002070static PyObject *
2071bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002072/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002073{
2074 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002075}
Neal Norwitz6968b052007-02-27 19:02:19 +00002076
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002078static PyObject *
2079bytes_count(PyBytesObject *self, PyObject *args)
2080{
2081 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2082}
2083
2084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085/*[clinic input]
2086bytes.translate
2087
Victor Stinner049e5092014-08-17 22:20:00 +02002088 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002090 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002091 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002092
2093Return a copy with each character mapped by the given translation table.
2094
Martin Panter1b6c6da2016-08-27 08:35:02 +00002095All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096The remaining characters are mapped through the given translation table.
2097[clinic start generated code]*/
2098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002100bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002101 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002102/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002104 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002105 Py_buffer table_view = {NULL, NULL};
2106 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002108 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 Py_ssize_t inlen, tablen, dellen = 0;
2112 PyObject *result;
2113 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 if (PyBytes_Check(table)) {
2116 table_chars = PyBytes_AS_STRING(table);
2117 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002119 else if (table == Py_None) {
2120 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 tablen = 256;
2122 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002123 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002124 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002125 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002126 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002127 tablen = table_view.len;
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 if (tablen != 256) {
2131 PyErr_SetString(PyExc_ValueError,
2132 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 return NULL;
2135 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137 if (deletechars != NULL) {
2138 if (PyBytes_Check(deletechars)) {
2139 del_table_chars = PyBytes_AS_STRING(deletechars);
2140 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002142 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002143 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002144 PyBuffer_Release(&table_view);
2145 return NULL;
2146 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002147 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002148 dellen = del_table_view.len;
2149 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 }
2151 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 dellen = 0;
2154 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 inlen = PyBytes_GET_SIZE(input_obj);
2157 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 if (result == NULL) {
2159 PyBuffer_Release(&del_table_view);
2160 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002162 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002163 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002166 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 /* If no deletions are required, use faster code */
2168 for (i = inlen; --i >= 0; ) {
2169 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 changed = 1;
2172 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002173 if (!changed && PyBytes_CheckExact(input_obj)) {
2174 Py_INCREF(input_obj);
2175 Py_DECREF(result);
2176 result = input_obj;
2177 }
2178 PyBuffer_Release(&del_table_view);
2179 PyBuffer_Release(&table_view);
2180 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 for (i = 0; i < 256; i++)
2185 trans_table[i] = Py_CHARMASK(i);
2186 } else {
2187 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002188 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002190 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002194 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 for (i = inlen; --i >= 0; ) {
2197 c = Py_CHARMASK(*input++);
2198 if (trans_table[c] != -1)
2199 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2200 continue;
2201 changed = 1;
2202 }
2203 if (!changed && PyBytes_CheckExact(input_obj)) {
2204 Py_DECREF(result);
2205 Py_INCREF(input_obj);
2206 return input_obj;
2207 }
2208 /* Fix the size of the resulting string */
2209 if (inlen > 0)
2210 _PyBytes_Resize(&result, output - output_start);
2211 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212}
2213
2214
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215/*[clinic input]
2216
2217@staticmethod
2218bytes.maketrans
2219
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002220 frm: Py_buffer
2221 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222 /
2223
2224Return a translation table useable for the bytes or bytearray translate method.
2225
2226The returned table will be one where each byte in frm is mapped to the byte at
2227the same position in to.
2228
2229The bytes objects frm and to must be of the same length.
2230[clinic start generated code]*/
2231
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002233bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002234/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235{
2236 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002237}
2238
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239
2240/*[clinic input]
2241bytes.replace
2242
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002243 old: Py_buffer
2244 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002245 count: Py_ssize_t = -1
2246 Maximum number of occurrences to replace.
2247 -1 (the default value) means replace all occurrences.
2248 /
2249
2250Return a copy with all occurrences of substring old replaced by new.
2251
2252If the optional argument count is given, only the first count occurrences are
2253replaced.
2254[clinic start generated code]*/
2255
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002256static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002257bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002258 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002259/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002261 return stringlib_replace((PyObject *)self,
2262 (const char *)old->buf, old->len,
2263 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264}
2265
2266/** End DALKE **/
2267
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002269static PyObject *
2270bytes_startswith(PyBytesObject *self, PyObject *args)
2271{
2272 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2273}
2274
2275static PyObject *
2276bytes_endswith(PyBytesObject *self, PyObject *args)
2277{
2278 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2279}
2280
2281
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002282/*[clinic input]
2283bytes.decode
2284
2285 encoding: str(c_default="NULL") = 'utf-8'
2286 The encoding with which to decode the bytes.
2287 errors: str(c_default="NULL") = 'strict'
2288 The error handling scheme to use for the handling of decoding errors.
2289 The default is 'strict' meaning that decoding errors raise a
2290 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2291 as well as any other name registered with codecs.register_error that
2292 can handle UnicodeDecodeErrors.
2293
2294Decode the bytes using the codec registered for encoding.
2295[clinic start generated code]*/
2296
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002298bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002299 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002300/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002301{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002302 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002303}
2304
Guido van Rossum20188312006-05-05 15:15:40 +00002305
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002306/*[clinic input]
2307bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002309 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002310
2311Return a list of the lines in the bytes, breaking at line boundaries.
2312
2313Line breaks are not included in the resulting list unless keepends is given and
2314true.
2315[clinic start generated code]*/
2316
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002318bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002319/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002320{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002322 (PyObject*) self, PyBytes_AS_STRING(self),
2323 PyBytes_GET_SIZE(self), keepends
2324 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002325}
2326
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327/*[clinic input]
2328@classmethod
2329bytes.fromhex
2330
2331 string: unicode
2332 /
2333
2334Create a bytes object from a string of hexadecimal numbers.
2335
2336Spaces between two numbers are accepted.
2337Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2338[clinic start generated code]*/
2339
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002341bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002342/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002343{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002344 PyObject *result = _PyBytes_FromHex(string, 0);
2345 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002346 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2347 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002348 }
2349 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002350}
2351
2352PyObject*
2353_PyBytes_FromHex(PyObject *string, int use_bytearray)
2354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002356 Py_ssize_t hexlen, invalid_char;
2357 unsigned int top, bot;
2358 Py_UCS1 *str, *end;
2359 _PyBytesWriter writer;
2360
2361 _PyBytesWriter_Init(&writer);
2362 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002363
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002364 assert(PyUnicode_Check(string));
2365 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002368
Victor Stinner2bf89932015-10-14 11:25:33 +02002369 if (!PyUnicode_IS_ASCII(string)) {
2370 void *data = PyUnicode_DATA(string);
2371 unsigned int kind = PyUnicode_KIND(string);
2372 Py_ssize_t i;
2373
2374 /* search for the first non-ASCII character */
2375 for (i = 0; i < hexlen; i++) {
2376 if (PyUnicode_READ(kind, data, i) >= 128)
2377 break;
2378 }
2379 invalid_char = i;
2380 goto error;
2381 }
2382
2383 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2384 str = PyUnicode_1BYTE_DATA(string);
2385
2386 /* This overestimates if there are spaces */
2387 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2388 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002390
2391 end = str + hexlen;
2392 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002394 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002395 do {
2396 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002397 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002398 if (str >= end)
2399 break;
2400 }
2401
2402 top = _PyLong_DigitValue[*str];
2403 if (top >= 16) {
2404 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 goto error;
2406 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002407 str++;
2408
2409 bot = _PyLong_DigitValue[*str];
2410 if (bot >= 16) {
2411 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2412 goto error;
2413 }
2414 str++;
2415
2416 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002418
2419 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002420
2421 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002422 PyErr_Format(PyExc_ValueError,
2423 "non-hexadecimal number found in "
2424 "fromhex() arg at position %zd", invalid_char);
2425 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002427}
2428
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002429PyDoc_STRVAR(hex__doc__,
2430"B.hex() -> string\n\
2431\n\
2432Create a string of hexadecimal numbers from a bytes object.\n\
2433Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2434
2435static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302436bytes_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002437{
2438 char* argbuf = PyBytes_AS_STRING(self);
2439 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2440 return _Py_strhex(argbuf, arglen);
2441}
2442
2443static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302444bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002445{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002447}
2448
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002449
2450static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002451bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002452 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302453 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002455 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002456 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002457 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002458 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002459 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002460 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002461 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002462 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002463 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002464 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002465 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2466 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302467 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302469 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302471 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002472 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302473 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302475 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302477 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302479 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302481 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002483 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002484 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302485 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002486 BYTES_LSTRIP_METHODDEF
2487 BYTES_MAKETRANS_METHODDEF
2488 BYTES_PARTITION_METHODDEF
2489 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002490 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2491 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002492 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002493 BYTES_RPARTITION_METHODDEF
2494 BYTES_RSPLIT_METHODDEF
2495 BYTES_RSTRIP_METHODDEF
2496 BYTES_SPLIT_METHODDEF
2497 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002498 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002499 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002500 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302501 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302503 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002504 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302505 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002506 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002508};
2509
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002511bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002512{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002513 if (!PyBytes_Check(self)) {
2514 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002515 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002516 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002517 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002518}
2519
2520static PyNumberMethods bytes_as_number = {
2521 0, /*nb_add*/
2522 0, /*nb_subtract*/
2523 0, /*nb_multiply*/
2524 bytes_mod, /*nb_remainder*/
2525};
2526
2527static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002528bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
2530static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002531bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 PyObject *x = NULL;
2534 const char *encoding = NULL;
2535 const char *errors = NULL;
2536 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002537 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 Py_ssize_t size;
2539 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002540 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002543 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2545 &encoding, &errors))
2546 return NULL;
2547 if (x == NULL) {
2548 if (encoding != NULL || errors != NULL) {
2549 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002550 encoding != NULL ?
2551 "encoding without a string argument" :
2552 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 return NULL;
2554 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002555 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002558 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002560 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002562 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 return NULL;
2564 }
2565 new = PyUnicode_AsEncodedString(x, encoding, errors);
2566 if (new == NULL)
2567 return NULL;
2568 assert(PyBytes_Check(new));
2569 return new;
2570 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002571
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002572 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002573 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002574 PyUnicode_Check(x) ?
2575 "string argument without an encoding" :
2576 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002577 return NULL;
2578 }
2579
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002580 /* We'd like to call PyObject_Bytes here, but we need to check for an
2581 integer argument before deferring to PyBytes_FromObject, something
2582 PyObject_Bytes doesn't do. */
2583 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2584 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002585 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002586 Py_DECREF(func);
2587 if (new == NULL)
2588 return NULL;
2589 if (!PyBytes_Check(new)) {
2590 PyErr_Format(PyExc_TypeError,
2591 "__bytes__ returned non-bytes (type %.200s)",
2592 Py_TYPE(new)->tp_name);
2593 Py_DECREF(new);
2594 return NULL;
2595 }
2596 return new;
2597 }
2598 else if (PyErr_Occurred())
2599 return NULL;
2600
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002601 if (PyUnicode_Check(x)) {
2602 PyErr_SetString(PyExc_TypeError,
2603 "string argument without an encoding");
2604 return NULL;
2605 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002607 if (PyIndex_Check(x)) {
2608 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2609 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002610 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002611 return NULL;
2612 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002613 }
INADA Naokia634e232017-01-06 17:32:01 +09002614 else {
2615 if (size < 0) {
2616 PyErr_SetString(PyExc_ValueError, "negative count");
2617 return NULL;
2618 }
2619 new = _PyBytes_FromSize(size, 1);
2620 if (new == NULL)
2621 return NULL;
2622 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002623 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002626 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002627}
2628
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002629static PyObject*
2630_PyBytes_FromBuffer(PyObject *x)
2631{
2632 PyObject *new;
2633 Py_buffer view;
2634
2635 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2636 return NULL;
2637
2638 new = PyBytes_FromStringAndSize(NULL, view.len);
2639 if (!new)
2640 goto fail;
2641 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2642 &view, view.len, 'C') < 0)
2643 goto fail;
2644 PyBuffer_Release(&view);
2645 return new;
2646
2647fail:
2648 Py_XDECREF(new);
2649 PyBuffer_Release(&view);
2650 return NULL;
2651}
2652
2653static PyObject*
2654_PyBytes_FromList(PyObject *x)
2655{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002656 Py_ssize_t i, size = PyList_GET_SIZE(x);
2657 Py_ssize_t value;
2658 char *str;
2659 PyObject *item;
2660 _PyBytesWriter writer;
2661
2662 _PyBytesWriter_Init(&writer);
2663 str = _PyBytesWriter_Alloc(&writer, size);
2664 if (str == NULL)
2665 return NULL;
2666 writer.overallocate = 1;
2667 size = writer.allocated;
2668
2669 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2670 item = PyList_GET_ITEM(x, i);
2671 Py_INCREF(item);
2672 value = PyNumber_AsSsize_t(item, NULL);
2673 Py_DECREF(item);
2674 if (value == -1 && PyErr_Occurred())
2675 goto error;
2676
2677 if (value < 0 || value >= 256) {
2678 PyErr_SetString(PyExc_ValueError,
2679 "bytes must be in range(0, 256)");
2680 goto error;
2681 }
2682
2683 if (i >= size) {
2684 str = _PyBytesWriter_Resize(&writer, str, size+1);
2685 if (str == NULL)
2686 return NULL;
2687 size = writer.allocated;
2688 }
2689 *str++ = (char) value;
2690 }
2691 return _PyBytesWriter_Finish(&writer, str);
2692
2693 error:
2694 _PyBytesWriter_Dealloc(&writer);
2695 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002696}
2697
2698static PyObject*
2699_PyBytes_FromTuple(PyObject *x)
2700{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002701 PyObject *bytes;
2702 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2703 Py_ssize_t value;
2704 char *str;
2705 PyObject *item;
2706
2707 bytes = PyBytes_FromStringAndSize(NULL, size);
2708 if (bytes == NULL)
2709 return NULL;
2710 str = ((PyBytesObject *)bytes)->ob_sval;
2711
2712 for (i = 0; i < size; i++) {
2713 item = PyTuple_GET_ITEM(x, i);
2714 value = PyNumber_AsSsize_t(item, NULL);
2715 if (value == -1 && PyErr_Occurred())
2716 goto error;
2717
2718 if (value < 0 || value >= 256) {
2719 PyErr_SetString(PyExc_ValueError,
2720 "bytes must be in range(0, 256)");
2721 goto error;
2722 }
2723 *str++ = (char) value;
2724 }
2725 return bytes;
2726
2727 error:
2728 Py_DECREF(bytes);
2729 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002730}
2731
2732static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002733_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002734{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002735 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002736 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002737 _PyBytesWriter writer;
2738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002740 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002741 if (size == -1 && PyErr_Occurred())
2742 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002743
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002744 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002745 str = _PyBytesWriter_Alloc(&writer, size);
2746 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002748 writer.overallocate = 1;
2749 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 /* Run the iterator to exhaustion */
2752 for (i = 0; ; i++) {
2753 PyObject *item;
2754 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 /* Get the next item */
2757 item = PyIter_Next(it);
2758 if (item == NULL) {
2759 if (PyErr_Occurred())
2760 goto error;
2761 break;
2762 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002764 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002765 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 Py_DECREF(item);
2767 if (value == -1 && PyErr_Occurred())
2768 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 /* Range check */
2771 if (value < 0 || value >= 256) {
2772 PyErr_SetString(PyExc_ValueError,
2773 "bytes must be in range(0, 256)");
2774 goto error;
2775 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 /* Append the byte */
2778 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002779 str = _PyBytesWriter_Resize(&writer, str, size+1);
2780 if (str == NULL)
2781 return NULL;
2782 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002783 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002784 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002786
2787 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788
2789 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002790 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792}
2793
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002794PyObject *
2795PyBytes_FromObject(PyObject *x)
2796{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002797 PyObject *it, *result;
2798
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002799 if (x == NULL) {
2800 PyErr_BadInternalCall();
2801 return NULL;
2802 }
2803
2804 if (PyBytes_CheckExact(x)) {
2805 Py_INCREF(x);
2806 return x;
2807 }
2808
2809 /* Use the modern buffer interface */
2810 if (PyObject_CheckBuffer(x))
2811 return _PyBytes_FromBuffer(x);
2812
2813 if (PyList_CheckExact(x))
2814 return _PyBytes_FromList(x);
2815
2816 if (PyTuple_CheckExact(x))
2817 return _PyBytes_FromTuple(x);
2818
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002819 if (!PyUnicode_Check(x)) {
2820 it = PyObject_GetIter(x);
2821 if (it != NULL) {
2822 result = _PyBytes_FromIterator(it, x);
2823 Py_DECREF(it);
2824 return result;
2825 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002826 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2827 return NULL;
2828 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002829 }
2830
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002831 PyErr_Format(PyExc_TypeError,
2832 "cannot convert '%.200s' object to bytes",
2833 x->ob_type->tp_name);
2834 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002835}
2836
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002838bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 PyObject *tmp, *pnew;
2841 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 assert(PyType_IsSubtype(type, &PyBytes_Type));
2844 tmp = bytes_new(&PyBytes_Type, args, kwds);
2845 if (tmp == NULL)
2846 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002847 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 n = PyBytes_GET_SIZE(tmp);
2849 pnew = type->tp_alloc(type, n);
2850 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002851 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 PyBytes_AS_STRING(tmp), n+1);
2853 ((PyBytesObject *)pnew)->ob_shash =
2854 ((PyBytesObject *)tmp)->ob_shash;
2855 }
2856 Py_DECREF(tmp);
2857 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858}
2859
/* Docstring of the bytes type, installed through the tp_doc slot of
   PyBytes_Type.  It lists the accepted constructor call forms followed by
   the kinds of source object they correspond to. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002860PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002861"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002863bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002864bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2865bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002866\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002868 - an iterable yielding integers in range(256)\n\
2869 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002870 - any object implementing the buffer API.\n\
2871 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002872
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002873static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002874
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2877 "bytes",
2878 PyBytesObject_SIZE,
2879 sizeof(char),
2880 bytes_dealloc, /* tp_dealloc */
2881 0, /* tp_print */
2882 0, /* tp_getattr */
2883 0, /* tp_setattr */
2884 0, /* tp_reserved */
2885 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002886 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 &bytes_as_sequence, /* tp_as_sequence */
2888 &bytes_as_mapping, /* tp_as_mapping */
2889 (hashfunc)bytes_hash, /* tp_hash */
2890 0, /* tp_call */
2891 bytes_str, /* tp_str */
2892 PyObject_GenericGetAttr, /* tp_getattro */
2893 0, /* tp_setattro */
2894 &bytes_as_buffer, /* tp_as_buffer */
2895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2896 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2897 bytes_doc, /* tp_doc */
2898 0, /* tp_traverse */
2899 0, /* tp_clear */
2900 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2901 0, /* tp_weaklistoffset */
2902 bytes_iter, /* tp_iter */
2903 0, /* tp_iternext */
2904 bytes_methods, /* tp_methods */
2905 0, /* tp_members */
2906 0, /* tp_getset */
2907 &PyBaseObject_Type, /* tp_base */
2908 0, /* tp_dict */
2909 0, /* tp_descr_get */
2910 0, /* tp_descr_set */
2911 0, /* tp_dictoffset */
2912 0, /* tp_init */
2913 0, /* tp_alloc */
2914 bytes_new, /* tp_new */
2915 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002916};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002917
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002918void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002919PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002921 assert(pv != NULL);
2922 if (*pv == NULL)
2923 return;
2924 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002925 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 return;
2927 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002928
2929 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2930 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002931 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002932 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002933
Antoine Pitrou161d6952014-05-01 14:36:20 +02002934 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002935 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002936 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2937 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2938 Py_CLEAR(*pv);
2939 return;
2940 }
2941
2942 oldsize = PyBytes_GET_SIZE(*pv);
2943 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2944 PyErr_NoMemory();
2945 goto error;
2946 }
2947 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2948 goto error;
2949
2950 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2951 PyBuffer_Release(&wb);
2952 return;
2953
2954 error:
2955 PyBuffer_Release(&wb);
2956 Py_CLEAR(*pv);
2957 return;
2958 }
2959
2960 else {
2961 /* Multiple references, need to create new object */
2962 PyObject *v;
2963 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002964 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966}
2967
2968void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002969PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 PyBytes_Concat(pv, w);
2972 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973}
2974
2975
Ethan Furmanb95b5612015-01-23 20:05:18 -08002976/* The following function breaks the notion that bytes are immutable:
2977 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002979 as creating a new bytes object and destroying the old one, only
2980 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002982 Note that if there's not enough memory to resize the bytes object, the
2983 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984 memory" exception is set, and -1 is returned. Else (on success) 0 is
2985 returned, and the value in *pv may or may not be the same as on input.
2986 As always, an extra byte is allocated for a trailing \0 byte (newsize
2987 does *not* include that), and a trailing \0 byte is stored.
2988*/
2989
2990int
2991_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2992{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002993 PyObject *v;
2994 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002996 if (!PyBytes_Check(v) || newsize < 0) {
2997 goto error;
2998 }
2999 if (Py_SIZE(v) == newsize) {
3000 /* return early if newsize equals to v->ob_size */
3001 return 0;
3002 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003003 if (Py_SIZE(v) == 0) {
3004 if (newsize == 0) {
3005 return 0;
3006 }
3007 *pv = _PyBytes_FromSize(newsize, 0);
3008 Py_DECREF(v);
3009 return (*pv == NULL) ? -1 : 0;
3010 }
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003011 if (Py_REFCNT(v) != 1) {
3012 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 }
Serhiy Storchaka44cc4822019-01-12 09:22:29 +02003014 if (newsize == 0) {
3015 *pv = _PyBytes_FromSize(0, 0);
3016 Py_DECREF(v);
3017 return (*pv == NULL) ? -1 : 0;
3018 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 /* XXX UNREF/NEWREF interface should be more symmetrical */
3020 _Py_DEC_REFTOTAL;
3021 _Py_ForgetReference(v);
3022 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003023 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 if (*pv == NULL) {
3025 PyObject_Del(v);
3026 PyErr_NoMemory();
3027 return -1;
3028 }
3029 _Py_NewReference(*pv);
3030 sv = (PyBytesObject *) *pv;
3031 Py_SIZE(sv) = newsize;
3032 sv->ob_sval[newsize] = '\0';
3033 sv->ob_shash = -1; /* invalidate cached hash value */
3034 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003035error:
3036 *pv = 0;
3037 Py_DECREF(v);
3038 PyErr_BadInternalCall();
3039 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040}
3041
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003042void
3043PyBytes_Fini(void)
3044{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003046 for (i = 0; i < UCHAR_MAX + 1; i++)
3047 Py_CLEAR(characters[i]);
3048 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049}
3050
Benjamin Peterson4116f362008-05-27 00:36:20 +00003051/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052
3053typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003054 PyObject_HEAD
3055 Py_ssize_t it_index;
3056 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003058
3059static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003060striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003062 _PyObject_GC_UNTRACK(it);
3063 Py_XDECREF(it->it_seq);
3064 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065}
3066
3067static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003068striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003070 Py_VISIT(it->it_seq);
3071 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003072}
3073
3074static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 PyBytesObject *seq;
3078 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 assert(it != NULL);
3081 seq = it->it_seq;
3082 if (seq == NULL)
3083 return NULL;
3084 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003086 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3087 item = PyLong_FromLong(
3088 (unsigned char)seq->ob_sval[it->it_index]);
3089 if (item != NULL)
3090 ++it->it_index;
3091 return item;
3092 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003095 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003096 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003097}
3098
3099static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303100striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 Py_ssize_t len = 0;
3103 if (it->it_seq)
3104 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3105 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003106}
3107
3108PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003110
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003111static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303112striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003113{
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003114 _Py_IDENTIFIER(iter);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003115 if (it->it_seq != NULL) {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003116 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003117 it->it_seq, it->it_index);
3118 } else {
Serhiy Storchakabb86bf42018-12-11 08:28:18 +02003119 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003120 }
3121}
3122
3123PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3124
3125static PyObject *
3126striter_setstate(striterobject *it, PyObject *state)
3127{
3128 Py_ssize_t index = PyLong_AsSsize_t(state);
3129 if (index == -1 && PyErr_Occurred())
3130 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003131 if (it->it_seq != NULL) {
3132 if (index < 0)
3133 index = 0;
3134 else if (index > PyBytes_GET_SIZE(it->it_seq))
3135 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3136 it->it_index = index;
3137 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003138 Py_RETURN_NONE;
3139}
3140
3141PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3142
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003143static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003144 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3145 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003146 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3147 reduce_doc},
3148 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3149 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003150 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003151};
3152
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003153PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3155 "bytes_iterator", /* tp_name */
3156 sizeof(striterobject), /* tp_basicsize */
3157 0, /* tp_itemsize */
3158 /* methods */
3159 (destructor)striter_dealloc, /* tp_dealloc */
3160 0, /* tp_print */
3161 0, /* tp_getattr */
3162 0, /* tp_setattr */
3163 0, /* tp_reserved */
3164 0, /* tp_repr */
3165 0, /* tp_as_number */
3166 0, /* tp_as_sequence */
3167 0, /* tp_as_mapping */
3168 0, /* tp_hash */
3169 0, /* tp_call */
3170 0, /* tp_str */
3171 PyObject_GenericGetAttr, /* tp_getattro */
3172 0, /* tp_setattro */
3173 0, /* tp_as_buffer */
3174 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3175 0, /* tp_doc */
3176 (traverseproc)striter_traverse, /* tp_traverse */
3177 0, /* tp_clear */
3178 0, /* tp_richcompare */
3179 0, /* tp_weaklistoffset */
3180 PyObject_SelfIter, /* tp_iter */
3181 (iternextfunc)striter_next, /* tp_iternext */
3182 striter_methods, /* tp_methods */
3183 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003184};
3185
3186static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003187bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003189 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003191 if (!PyBytes_Check(seq)) {
3192 PyErr_BadInternalCall();
3193 return NULL;
3194 }
3195 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3196 if (it == NULL)
3197 return NULL;
3198 it->it_index = 0;
3199 Py_INCREF(seq);
3200 it->it_seq = (PyBytesObject *)seq;
3201 _PyObject_GC_TRACK(it);
3202 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003203}
Victor Stinner00165072015-10-09 01:53:21 +02003204
3205
3206/* _PyBytesWriter API */
3207
/* Divisor used when over-allocating: a request for N bytes is grown by
   N / OVERALLOCATE_FACTOR extra bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% (factor 2) is the best choice */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (factor 4) is the best choice */
#  define OVERALLOCATE_FACTOR 4
#endif
3215
3216void
3217_PyBytesWriter_Init(_PyBytesWriter *writer)
3218{
Victor Stinner661aacc2015-10-14 09:41:48 +02003219 /* Set all attributes before small_buffer to 0 */
3220 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003221#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003222 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003223#endif
3224}
3225
3226void
3227_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3228{
3229 Py_CLEAR(writer->buffer);
3230}
3231
3232Py_LOCAL_INLINE(char*)
3233_PyBytesWriter_AsString(_PyBytesWriter *writer)
3234{
Victor Stinner661aacc2015-10-14 09:41:48 +02003235 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003236 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003237 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003238 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003239 else if (writer->use_bytearray) {
3240 assert(writer->buffer != NULL);
3241 return PyByteArray_AS_STRING(writer->buffer);
3242 }
3243 else {
3244 assert(writer->buffer != NULL);
3245 return PyBytes_AS_STRING(writer->buffer);
3246 }
Victor Stinner00165072015-10-09 01:53:21 +02003247}
3248
3249Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003250_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003251{
3252 char *start = _PyBytesWriter_AsString(writer);
3253 assert(str != NULL);
3254 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003255 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003256 return str - start;
3257}
3258
3259Py_LOCAL_INLINE(void)
3260_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3261{
3262#ifdef Py_DEBUG
3263 char *start, *end;
3264
Victor Stinner661aacc2015-10-14 09:41:48 +02003265 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003266 assert(writer->buffer == NULL);
3267 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003268 else {
3269 assert(writer->buffer != NULL);
3270 if (writer->use_bytearray)
3271 assert(PyByteArray_CheckExact(writer->buffer));
3272 else
3273 assert(PyBytes_CheckExact(writer->buffer));
3274 assert(Py_REFCNT(writer->buffer) == 1);
3275 }
Victor Stinner00165072015-10-09 01:53:21 +02003276
Victor Stinner661aacc2015-10-14 09:41:48 +02003277 if (writer->use_bytearray) {
3278 /* bytearray has its own overallocation algorithm,
3279 writer overallocation must be disabled */
3280 assert(!writer->overallocate);
3281 }
3282
3283 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003284 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003285 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003286 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003287 assert(start[writer->allocated] == 0);
3288
3289 end = start + writer->allocated;
3290 assert(str != NULL);
3291 assert(start <= str && str <= end);
3292#endif
3293}
3294
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003295void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003296_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003297{
3298 Py_ssize_t allocated, pos;
3299
3300 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003301 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003302
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003303 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003304 if (writer->overallocate
3305 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3306 /* overallocate to limit the number of realloc() */
3307 allocated += allocated / OVERALLOCATE_FACTOR;
3308 }
3309
Victor Stinner2bf89932015-10-14 11:25:33 +02003310 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003311 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003312 if (writer->use_bytearray) {
3313 if (PyByteArray_Resize(writer->buffer, allocated))
3314 goto error;
3315 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3316 but we cannot use ob_alloc because bytes may need to be moved
3317 to use the whole buffer. bytearray uses an internal optimization
3318 to avoid moving or copying bytes when bytes are removed at the
3319 beginning (ex: del bytearray[:1]). */
3320 }
3321 else {
3322 if (_PyBytes_Resize(&writer->buffer, allocated))
3323 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003324 }
3325 }
3326 else {
3327 /* convert from stack buffer to bytes object buffer */
3328 assert(writer->buffer == NULL);
3329
Victor Stinner661aacc2015-10-14 09:41:48 +02003330 if (writer->use_bytearray)
3331 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3332 else
3333 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003334 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003335 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003336
3337 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003338 char *dest;
3339 if (writer->use_bytearray)
3340 dest = PyByteArray_AS_STRING(writer->buffer);
3341 else
3342 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003343 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003344 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003345 pos);
3346 }
3347
Victor Stinnerb3653a32015-10-09 03:38:24 +02003348 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003349#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003350 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003351#endif
Victor Stinner00165072015-10-09 01:53:21 +02003352 }
3353 writer->allocated = allocated;
3354
3355 str = _PyBytesWriter_AsString(writer) + pos;
3356 _PyBytesWriter_CheckConsistency(writer, str);
3357 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003358
3359error:
3360 _PyBytesWriter_Dealloc(writer);
3361 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003362}
3363
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003364void*
3365_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3366{
3367 Py_ssize_t new_min_size;
3368
3369 _PyBytesWriter_CheckConsistency(writer, str);
3370 assert(size >= 0);
3371
3372 if (size == 0) {
3373 /* nothing to do */
3374 return str;
3375 }
3376
3377 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3378 PyErr_NoMemory();
3379 _PyBytesWriter_Dealloc(writer);
3380 return NULL;
3381 }
3382 new_min_size = writer->min_size + size;
3383
3384 if (new_min_size > writer->allocated)
3385 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3386
3387 writer->min_size = new_min_size;
3388 return str;
3389}
3390
Victor Stinner00165072015-10-09 01:53:21 +02003391/* Allocate the buffer to write size bytes.
3392 Return the pointer to the beginning of buffer data.
3393 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003394void*
Victor Stinner00165072015-10-09 01:53:21 +02003395_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3396{
3397 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003398 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003399 assert(size >= 0);
3400
Victor Stinnerb3653a32015-10-09 03:38:24 +02003401 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003402#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003403 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003404 /* In debug mode, don't use the full small buffer because it is less
3405 efficient than bytes and bytearray objects to detect buffer underflow
3406 and buffer overflow. Use 10 bytes of the small buffer to test also
3407 code using the smaller buffer in debug mode.
3408
3409 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3410 in debug mode to also be able to detect stack overflow when running
3411 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3412 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3413 stack overflow. */
3414 writer->allocated = Py_MIN(writer->allocated, 10);
3415 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3416 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003417 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003418#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003419 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003420#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003421 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003422}
3423
3424PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003425_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003426{
Victor Stinner2bf89932015-10-14 11:25:33 +02003427 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003428 PyObject *result;
3429
3430 _PyBytesWriter_CheckConsistency(writer, str);
3431
Victor Stinner2bf89932015-10-14 11:25:33 +02003432 size = _PyBytesWriter_GetSize(writer, str);
3433 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003434 Py_CLEAR(writer->buffer);
3435 /* Get the empty byte string singleton */
3436 result = PyBytes_FromStringAndSize(NULL, 0);
3437 }
3438 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003439 if (writer->use_bytearray) {
3440 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3441 }
3442 else {
3443 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3444 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003445 }
3446 else {
3447 result = writer->buffer;
3448 writer->buffer = NULL;
3449
Victor Stinner2bf89932015-10-14 11:25:33 +02003450 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003451 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003452 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003453 Py_DECREF(result);
3454 return NULL;
3455 }
3456 }
3457 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003458 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003459 assert(result == NULL);
3460 return NULL;
3461 }
Victor Stinner00165072015-10-09 01:53:21 +02003462 }
3463 }
Victor Stinner00165072015-10-09 01:53:21 +02003464 }
Victor Stinner00165072015-10-09 01:53:21 +02003465 return result;
3466}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003467
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003468void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003469_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003470 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003471{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003472 char *str = (char *)ptr;
3473
Victor Stinnerce179bf2015-10-09 12:57:22 +02003474 str = _PyBytesWriter_Prepare(writer, str, size);
3475 if (str == NULL)
3476 return NULL;
3477
Christian Heimesf051e432016-09-13 20:22:02 +02003478 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003479 str += size;
3480
3481 return str;
3482}