blob: e4a49731aba6724af5ae211e9328e0bbc8d973dc [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +01006#include "pycore_object.h"
Victor Stinner621cebe2018-11-12 16:53:38 +01007#include "pycore_pymem.h"
8#include "pycore_pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000011#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000012#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030015class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020016[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030017/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020018
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030019#include "clinic/bytesobject.c.h"
20
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000022Py_ssize_t _Py_null_strings, _Py_one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000023#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025static PyBytesObject *characters[UCHAR_MAX + 1];
26static PyBytesObject *nullstring;
27
Mark Dickinsonfd24b322008-12-06 15:33:31 +000028/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29 for a string of length n should request PyBytesObject_SIZE + n bytes.
30
31 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32 3 bytes per string allocation on a typical system.
33*/
34#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
Victor Stinner2bf89932015-10-14 11:25:33 +020036/* Forward declaration */
37Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38 char *str);
39
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length (not counting the terminator) becomes the object's size.
43
Martin Pantera90a4a92016-05-30 04:04:50 +000044 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 either NULL or else points to a string containing at least `size' bytes.
46 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47 not have to be null-terminated. (Therefore it is safe to construct a
48 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50 bytes (setting the last byte to the null terminating character) and you can
51 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000052 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 alter the data yourself, since the strings may be shared.
54
55 The PyObject member `op->ob_size', which denotes the number of "extra
56 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020057 allocated for string data, not counting the null terminating character.
58 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059 PyBytes_FromStringAndSize()) or the length of the string in the `str'
60 parameter (for PyBytes_FromString()).
61*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020062static PyObject *
63_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000064{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020065 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020066 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +000070 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 Py_INCREF(op);
73 return (PyObject *)op;
74 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075
Victor Stinner049e5092014-08-17 22:20:00 +020076 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (op == NULL)
88 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +010089 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020091 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020097 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103{
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000114 _Py_one_strings++;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200115#endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
Christian Heimesf051e432016-09-13 20:22:02 +0200126 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200127 /* share short strings */
128 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000133}
134
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135PyObject *
136PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200138 size_t size;
139 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000150 _Py_null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000156#ifdef COUNT_ALLOCS
Pablo Galindo49c75a82018-10-28 15:02:17 +0000157 _Py_one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +0100167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200169 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000179}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000180
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181PyObject *
182PyBytes_FromFormatV(const char *format, va_list vargs)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201
Victor Stinner03dab782015-10-14 00:21:35 +0200202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200205 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Victor Stinner03dab782015-10-14 00:21:35 +0200207#define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700253 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200313
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200314 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200315 i = strlen(p);
316 if (prec > 0 && i > prec)
317 i = prec;
318 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
319 if (s == NULL)
320 goto error;
321 break;
322 }
323
324 case 'p':
325 sprintf(buffer, "%p", va_arg(vargs, void*));
326 assert(strlen(buffer) < sizeof(buffer));
327 /* %p is ill-defined: ensure leading 0x. */
328 if (buffer[1] == 'X')
329 buffer[1] = 'x';
330 else if (buffer[1] != 'x') {
331 memmove(buffer+2, buffer, strlen(buffer)+1);
332 buffer[0] = '0';
333 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 }
Victor Stinner03dab782015-10-14 00:21:35 +0200335 WRITE_BYTES(buffer);
336 break;
337
338 case '%':
339 writer.min_size++;
340 *s++ = '%';
341 break;
342
343 default:
344 if (*f == 0) {
345 /* fix min_size if we reached the end of the format string */
346 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000348
Victor Stinner03dab782015-10-14 00:21:35 +0200349 /* invalid format string: copy unformatted string and exit */
350 WRITE_BYTES(p);
351 return _PyBytesWriter_Finish(&writer, s);
352 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354
Victor Stinner03dab782015-10-14 00:21:35 +0200355#undef WRITE_BYTES
356
357 return _PyBytesWriter_Finish(&writer, s);
358
359 error:
360 _PyBytesWriter_Dealloc(&writer);
361 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362}
363
364PyObject *
365PyBytes_FromFormat(const char *format, ...)
366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 PyObject* ret;
368 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369
370#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000372#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000374#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 ret = PyBytes_FromFormatV(format, vargs);
376 va_end(vargs);
377 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000378}
379
Ethan Furmanb95b5612015-01-23 20:05:18 -0800380/* Helpers for formatstring */
381
382Py_LOCAL_INLINE(PyObject *)
383getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
384{
385 Py_ssize_t argidx = *p_argidx;
386 if (argidx < arglen) {
387 (*p_argidx)++;
388 if (arglen < 0)
389 return args;
390 else
391 return PyTuple_GetItem(args, argidx);
392 }
393 PyErr_SetString(PyExc_TypeError,
394 "not enough arguments for format string");
395 return NULL;
396}
397
/* Bit flags recording which flag characters were seen in a conversion
 * specifier (combine with bitwise OR):
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
410
411/* Returns a new reference to a PyBytes object, or NULL on failure. */
412
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200413static char*
414formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200415 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800416{
417 char *p;
418 PyObject *result;
419 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200420 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800421
422 x = PyFloat_AsDouble(v);
423 if (x == -1.0 && PyErr_Occurred()) {
424 PyErr_Format(PyExc_TypeError, "float argument required, "
425 "not %.200s", Py_TYPE(v)->tp_name);
426 return NULL;
427 }
428
429 if (prec < 0)
430 prec = 6;
431
432 p = PyOS_double_to_string(x, type, prec,
433 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
434
435 if (p == NULL)
436 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200437
438 len = strlen(p);
439 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200440 str = _PyBytesWriter_Prepare(writer, str, len);
441 if (str == NULL)
442 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200443 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200444 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200445 str += len;
446 return str;
447 }
448
449 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200451 *p_result = result;
Zackery Spytz96c59322018-10-03 00:01:30 -0600452 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800453}
454
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300455static PyObject *
456formatlong(PyObject *v, int flags, int prec, int type)
457{
458 PyObject *result, *iobj;
459 if (type == 'i')
460 type = 'd';
461 if (PyLong_Check(v))
462 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
463 if (PyNumber_Check(v)) {
464 /* make sure number is a type of integer for o, x, and X */
465 if (type == 'o' || type == 'x' || type == 'X')
466 iobj = PyNumber_Index(v);
467 else
468 iobj = PyNumber_Long(v);
469 if (iobj == NULL) {
470 if (!PyErr_ExceptionMatches(PyExc_TypeError))
471 return NULL;
472 }
473 else if (!PyLong_Check(iobj))
474 Py_CLEAR(iobj);
475 if (iobj != NULL) {
476 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
477 Py_DECREF(iobj);
478 return result;
479 }
480 }
481 PyErr_Format(PyExc_TypeError,
482 "%%%c format: %s is required, not %.200s", type,
483 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
484 : "a number",
485 Py_TYPE(v)->tp_name);
486 return NULL;
487}
488
489static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300492 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200493 *p = PyBytes_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300496 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200497 *p = PyByteArray_AS_STRING(arg)[0];
498 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800499 }
500 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300501 PyObject *iobj;
502 long ival;
503 int overflow;
504 /* make sure number is a type of integer */
505 if (PyLong_Check(arg)) {
506 ival = PyLong_AsLongAndOverflow(arg, &overflow);
507 }
508 else {
509 iobj = PyNumber_Index(arg);
510 if (iobj == NULL) {
511 if (!PyErr_ExceptionMatches(PyExc_TypeError))
512 return 0;
513 goto onError;
514 }
515 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
516 Py_DECREF(iobj);
517 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300518 if (!overflow && ival == -1 && PyErr_Occurred())
519 goto onError;
520 if (overflow || !(0 <= ival && ival <= 255)) {
521 PyErr_SetString(PyExc_OverflowError,
522 "%c arg not in range(256)");
523 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300525 *p = (char)ival;
526 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300528 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200529 PyErr_SetString(PyExc_TypeError,
530 "%c requires an integer in range(256) or a single byte");
531 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532}
533
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800534static PyObject *_PyBytes_FromBuffer(PyObject *x);
535
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200539 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800541 /* is it a bytes object? */
542 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543 *pbuf = PyBytes_AS_STRING(v);
544 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800545 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200546 return v;
547 }
548 if (PyByteArray_Check(v)) {
549 *pbuf = PyByteArray_AS_STRING(v);
550 *plen = PyByteArray_GET_SIZE(v);
551 Py_INCREF(v);
552 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800553 }
554 /* does it support __bytes__? */
555 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
556 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100557 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800558 Py_DECREF(func);
559 if (result == NULL)
560 return NULL;
561 if (!PyBytes_Check(result)) {
562 PyErr_Format(PyExc_TypeError,
563 "__bytes__ returned non-bytes (type %.200s)",
564 Py_TYPE(result)->tp_name);
565 Py_DECREF(result);
566 return NULL;
567 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200568 *pbuf = PyBytes_AS_STRING(result);
569 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800570 return result;
571 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800572 /* does it support buffer protocol? */
573 if (PyObject_CheckBuffer(v)) {
574 /* maybe we can avoid making a copy of the buffer object here? */
575 result = _PyBytes_FromBuffer(v);
576 if (result == NULL)
577 return NULL;
578 *pbuf = PyBytes_AS_STRING(result);
579 *plen = PyBytes_GET_SIZE(result);
580 return result;
581 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800583 "%%b requires a bytes-like object, "
584 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585 Py_TYPE(v)->tp_name);
586 return NULL;
587}
588
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200589/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800590
591PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200592_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
593 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800594{
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 const char *fmt;
596 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200598 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601 _PyBytesWriter writer;
602
Victor Stinner772b2b02015-10-14 09:56:53 +0200603 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800604 PyErr_BadInternalCall();
605 return NULL;
606 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200607 fmt = format;
608 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200609
610 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200611 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200612
613 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
614 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800615 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200616 if (!use_bytearray)
617 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618
Ethan Furmanb95b5612015-01-23 20:05:18 -0800619 if (PyTuple_Check(args)) {
620 arglen = PyTuple_GET_SIZE(args);
621 argidx = 0;
622 }
623 else {
624 arglen = -1;
625 argidx = -2;
626 }
627 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
628 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
629 !PyByteArray_Check(args)) {
630 dict = args;
631 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200632
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 while (--fmtcnt >= 0) {
634 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200635 Py_ssize_t len;
636 char *pos;
637
Xiang Zhangb76ad512017-03-06 17:17:05 +0800638 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200639 if (pos != NULL)
640 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200641 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800642 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200643 assert(len != 0);
644
Christian Heimesf051e432016-09-13 20:22:02 +0200645 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 res += len;
647 fmt += len;
648 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800649 }
650 else {
651 /* Got a format specifier */
652 int flags = 0;
653 Py_ssize_t width = -1;
654 int prec = -1;
655 int c = '\0';
656 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657 PyObject *v = NULL;
658 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200659 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800660 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200661 Py_ssize_t len = 0;
662 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200663 Py_ssize_t alloc;
664#ifdef Py_DEBUG
665 char *before;
666#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667
Ethan Furmanb95b5612015-01-23 20:05:18 -0800668 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200669 if (*fmt == '%') {
670 *res++ = '%';
671 fmt++;
672 fmtcnt--;
673 continue;
674 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800675 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200676 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800677 Py_ssize_t keylen;
678 PyObject *key;
679 int pcount = 1;
680
681 if (dict == NULL) {
682 PyErr_SetString(PyExc_TypeError,
683 "format requires a mapping");
684 goto error;
685 }
686 ++fmt;
687 --fmtcnt;
688 keystart = fmt;
689 /* Skip over balanced parentheses */
690 while (pcount > 0 && --fmtcnt >= 0) {
691 if (*fmt == ')')
692 --pcount;
693 else if (*fmt == '(')
694 ++pcount;
695 fmt++;
696 }
697 keylen = fmt - keystart - 1;
698 if (fmtcnt < 0 || pcount > 0) {
699 PyErr_SetString(PyExc_ValueError,
700 "incomplete format key");
701 goto error;
702 }
703 key = PyBytes_FromStringAndSize(keystart,
704 keylen);
705 if (key == NULL)
706 goto error;
707 if (args_owned) {
708 Py_DECREF(args);
709 args_owned = 0;
710 }
711 args = PyObject_GetItem(dict, key);
712 Py_DECREF(key);
713 if (args == NULL) {
714 goto error;
715 }
716 args_owned = 1;
717 arglen = -1;
718 argidx = -2;
719 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200720
721 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800722 while (--fmtcnt >= 0) {
723 switch (c = *fmt++) {
724 case '-': flags |= F_LJUST; continue;
725 case '+': flags |= F_SIGN; continue;
726 case ' ': flags |= F_BLANK; continue;
727 case '#': flags |= F_ALT; continue;
728 case '0': flags |= F_ZERO; continue;
729 }
730 break;
731 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200732
733 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800734 if (c == '*') {
735 v = getnextarg(args, arglen, &argidx);
736 if (v == NULL)
737 goto error;
738 if (!PyLong_Check(v)) {
739 PyErr_SetString(PyExc_TypeError,
740 "* wants int");
741 goto error;
742 }
743 width = PyLong_AsSsize_t(v);
744 if (width == -1 && PyErr_Occurred())
745 goto error;
746 if (width < 0) {
747 flags |= F_LJUST;
748 width = -width;
749 }
750 if (--fmtcnt >= 0)
751 c = *fmt++;
752 }
753 else if (c >= 0 && isdigit(c)) {
754 width = c - '0';
755 while (--fmtcnt >= 0) {
756 c = Py_CHARMASK(*fmt++);
757 if (!isdigit(c))
758 break;
759 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
760 PyErr_SetString(
761 PyExc_ValueError,
762 "width too big");
763 goto error;
764 }
765 width = width*10 + (c - '0');
766 }
767 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200768
769 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800770 if (c == '.') {
771 prec = 0;
772 if (--fmtcnt >= 0)
773 c = *fmt++;
774 if (c == '*') {
775 v = getnextarg(args, arglen, &argidx);
776 if (v == NULL)
777 goto error;
778 if (!PyLong_Check(v)) {
779 PyErr_SetString(
780 PyExc_TypeError,
781 "* wants int");
782 goto error;
783 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200784 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (prec == -1 && PyErr_Occurred())
786 goto error;
787 if (prec < 0)
788 prec = 0;
789 if (--fmtcnt >= 0)
790 c = *fmt++;
791 }
792 else if (c >= 0 && isdigit(c)) {
793 prec = c - '0';
794 while (--fmtcnt >= 0) {
795 c = Py_CHARMASK(*fmt++);
796 if (!isdigit(c))
797 break;
798 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
799 PyErr_SetString(
800 PyExc_ValueError,
801 "prec too big");
802 goto error;
803 }
804 prec = prec*10 + (c - '0');
805 }
806 }
807 } /* prec */
808 if (fmtcnt >= 0) {
809 if (c == 'h' || c == 'l' || c == 'L') {
810 if (--fmtcnt >= 0)
811 c = *fmt++;
812 }
813 }
814 if (fmtcnt < 0) {
815 PyErr_SetString(PyExc_ValueError,
816 "incomplete format");
817 goto error;
818 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200819 v = getnextarg(args, arglen, &argidx);
820 if (v == NULL)
821 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200822
Alexey Izbyshevccd99752018-08-23 10:50:52 +0300823 if (fmtcnt == 0) {
824 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200825 writer.overallocate = 0;
826 }
827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 sign = 0;
829 fill = ' ';
830 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700831 case 'r':
832 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800835 if (temp == NULL)
836 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200837 assert(PyUnicode_IS_ASCII(temp));
838 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
839 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800840 if (prec >= 0 && len > prec)
841 len = prec;
842 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200843
Ethan Furmanb95b5612015-01-23 20:05:18 -0800844 case 's':
845 // %s is only for 2/3 code; 3 only code should use %b
846 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200847 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800848 if (temp == NULL)
849 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800850 if (prec >= 0 && len > prec)
851 len = prec;
852 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200853
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 case 'i':
855 case 'd':
856 case 'u':
857 case 'o':
858 case 'x':
859 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200860 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200861 && width == -1 && prec == -1
862 && !(flags & (F_SIGN | F_BLANK))
863 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200864 {
865 /* Fast path */
866 int alternate = flags & F_ALT;
867 int base;
868
869 switch(c)
870 {
871 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700872 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200873 case 'd':
874 case 'i':
875 case 'u':
876 base = 10;
877 break;
878 case 'o':
879 base = 8;
880 break;
881 case 'x':
882 case 'X':
883 base = 16;
884 break;
885 }
886
887 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200888 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200889 res = _PyLong_FormatBytesWriter(&writer, res,
890 v, base, alternate);
891 if (res == NULL)
892 goto error;
893 continue;
894 }
895
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300896 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200897 if (!temp)
898 goto error;
899 assert(PyUnicode_IS_ASCII(temp));
900 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
901 len = PyUnicode_GET_LENGTH(temp);
902 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800903 if (flags & F_ZERO)
904 fill = '0';
905 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200906
Ethan Furmanb95b5612015-01-23 20:05:18 -0800907 case 'e':
908 case 'E':
909 case 'f':
910 case 'F':
911 case 'g':
912 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200913 if (width == -1 && prec == -1
914 && !(flags & (F_SIGN | F_BLANK)))
915 {
916 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200917 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200918 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200919 if (res == NULL)
920 goto error;
921 continue;
922 }
923
Victor Stinnerad771582015-10-09 12:38:53 +0200924 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 goto error;
926 pbuf = PyBytes_AS_STRING(temp);
927 len = PyBytes_GET_SIZE(temp);
928 sign = 1;
929 if (flags & F_ZERO)
930 fill = '0';
931 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200934 pbuf = &onechar;
935 len = byte_converter(v, &onechar);
936 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200938 if (width == -1) {
939 /* Fast path */
940 *res++ = onechar;
941 continue;
942 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200944
Ethan Furmanb95b5612015-01-23 20:05:18 -0800945 default:
946 PyErr_Format(PyExc_ValueError,
947 "unsupported format character '%c' (0x%x) "
948 "at index %zd",
949 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200950 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 goto error;
952 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200953
Ethan Furmanb95b5612015-01-23 20:05:18 -0800954 if (sign) {
955 if (*pbuf == '-' || *pbuf == '+') {
956 sign = *pbuf++;
957 len--;
958 }
959 else if (flags & F_SIGN)
960 sign = '+';
961 else if (flags & F_BLANK)
962 sign = ' ';
963 else
964 sign = 0;
965 }
966 if (width < len)
967 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968
969 alloc = width;
970 if (sign != 0 && len == width)
971 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200972 /* 2: size preallocated for %s */
973 if (alloc > 2) {
974 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200975 if (res == NULL)
976 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200978#ifdef Py_DEBUG
979 before = res;
980#endif
981
982 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 if (sign) {
984 if (fill != ' ')
985 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 if (width > len)
987 width--;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Write the numeric prefix for "x", "X" and "o" formats
991 if the alternate form is used.
992 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200993 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 assert(pbuf[0] == '0');
995 assert(pbuf[1] == c);
996 if (fill != ' ') {
997 *res++ = *pbuf++;
998 *res++ = *pbuf++;
999 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001000 width -= 2;
1001 if (width < 0)
1002 width = 0;
1003 len -= 2;
1004 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001005
1006 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001008 memset(res, fill, width - len);
1009 res += (width - len);
1010 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001012
1013 /* If padding with spaces: write sign if needed and/or numeric
1014 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001015 if (fill == ' ') {
1016 if (sign)
1017 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001018 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 assert(pbuf[0] == '0');
1020 assert(pbuf[1] == c);
1021 *res++ = *pbuf++;
1022 *res++ = *pbuf++;
1023 }
1024 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001025
1026 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001027 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001029
1030 /* Pad right with the fill character if needed */
1031 if (width > len) {
1032 memset(res, ' ', width - len);
1033 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001035
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001036 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 PyErr_SetString(PyExc_TypeError,
1038 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 Py_XDECREF(temp);
1040 goto error;
1041 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001042 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001043
1044#ifdef Py_DEBUG
1045 /* check that we computed the exact size for this write */
1046 assert((res - before) == alloc);
1047#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049
1050 /* If overallocation was disabled, ensure that it was the last
1051 write. Otherwise, we missed an optimization */
Alexey Izbyshevccd99752018-08-23 10:50:52 +03001052 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001054
Ethan Furmanb95b5612015-01-23 20:05:18 -08001055 if (argidx < arglen && !dict) {
1056 PyErr_SetString(PyExc_TypeError,
1057 "not all arguments converted during bytes formatting");
1058 goto error;
1059 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001060
Ethan Furmanb95b5612015-01-23 20:05:18 -08001061 if (args_owned) {
1062 Py_DECREF(args);
1063 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001064 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001065
1066 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001067 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001068 if (args_owned) {
1069 Py_DECREF(args);
1070 }
1071 return NULL;
1072}
1073
1074/* =-= */
1075
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001076static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001077bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001080}
1081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001082/* Unescape a backslash-escaped string. If unicode is non-zero,
1083 the string is a u-literal. If recode_encoding is non-zero,
1084 the string is UTF-8 encoded and should be re-encoded in the
1085 specified encoding. */
1086
Victor Stinner2ec80632015-10-14 13:32:13 +02001087static char *
1088_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1089 const char *errors, const char *recode_encoding,
1090 _PyBytesWriter *writer, char *p)
1091{
1092 PyObject *u, *w;
1093 const char* t;
1094
1095 t = *s;
1096 /* Decode non-ASCII bytes as UTF-8. */
1097 while (t < end && (*t & 0x80))
1098 t++;
1099 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1100 if (u == NULL)
1101 return NULL;
1102
1103 /* Recode them in target encoding. */
1104 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1105 Py_DECREF(u);
1106 if (w == NULL)
1107 return NULL;
1108 assert(PyBytes_Check(w));
1109
1110 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001111 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001112 p = _PyBytesWriter_WriteBytes(writer, p,
1113 PyBytes_AS_STRING(w),
1114 PyBytes_GET_SIZE(w));
1115 Py_DECREF(w);
1116 if (p == NULL)
1117 return NULL;
1118
1119 *s = t;
1120 return p;
1121}
1122
Eric V. Smith42454af2016-10-31 09:22:08 -04001123PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 Py_ssize_t len,
1125 const char *errors,
1126 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001127 const char *recode_encoding,
1128 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001131 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001133 _PyBytesWriter writer;
1134
1135 _PyBytesWriter_Init(&writer);
1136
1137 p = _PyBytesWriter_Alloc(&writer, len);
1138 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001140 writer.overallocate = 1;
1141
Eric V. Smith42454af2016-10-31 09:22:08 -04001142 *first_invalid_escape = NULL;
1143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 end = s + len;
1145 while (s < end) {
1146 if (*s != '\\') {
1147 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001148 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 *p++ = *s++;
1150 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001151 else {
1152 /* non-ASCII character and need to recode */
1153 p = _PyBytes_DecodeEscapeRecode(&s, end,
1154 errors, recode_encoding,
1155 &writer, p);
1156 if (p == NULL)
1157 goto failed;
1158 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 continue;
1160 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001163 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 PyErr_SetString(PyExc_ValueError,
1165 "Trailing \\ in string");
1166 goto failed;
1167 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 switch (*s++) {
1170 /* XXX This assumes ASCII! */
1171 case '\n': break;
1172 case '\\': *p++ = '\\'; break;
1173 case '\'': *p++ = '\''; break;
1174 case '\"': *p++ = '\"'; break;
1175 case 'b': *p++ = '\b'; break;
1176 case 'f': *p++ = '\014'; break; /* FF */
1177 case 't': *p++ = '\t'; break;
1178 case 'n': *p++ = '\n'; break;
1179 case 'r': *p++ = '\r'; break;
1180 case 'v': *p++ = '\013'; break; /* VT */
1181 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1182 case '0': case '1': case '2': case '3':
1183 case '4': case '5': case '6': case '7':
1184 c = s[-1] - '0';
1185 if (s < end && '0' <= *s && *s <= '7') {
1186 c = (c<<3) + *s++ - '0';
1187 if (s < end && '0' <= *s && *s <= '7')
1188 c = (c<<3) + *s++ - '0';
1189 }
1190 *p++ = c;
1191 break;
1192 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001193 if (s+1 < end) {
1194 int digit1, digit2;
1195 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1196 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1197 if (digit1 < 16 && digit2 < 16) {
1198 *p++ = (unsigned char)((digit1 << 4) + digit2);
1199 s += 2;
1200 break;
1201 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001203 /* invalid hexadecimal digits */
1204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001206 PyErr_Format(PyExc_ValueError,
1207 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001208 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 goto failed;
1210 }
1211 if (strcmp(errors, "replace") == 0) {
1212 *p++ = '?';
1213 } else if (strcmp(errors, "ignore") == 0)
1214 /* do nothing */;
1215 else {
1216 PyErr_Format(PyExc_ValueError,
1217 "decoding error; unknown "
1218 "error handling code: %.400s",
1219 errors);
1220 goto failed;
1221 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001222 /* skip \x */
1223 if (s < end && Py_ISXDIGIT(s[0]))
1224 s++; /* and a hexdigit */
1225 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001228 if (*first_invalid_escape == NULL) {
1229 *first_invalid_escape = s-1; /* Back up one char, since we've
1230 already incremented s. */
1231 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001233 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001234 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 UTF-8 bytes may follow. */
1236 }
1237 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001238
1239 return _PyBytesWriter_Finish(&writer, p);
1240
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001242 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244}
1245
Eric V. Smith42454af2016-10-31 09:22:08 -04001246PyObject *PyBytes_DecodeEscape(const char *s,
1247 Py_ssize_t len,
1248 const char *errors,
1249 Py_ssize_t unicode,
1250 const char *recode_encoding)
1251{
1252 const char* first_invalid_escape;
1253 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1254 recode_encoding,
1255 &first_invalid_escape);
1256 if (result == NULL)
1257 return NULL;
1258 if (first_invalid_escape != NULL) {
1259 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1260 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001261 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001262 Py_DECREF(result);
1263 return NULL;
1264 }
1265 }
1266 return result;
1267
1268}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269/* -------------------------------------------------------------------- */
1270/* object api */
1271
1272Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001273PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 if (!PyBytes_Check(op)) {
1276 PyErr_Format(PyExc_TypeError,
1277 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1278 return -1;
1279 }
1280 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001281}
1282
1283char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001284PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001285{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 if (!PyBytes_Check(op)) {
1287 PyErr_Format(PyExc_TypeError,
1288 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1289 return NULL;
1290 }
1291 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292}
1293
1294int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001295PyBytes_AsStringAndSize(PyObject *obj,
1296 char **s,
1297 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 if (s == NULL) {
1300 PyErr_BadInternalCall();
1301 return -1;
1302 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 if (!PyBytes_Check(obj)) {
1305 PyErr_Format(PyExc_TypeError,
1306 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1307 return -1;
1308 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 *s = PyBytes_AS_STRING(obj);
1311 if (len != NULL)
1312 *len = PyBytes_GET_SIZE(obj);
1313 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001314 PyErr_SetString(PyExc_ValueError,
1315 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 return -1;
1317 }
1318 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319}
Neal Norwitz6968b052007-02-27 19:02:19 +00001320
1321/* -------------------------------------------------------------------- */
1322/* Methods */
1323
Eric Smith0923d1d2009-04-16 20:16:10 +00001324#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001325
1326#include "stringlib/fastsearch.h"
1327#include "stringlib/count.h"
1328#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001329#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001330#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001331#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001332#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001333
Eric Smith0f78bff2009-11-30 01:01:42 +00001334#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001335
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336PyObject *
1337PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001338{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001339 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001340 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001341 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001343 unsigned char quote, *s, *p;
1344
1345 /* Compute size of output string */
1346 squotes = dquotes = 0;
1347 newsize = 3; /* b'' */
1348 s = (unsigned char*)op->ob_sval;
1349 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001350 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001352 case '\'': squotes++; break;
1353 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001355 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 default:
1357 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001358 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001359 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001360 if (newsize > PY_SSIZE_T_MAX - incr)
1361 goto overflow;
1362 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001363 }
1364 quote = '\'';
1365 if (smartquotes && squotes && !dquotes)
1366 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001367 if (squotes && quote == '\'') {
1368 if (newsize > PY_SSIZE_T_MAX - squotes)
1369 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001372
1373 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 if (v == NULL) {
1375 return NULL;
1376 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001377 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001379 *p++ = 'b', *p++ = quote;
1380 for (i = 0; i < length; i++) {
1381 unsigned char c = op->ob_sval[i];
1382 if (c == quote || c == '\\')
1383 *p++ = '\\', *p++ = c;
1384 else if (c == '\t')
1385 *p++ = '\\', *p++ = 't';
1386 else if (c == '\n')
1387 *p++ = '\\', *p++ = 'n';
1388 else if (c == '\r')
1389 *p++ = '\\', *p++ = 'r';
1390 else if (c < ' ' || c >= 0x7f) {
1391 *p++ = '\\';
1392 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001393 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1394 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001396 else
1397 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001400 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001401 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001402
1403 overflow:
1404 PyErr_SetString(PyExc_OverflowError,
1405 "bytes object is too large to make repr");
1406 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001407}
1408
Neal Norwitz6968b052007-02-27 19:02:19 +00001409static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001410bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001413}
1414
Neal Norwitz6968b052007-02-27 19:02:19 +00001415static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001416bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (Py_BytesWarningFlag) {
1419 if (PyErr_WarnEx(PyExc_BytesWarning,
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001420 "str() on a bytes instance", 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 return NULL;
Victor Stinner53b7d4e2018-07-25 01:37:05 +02001422 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 }
1424 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001425}
1426
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001428bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431}
Neal Norwitz6968b052007-02-27 19:02:19 +00001432
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433/* This is also used by PyBytes_Concat() */
1434static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001435bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 Py_buffer va, vb;
1438 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 va.len = -1;
1441 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001442 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1443 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001445 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 goto done;
1447 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 /* Optimize end cases */
1450 if (va.len == 0 && PyBytes_CheckExact(b)) {
1451 result = b;
1452 Py_INCREF(result);
1453 goto done;
1454 }
1455 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1456 result = a;
1457 Py_INCREF(result);
1458 goto done;
1459 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001461 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 PyErr_NoMemory();
1463 goto done;
1464 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001466 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (result != NULL) {
1468 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1469 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1470 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001471
1472 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (va.len != -1)
1474 PyBuffer_Release(&va);
1475 if (vb.len != -1)
1476 PyBuffer_Release(&vb);
1477 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478}
Neal Norwitz6968b052007-02-27 19:02:19 +00001479
1480static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001481bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001482{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001483 Py_ssize_t i;
1484 Py_ssize_t j;
1485 Py_ssize_t size;
1486 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 size_t nbytes;
1488 if (n < 0)
1489 n = 0;
1490 /* watch out for overflows: the size can overflow int,
1491 * and the # of bytes needed can overflow size_t
1492 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001493 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 PyErr_SetString(PyExc_OverflowError,
1495 "repeated bytes are too long");
1496 return NULL;
1497 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001498 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1500 Py_INCREF(a);
1501 return (PyObject *)a;
1502 }
1503 nbytes = (size_t)size;
1504 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1505 PyErr_SetString(PyExc_OverflowError,
1506 "repeated bytes are too long");
1507 return NULL;
1508 }
1509 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1510 if (op == NULL)
1511 return PyErr_NoMemory();
Victor Stinnerb509d522018-11-23 14:27:38 +01001512 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 op->ob_shash = -1;
1514 op->ob_sval[size] = '\0';
1515 if (Py_SIZE(a) == 1 && n > 0) {
1516 memset(op->ob_sval, a->ob_sval[0] , n);
1517 return (PyObject *) op;
1518 }
1519 i = 0;
1520 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001521 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 i = Py_SIZE(a);
1523 }
1524 while (i < size) {
1525 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001526 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 i += j;
1528 }
1529 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001530}
1531
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001532static int
1533bytes_contains(PyObject *self, PyObject *arg)
1534{
1535 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1536}
1537
Neal Norwitz6968b052007-02-27 19:02:19 +00001538static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001539bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001540{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 if (i < 0 || i >= Py_SIZE(a)) {
1542 PyErr_SetString(PyExc_IndexError, "index out of range");
1543 return NULL;
1544 }
1545 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001546}
1547
Benjamin Peterson621b4302016-09-09 13:54:34 -07001548static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001549bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1550{
1551 int cmp;
1552 Py_ssize_t len;
1553
1554 len = Py_SIZE(a);
1555 if (Py_SIZE(b) != len)
1556 return 0;
1557
1558 if (a->ob_sval[0] != b->ob_sval[0])
1559 return 0;
1560
1561 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1562 return (cmp == 0);
1563}
1564
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001566bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 int c;
1569 Py_ssize_t len_a, len_b;
1570 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001571 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 /* Make sure both arguments are strings. */
1574 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001575 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001576 rc = PyObject_IsInstance((PyObject*)a,
1577 (PyObject*)&PyUnicode_Type);
1578 if (!rc)
1579 rc = PyObject_IsInstance((PyObject*)b,
1580 (PyObject*)&PyUnicode_Type);
1581 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001583 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001584 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001585 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001586 return NULL;
1587 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001588 else {
1589 rc = PyObject_IsInstance((PyObject*)a,
1590 (PyObject*)&PyLong_Type);
1591 if (!rc)
1592 rc = PyObject_IsInstance((PyObject*)b,
1593 (PyObject*)&PyLong_Type);
1594 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001595 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001596 if (rc) {
1597 if (PyErr_WarnEx(PyExc_BytesWarning,
1598 "Comparison between bytes and int", 1))
1599 return NULL;
1600 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001601 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 }
stratakise8b19652017-11-02 11:32:54 +01001603 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 case Py_EQ:
1608 case Py_LE:
1609 case Py_GE:
1610 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001611 Py_RETURN_TRUE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001612 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001613 case Py_NE:
1614 case Py_LT:
1615 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001616 Py_RETURN_FALSE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001617 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001618 default:
1619 PyErr_BadArgument();
1620 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 }
1622 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001623 else if (op == Py_EQ || op == Py_NE) {
1624 int eq = bytes_compare_eq(a, b);
1625 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001626 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001627 }
1628 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001629 len_a = Py_SIZE(a);
1630 len_b = Py_SIZE(b);
1631 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001632 if (min_len > 0) {
1633 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001634 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001635 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001637 else
1638 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001639 if (c != 0)
1640 Py_RETURN_RICHCOMPARE(c, 0, op);
1641 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001643}
1644
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001645static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001646bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001647{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001648 if (a->ob_shash == -1) {
1649 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001650 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001651 }
1652 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001653}
1654
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001656bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 if (PyIndex_Check(item)) {
1659 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1660 if (i == -1 && PyErr_Occurred())
1661 return NULL;
1662 if (i < 0)
1663 i += PyBytes_GET_SIZE(self);
1664 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1665 PyErr_SetString(PyExc_IndexError,
1666 "index out of range");
1667 return NULL;
1668 }
1669 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1670 }
1671 else if (PySlice_Check(item)) {
1672 Py_ssize_t start, stop, step, slicelength, cur, i;
1673 char* source_buf;
1674 char* result_buf;
1675 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001676
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001677 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 return NULL;
1679 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001680 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1681 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 if (slicelength <= 0) {
1684 return PyBytes_FromStringAndSize("", 0);
1685 }
1686 else if (start == 0 && step == 1 &&
1687 slicelength == PyBytes_GET_SIZE(self) &&
1688 PyBytes_CheckExact(self)) {
1689 Py_INCREF(self);
1690 return (PyObject *)self;
1691 }
1692 else if (step == 1) {
1693 return PyBytes_FromStringAndSize(
1694 PyBytes_AS_STRING(self) + start,
1695 slicelength);
1696 }
1697 else {
1698 source_buf = PyBytes_AS_STRING(self);
1699 result = PyBytes_FromStringAndSize(NULL, slicelength);
1700 if (result == NULL)
1701 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 result_buf = PyBytes_AS_STRING(result);
1704 for (cur = start, i = 0; i < slicelength;
1705 cur += step, i++) {
1706 result_buf[i] = source_buf[cur];
1707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 return result;
1710 }
1711 }
1712 else {
1713 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001714 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 Py_TYPE(item)->tp_name);
1716 return NULL;
1717 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718}
1719
1720static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001721bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1724 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725}
1726
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001727static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 (lenfunc)bytes_length, /*sq_length*/
1729 (binaryfunc)bytes_concat, /*sq_concat*/
1730 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1731 (ssizeargfunc)bytes_item, /*sq_item*/
1732 0, /*sq_slice*/
1733 0, /*sq_ass_item*/
1734 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001735 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736};
1737
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001738static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 (lenfunc)bytes_length,
1740 (binaryfunc)bytes_subscript,
1741 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001742};
1743
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001744static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 (getbufferproc)bytes_buffer_getbuffer,
1746 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747};
1748
1749
/* Strip modes passed as `striptype` to do_xstrip(), do_strip() and
   do_argstrip(): which end(s) of the bytes object to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1753
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001754/*[clinic input]
1755bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001757 sep: object = None
1758 The delimiter according which to split the bytes.
1759 None (the default value) means split on ASCII whitespace characters
1760 (space, tab, return, newline, formfeed, vertical tab).
1761 maxsplit: Py_ssize_t = -1
1762 Maximum number of splits to do.
1763 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001765Return a list of the sections in the bytes, using sep as the delimiter.
1766[clinic start generated code]*/
1767
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001768static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001769bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1770/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771{
1772 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 const char *s = PyBytes_AS_STRING(self), *sub;
1774 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001775 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001777 if (maxsplit < 0)
1778 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 return NULL;
1783 sub = vsub.buf;
1784 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1787 PyBuffer_Release(&vsub);
1788 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001789}
1790
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791/*[clinic input]
1792bytes.partition
1793
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001795 /
1796
1797Partition the bytes into three parts using the given separator.
1798
1799This will search for the separator sep in the bytes. If the separator is found,
1800returns a 3-tuple containing the part before the separator, the separator
1801itself, and the part after it.
1802
1803If the separator is not found, returns a 3-tuple containing the original bytes
1804object and two empty bytes objects.
1805[clinic start generated code]*/
1806
Neal Norwitz6968b052007-02-27 19:02:19 +00001807static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001809/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001810{
Neal Norwitz6968b052007-02-27 19:02:19 +00001811 return stringlib_partition(
1812 (PyObject*) self,
1813 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001814 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001815 );
1816}
1817
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818/*[clinic input]
1819bytes.rpartition
1820
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001822 /
1823
1824Partition the bytes into three parts using the given separator.
1825
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001826This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001827the separator is found, returns a 3-tuple containing the part before the
1828separator, the separator itself, and the part after it.
1829
1830If the separator is not found, returns a 3-tuple containing two empty bytes
1831objects and the original bytes object.
1832[clinic start generated code]*/
1833
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001834static PyObject *
1835bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001836/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001837{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 return stringlib_rpartition(
1839 (PyObject*) self,
1840 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001841 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001843}
1844
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001845/*[clinic input]
1846bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001847
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001848Return a list of the sections in the bytes, using sep as the delimiter.
1849
1850Splitting is done starting at the end of the bytes and working to the front.
1851[clinic start generated code]*/
1852
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001853static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001854bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1855/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001856{
1857 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 const char *s = PyBytes_AS_STRING(self), *sub;
1859 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001860 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 if (maxsplit < 0)
1863 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001866 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 return NULL;
1868 sub = vsub.buf;
1869 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1872 PyBuffer_Release(&vsub);
1873 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001874}
1875
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001877/*[clinic input]
1878bytes.join
1879
1880 iterable_of_bytes: object
1881 /
1882
1883Concatenate any number of bytes objects.
1884
1885The bytes whose method is called is inserted in between each pair.
1886
1887The result is returned as a new bytes object.
1888
1889Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1890[clinic start generated code]*/
1891
Neal Norwitz6968b052007-02-27 19:02:19 +00001892static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001893bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1894/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001895{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001896 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001897}
1898
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899PyObject *
1900_PyBytes_Join(PyObject *sep, PyObject *x)
1901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 assert(sep != NULL && PyBytes_Check(sep));
1903 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001904 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905}
1906
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001907static PyObject *
1908bytes_find(PyBytesObject *self, PyObject *args)
1909{
1910 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1911}
1912
1913static PyObject *
1914bytes_index(PyBytesObject *self, PyObject *args)
1915{
1916 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1917}
1918
1919
1920static PyObject *
1921bytes_rfind(PyBytesObject *self, PyObject *args)
1922{
1923 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1924}
1925
1926
1927static PyObject *
1928bytes_rindex(PyBytesObject *self, PyObject *args)
1929{
1930 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1931}
1932
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
1934Py_LOCAL_INLINE(PyObject *)
1935do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001936{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 Py_buffer vsep;
1938 char *s = PyBytes_AS_STRING(self);
1939 Py_ssize_t len = PyBytes_GET_SIZE(self);
1940 char *sep;
1941 Py_ssize_t seplen;
1942 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001944 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 return NULL;
1946 sep = vsep.buf;
1947 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 i = 0;
1950 if (striptype != RIGHTSTRIP) {
1951 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1952 i++;
1953 }
1954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 j = len;
1957 if (striptype != LEFTSTRIP) {
1958 do {
1959 j--;
1960 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1961 j++;
1962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1967 Py_INCREF(self);
1968 return (PyObject*)self;
1969 }
1970 else
1971 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001972}
1973
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
1975Py_LOCAL_INLINE(PyObject *)
1976do_strip(PyBytesObject *self, int striptype)
1977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 char *s = PyBytes_AS_STRING(self);
1979 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 i = 0;
1982 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001983 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 i++;
1985 }
1986 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 j = len;
1989 if (striptype != LEFTSTRIP) {
1990 do {
1991 j--;
David Malcolm96960882010-11-05 17:23:41 +00001992 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 j++;
1994 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1997 Py_INCREF(self);
1998 return (PyObject*)self;
1999 }
2000 else
2001 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002}
2003
2004
2005Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 if (bytes != NULL && bytes != Py_None) {
2009 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 }
2011 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012}
2013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014/*[clinic input]
2015bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017 bytes: object = None
2018 /
2019
2020Strip leading and trailing bytes contained in the argument.
2021
2022If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2023[clinic start generated code]*/
2024
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002025static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002027/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002028{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002029 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002030}
2031
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002032/*[clinic input]
2033bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035 bytes: object = None
2036 /
2037
2038Strip leading bytes contained in the argument.
2039
2040If the argument is omitted or None, strip leading ASCII whitespace.
2041[clinic start generated code]*/
2042
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002043static PyObject *
2044bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002045/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002046{
2047 return do_argstrip(self, LEFTSTRIP, bytes);
2048}
2049
2050/*[clinic input]
2051bytes.rstrip
2052
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002053 bytes: object = None
2054 /
2055
2056Strip trailing bytes contained in the argument.
2057
2058If the argument is omitted or None, strip trailing ASCII whitespace.
2059[clinic start generated code]*/
2060
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002061static PyObject *
2062bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002063/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002064{
2065 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002066}
Neal Norwitz6968b052007-02-27 19:02:19 +00002067
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002069static PyObject *
2070bytes_count(PyBytesObject *self, PyObject *args)
2071{
2072 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2073}
2074
2075
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076/*[clinic input]
2077bytes.translate
2078
Victor Stinner049e5092014-08-17 22:20:00 +02002079 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002080 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002081 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002082 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002083
2084Return a copy with each character mapped by the given translation table.
2085
Martin Panter1b6c6da2016-08-27 08:35:02 +00002086All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087The remaining characters are mapped through the given translation table.
2088[clinic start generated code]*/
2089
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002090static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002091bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002092 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002093/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002095 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002096 Py_buffer table_view = {NULL, NULL};
2097 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002099 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002101 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 Py_ssize_t inlen, tablen, dellen = 0;
2103 PyObject *result;
2104 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002106 if (PyBytes_Check(table)) {
2107 table_chars = PyBytes_AS_STRING(table);
2108 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 else if (table == Py_None) {
2111 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 tablen = 256;
2113 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002115 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002116 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002117 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002118 tablen = table_view.len;
2119 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 if (tablen != 256) {
2122 PyErr_SetString(PyExc_ValueError,
2123 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002124 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 return NULL;
2126 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128 if (deletechars != NULL) {
2129 if (PyBytes_Check(deletechars)) {
2130 del_table_chars = PyBytes_AS_STRING(deletechars);
2131 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002134 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 PyBuffer_Release(&table_view);
2136 return NULL;
2137 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002138 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002139 dellen = del_table_view.len;
2140 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 }
2142 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002143 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 dellen = 0;
2145 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 inlen = PyBytes_GET_SIZE(input_obj);
2148 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 if (result == NULL) {
2150 PyBuffer_Release(&del_table_view);
2151 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002153 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002154 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002157 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 /* If no deletions are required, use faster code */
2159 for (i = inlen; --i >= 0; ) {
2160 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002161 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002162 changed = 1;
2163 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 if (!changed && PyBytes_CheckExact(input_obj)) {
2165 Py_INCREF(input_obj);
2166 Py_DECREF(result);
2167 result = input_obj;
2168 }
2169 PyBuffer_Release(&del_table_view);
2170 PyBuffer_Release(&table_view);
2171 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002174 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002175 for (i = 0; i < 256; i++)
2176 trans_table[i] = Py_CHARMASK(i);
2177 } else {
2178 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002181 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002184 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002185 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 for (i = inlen; --i >= 0; ) {
2188 c = Py_CHARMASK(*input++);
2189 if (trans_table[c] != -1)
2190 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2191 continue;
2192 changed = 1;
2193 }
2194 if (!changed && PyBytes_CheckExact(input_obj)) {
2195 Py_DECREF(result);
2196 Py_INCREF(input_obj);
2197 return input_obj;
2198 }
2199 /* Fix the size of the resulting string */
2200 if (inlen > 0)
2201 _PyBytes_Resize(&result, output - output_start);
2202 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203}
2204
2205
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002206/*[clinic input]
2207
2208@staticmethod
2209bytes.maketrans
2210
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002211 frm: Py_buffer
2212 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213 /
2214
2215Return a translation table useable for the bytes or bytearray translate method.
2216
2217The returned table will be one where each byte in frm is mapped to the byte at
2218the same position in to.
2219
2220The bytes objects frm and to must be of the same length.
2221[clinic start generated code]*/
2222
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002223static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002224bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002225/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226{
2227 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002228}
2229
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002230
2231/*[clinic input]
2232bytes.replace
2233
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002234 old: Py_buffer
2235 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002236 count: Py_ssize_t = -1
2237 Maximum number of occurrences to replace.
2238 -1 (the default value) means replace all occurrences.
2239 /
2240
2241Return a copy with all occurrences of substring old replaced by new.
2242
2243If the optional argument count is given, only the first count occurrences are
2244replaced.
2245[clinic start generated code]*/
2246
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002248bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002249 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002250/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002252 return stringlib_replace((PyObject *)self,
2253 (const char *)old->buf, old->len,
2254 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255}
2256
2257/** End DALKE **/
2258
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002260static PyObject *
2261bytes_startswith(PyBytesObject *self, PyObject *args)
2262{
2263 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2264}
2265
2266static PyObject *
2267bytes_endswith(PyBytesObject *self, PyObject *args)
2268{
2269 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2270}
2271
2272
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002273/*[clinic input]
2274bytes.decode
2275
2276 encoding: str(c_default="NULL") = 'utf-8'
2277 The encoding with which to decode the bytes.
2278 errors: str(c_default="NULL") = 'strict'
2279 The error handling scheme to use for the handling of decoding errors.
2280 The default is 'strict' meaning that decoding errors raise a
2281 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2282 as well as any other name registered with codecs.register_error that
2283 can handle UnicodeDecodeErrors.
2284
2285Decode the bytes using the codec registered for encoding.
2286[clinic start generated code]*/
2287
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002288static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002289bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002290 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002291/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002292{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002293 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002294}
2295
Guido van Rossum20188312006-05-05 15:15:40 +00002296
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297/*[clinic input]
2298bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002299
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002300 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002301
2302Return a list of the lines in the bytes, breaking at line boundaries.
2303
2304Line breaks are not included in the resulting list unless keepends is given and
2305true.
2306[clinic start generated code]*/
2307
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002308static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002309bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002310/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002311{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002313 (PyObject*) self, PyBytes_AS_STRING(self),
2314 PyBytes_GET_SIZE(self), keepends
2315 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002316}
2317
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002318/*[clinic input]
2319@classmethod
2320bytes.fromhex
2321
2322 string: unicode
2323 /
2324
2325Create a bytes object from a string of hexadecimal numbers.
2326
2327Spaces between two numbers are accepted.
2328Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2329[clinic start generated code]*/
2330
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002331static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002332bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002333/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002334{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002335 PyObject *result = _PyBytes_FromHex(string, 0);
2336 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002337 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2338 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002339 }
2340 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002341}
2342
2343PyObject*
2344_PyBytes_FromHex(PyObject *string, int use_bytearray)
2345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002347 Py_ssize_t hexlen, invalid_char;
2348 unsigned int top, bot;
2349 Py_UCS1 *str, *end;
2350 _PyBytesWriter writer;
2351
2352 _PyBytesWriter_Init(&writer);
2353 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002354
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002355 assert(PyUnicode_Check(string));
2356 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002358 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002359
Victor Stinner2bf89932015-10-14 11:25:33 +02002360 if (!PyUnicode_IS_ASCII(string)) {
2361 void *data = PyUnicode_DATA(string);
2362 unsigned int kind = PyUnicode_KIND(string);
2363 Py_ssize_t i;
2364
2365 /* search for the first non-ASCII character */
2366 for (i = 0; i < hexlen; i++) {
2367 if (PyUnicode_READ(kind, data, i) >= 128)
2368 break;
2369 }
2370 invalid_char = i;
2371 goto error;
2372 }
2373
2374 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2375 str = PyUnicode_1BYTE_DATA(string);
2376
2377 /* This overestimates if there are spaces */
2378 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2379 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002381
2382 end = str + hexlen;
2383 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002385 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002386 do {
2387 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002388 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002389 if (str >= end)
2390 break;
2391 }
2392
2393 top = _PyLong_DigitValue[*str];
2394 if (top >= 16) {
2395 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002396 goto error;
2397 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002398 str++;
2399
2400 bot = _PyLong_DigitValue[*str];
2401 if (bot >= 16) {
2402 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2403 goto error;
2404 }
2405 str++;
2406
2407 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002409
2410 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002411
2412 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002413 PyErr_Format(PyExc_ValueError,
2414 "non-hexadecimal number found in "
2415 "fromhex() arg at position %zd", invalid_char);
2416 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002418}
2419
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002420PyDoc_STRVAR(hex__doc__,
2421"B.hex() -> string\n\
2422\n\
2423Create a string of hexadecimal numbers from a bytes object.\n\
2424Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2425
2426static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302427bytes_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002428{
2429 char* argbuf = PyBytes_AS_STRING(self);
2430 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2431 return _Py_strhex(argbuf, arglen);
2432}
2433
2434static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302435bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002438}
2439
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002440
2441static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002442bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302444 {"capitalize", stringlib_capitalize, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002445 _Py_capitalize__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002446 STRINGLIB_CENTER_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002447 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002448 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002449 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002450 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002451 _Py_endswith__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002452 STRINGLIB_EXPANDTABS_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002453 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002454 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002455 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002456 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2457 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302458 {"isalnum", stringlib_isalnum, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 _Py_isalnum__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302460 {"isalpha", stringlib_isalpha, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 _Py_isalpha__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302462 {"isascii", stringlib_isascii, METH_NOARGS,
INADA Naokia49ac992018-01-27 14:06:21 +09002463 _Py_isascii__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302464 {"isdigit", stringlib_isdigit, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 _Py_isdigit__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302466 {"islower", stringlib_islower, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 _Py_islower__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302468 {"isspace", stringlib_isspace, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 _Py_isspace__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302470 {"istitle", stringlib_istitle, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002471 _Py_istitle__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302472 {"isupper", stringlib_isupper, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002474 BYTES_JOIN_METHODDEF
Tal Einatc929df32018-07-06 13:17:38 +03002475 STRINGLIB_LJUST_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302476 {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002477 BYTES_LSTRIP_METHODDEF
2478 BYTES_MAKETRANS_METHODDEF
2479 BYTES_PARTITION_METHODDEF
2480 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002481 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2482 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002483 STRINGLIB_RJUST_METHODDEF
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002484 BYTES_RPARTITION_METHODDEF
2485 BYTES_RSPLIT_METHODDEF
2486 BYTES_RSTRIP_METHODDEF
2487 BYTES_SPLIT_METHODDEF
2488 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002489 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002490 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002491 BYTES_STRIP_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302492 {"swapcase", stringlib_swapcase, METH_NOARGS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 _Py_swapcase__doc__},
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302494 {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495 BYTES_TRANSLATE_METHODDEF
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05302496 {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Tal Einatc929df32018-07-06 13:17:38 +03002497 STRINGLIB_ZFILL_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002499};
2500
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002502bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002503{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002504 if (!PyBytes_Check(self)) {
2505 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002506 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002507 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002508 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002509}
2510
2511static PyNumberMethods bytes_as_number = {
2512 0, /*nb_add*/
2513 0, /*nb_subtract*/
2514 0, /*nb_multiply*/
2515 bytes_mod, /*nb_remainder*/
2516};
2517
2518static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002519bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520
2521static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002522bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 PyObject *x = NULL;
2525 const char *encoding = NULL;
2526 const char *errors = NULL;
2527 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002528 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 Py_ssize_t size;
2530 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002531 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002534 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2536 &encoding, &errors))
2537 return NULL;
2538 if (x == NULL) {
2539 if (encoding != NULL || errors != NULL) {
2540 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka2c2044e2018-10-21 15:29:12 +03002541 encoding != NULL ?
2542 "encoding without a string argument" :
2543 "errors without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 return NULL;
2545 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002546 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002549 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002551 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002553 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 return NULL;
2555 }
2556 new = PyUnicode_AsEncodedString(x, encoding, errors);
2557 if (new == NULL)
2558 return NULL;
2559 assert(PyBytes_Check(new));
2560 return new;
2561 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002562
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002563 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002564 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002565 PyUnicode_Check(x) ?
2566 "string argument without an encoding" :
2567 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002568 return NULL;
2569 }
2570
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002571 /* We'd like to call PyObject_Bytes here, but we need to check for an
2572 integer argument before deferring to PyBytes_FromObject, something
2573 PyObject_Bytes doesn't do. */
2574 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2575 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002576 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002577 Py_DECREF(func);
2578 if (new == NULL)
2579 return NULL;
2580 if (!PyBytes_Check(new)) {
2581 PyErr_Format(PyExc_TypeError,
2582 "__bytes__ returned non-bytes (type %.200s)",
2583 Py_TYPE(new)->tp_name);
2584 Py_DECREF(new);
2585 return NULL;
2586 }
2587 return new;
2588 }
2589 else if (PyErr_Occurred())
2590 return NULL;
2591
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002592 if (PyUnicode_Check(x)) {
2593 PyErr_SetString(PyExc_TypeError,
2594 "string argument without an encoding");
2595 return NULL;
2596 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002598 if (PyIndex_Check(x)) {
2599 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2600 if (size == -1 && PyErr_Occurred()) {
Serhiy Storchakae8904212018-10-15 00:02:57 +03002601 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002602 return NULL;
2603 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002604 }
INADA Naokia634e232017-01-06 17:32:01 +09002605 else {
2606 if (size < 0) {
2607 PyErr_SetString(PyExc_ValueError, "negative count");
2608 return NULL;
2609 }
2610 new = _PyBytes_FromSize(size, 1);
2611 if (new == NULL)
2612 return NULL;
2613 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002614 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002617 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002618}
2619
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002620static PyObject*
2621_PyBytes_FromBuffer(PyObject *x)
2622{
2623 PyObject *new;
2624 Py_buffer view;
2625
2626 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2627 return NULL;
2628
2629 new = PyBytes_FromStringAndSize(NULL, view.len);
2630 if (!new)
2631 goto fail;
2632 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2633 &view, view.len, 'C') < 0)
2634 goto fail;
2635 PyBuffer_Release(&view);
2636 return new;
2637
2638fail:
2639 Py_XDECREF(new);
2640 PyBuffer_Release(&view);
2641 return NULL;
2642}
2643
2644static PyObject*
2645_PyBytes_FromList(PyObject *x)
2646{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002647 Py_ssize_t i, size = PyList_GET_SIZE(x);
2648 Py_ssize_t value;
2649 char *str;
2650 PyObject *item;
2651 _PyBytesWriter writer;
2652
2653 _PyBytesWriter_Init(&writer);
2654 str = _PyBytesWriter_Alloc(&writer, size);
2655 if (str == NULL)
2656 return NULL;
2657 writer.overallocate = 1;
2658 size = writer.allocated;
2659
2660 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2661 item = PyList_GET_ITEM(x, i);
2662 Py_INCREF(item);
2663 value = PyNumber_AsSsize_t(item, NULL);
2664 Py_DECREF(item);
2665 if (value == -1 && PyErr_Occurred())
2666 goto error;
2667
2668 if (value < 0 || value >= 256) {
2669 PyErr_SetString(PyExc_ValueError,
2670 "bytes must be in range(0, 256)");
2671 goto error;
2672 }
2673
2674 if (i >= size) {
2675 str = _PyBytesWriter_Resize(&writer, str, size+1);
2676 if (str == NULL)
2677 return NULL;
2678 size = writer.allocated;
2679 }
2680 *str++ = (char) value;
2681 }
2682 return _PyBytesWriter_Finish(&writer, str);
2683
2684 error:
2685 _PyBytesWriter_Dealloc(&writer);
2686 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002687}
2688
2689static PyObject*
2690_PyBytes_FromTuple(PyObject *x)
2691{
Serhiy Storchaka914f9a02018-10-21 15:25:53 +03002692 PyObject *bytes;
2693 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2694 Py_ssize_t value;
2695 char *str;
2696 PyObject *item;
2697
2698 bytes = PyBytes_FromStringAndSize(NULL, size);
2699 if (bytes == NULL)
2700 return NULL;
2701 str = ((PyBytesObject *)bytes)->ob_sval;
2702
2703 for (i = 0; i < size; i++) {
2704 item = PyTuple_GET_ITEM(x, i);
2705 value = PyNumber_AsSsize_t(item, NULL);
2706 if (value == -1 && PyErr_Occurred())
2707 goto error;
2708
2709 if (value < 0 || value >= 256) {
2710 PyErr_SetString(PyExc_ValueError,
2711 "bytes must be in range(0, 256)");
2712 goto error;
2713 }
2714 *str++ = (char) value;
2715 }
2716 return bytes;
2717
2718 error:
2719 Py_DECREF(bytes);
2720 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002721}
2722
2723static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002724_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002725{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002726 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002727 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002728 _PyBytesWriter writer;
2729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002730 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002731 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002732 if (size == -1 && PyErr_Occurred())
2733 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002734
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002735 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002736 str = _PyBytesWriter_Alloc(&writer, size);
2737 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002739 writer.overallocate = 1;
2740 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 /* Run the iterator to exhaustion */
2743 for (i = 0; ; i++) {
2744 PyObject *item;
2745 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 /* Get the next item */
2748 item = PyIter_Next(it);
2749 if (item == NULL) {
2750 if (PyErr_Occurred())
2751 goto error;
2752 break;
2753 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002755 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002756 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 Py_DECREF(item);
2758 if (value == -1 && PyErr_Occurred())
2759 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 /* Range check */
2762 if (value < 0 || value >= 256) {
2763 PyErr_SetString(PyExc_ValueError,
2764 "bytes must be in range(0, 256)");
2765 goto error;
2766 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002768 /* Append the byte */
2769 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002770 str = _PyBytesWriter_Resize(&writer, str, size+1);
2771 if (str == NULL)
2772 return NULL;
2773 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002775 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002777
2778 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002779
2780 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002781 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783}
2784
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002785PyObject *
2786PyBytes_FromObject(PyObject *x)
2787{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002788 PyObject *it, *result;
2789
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002790 if (x == NULL) {
2791 PyErr_BadInternalCall();
2792 return NULL;
2793 }
2794
2795 if (PyBytes_CheckExact(x)) {
2796 Py_INCREF(x);
2797 return x;
2798 }
2799
2800 /* Use the modern buffer interface */
2801 if (PyObject_CheckBuffer(x))
2802 return _PyBytes_FromBuffer(x);
2803
2804 if (PyList_CheckExact(x))
2805 return _PyBytes_FromList(x);
2806
2807 if (PyTuple_CheckExact(x))
2808 return _PyBytes_FromTuple(x);
2809
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002810 if (!PyUnicode_Check(x)) {
2811 it = PyObject_GetIter(x);
2812 if (it != NULL) {
2813 result = _PyBytes_FromIterator(it, x);
2814 Py_DECREF(it);
2815 return result;
2816 }
Serhiy Storchakae8904212018-10-15 00:02:57 +03002817 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2818 return NULL;
2819 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002820 }
2821
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002822 PyErr_Format(PyExc_TypeError,
2823 "cannot convert '%.200s' object to bytes",
2824 x->ob_type->tp_name);
2825 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002826}
2827
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002828static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002829bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 PyObject *tmp, *pnew;
2832 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002834 assert(PyType_IsSubtype(type, &PyBytes_Type));
2835 tmp = bytes_new(&PyBytes_Type, args, kwds);
2836 if (tmp == NULL)
2837 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002838 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 n = PyBytes_GET_SIZE(tmp);
2840 pnew = type->tp_alloc(type, n);
2841 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002842 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 PyBytes_AS_STRING(tmp), n+1);
2844 ((PyBytesObject *)pnew)->ob_shash =
2845 ((PyBytesObject *)tmp)->ob_shash;
2846 }
2847 Py_DECREF(tmp);
2848 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849}
2850
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002851PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002852"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002854bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002855bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2856bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002857\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002859 - an iterable yielding integers in range(256)\n\
2860 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002861 - any object implementing the buffer API.\n\
2862 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002863
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002864static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002865
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2868 "bytes",
2869 PyBytesObject_SIZE,
2870 sizeof(char),
2871 bytes_dealloc, /* tp_dealloc */
2872 0, /* tp_print */
2873 0, /* tp_getattr */
2874 0, /* tp_setattr */
2875 0, /* tp_reserved */
2876 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002877 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002878 &bytes_as_sequence, /* tp_as_sequence */
2879 &bytes_as_mapping, /* tp_as_mapping */
2880 (hashfunc)bytes_hash, /* tp_hash */
2881 0, /* tp_call */
2882 bytes_str, /* tp_str */
2883 PyObject_GenericGetAttr, /* tp_getattro */
2884 0, /* tp_setattro */
2885 &bytes_as_buffer, /* tp_as_buffer */
2886 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2887 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2888 bytes_doc, /* tp_doc */
2889 0, /* tp_traverse */
2890 0, /* tp_clear */
2891 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2892 0, /* tp_weaklistoffset */
2893 bytes_iter, /* tp_iter */
2894 0, /* tp_iternext */
2895 bytes_methods, /* tp_methods */
2896 0, /* tp_members */
2897 0, /* tp_getset */
2898 &PyBaseObject_Type, /* tp_base */
2899 0, /* tp_dict */
2900 0, /* tp_descr_get */
2901 0, /* tp_descr_set */
2902 0, /* tp_dictoffset */
2903 0, /* tp_init */
2904 0, /* tp_alloc */
2905 bytes_new, /* tp_new */
2906 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002907};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002908
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002910PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002912 assert(pv != NULL);
2913 if (*pv == NULL)
2914 return;
2915 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002916 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002917 return;
2918 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002919
2920 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2921 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002922 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002923 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002924
Antoine Pitrou161d6952014-05-01 14:36:20 +02002925 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002926 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002927 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2928 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2929 Py_CLEAR(*pv);
2930 return;
2931 }
2932
2933 oldsize = PyBytes_GET_SIZE(*pv);
2934 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2935 PyErr_NoMemory();
2936 goto error;
2937 }
2938 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2939 goto error;
2940
2941 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2942 PyBuffer_Release(&wb);
2943 return;
2944
2945 error:
2946 PyBuffer_Release(&wb);
2947 Py_CLEAR(*pv);
2948 return;
2949 }
2950
2951 else {
2952 /* Multiple references, need to create new object */
2953 PyObject *v;
2954 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002955 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002956 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002957}
2958
2959void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002960PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002961{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 PyBytes_Concat(pv, w);
2963 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002964}
2965
2966
Ethan Furmanb95b5612015-01-23 20:05:18 -08002967/* The following function breaks the notion that bytes are immutable:
2968 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002970 as creating a new bytes object and destroying the old one, only
2971 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002972 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002973 Note that if there's not enough memory to resize the bytes object, the
2974 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975 memory" exception is set, and -1 is returned. Else (on success) 0 is
2976 returned, and the value in *pv may or may not be the same as on input.
2977 As always, an extra byte is allocated for a trailing \0 byte (newsize
2978 does *not* include that), and a trailing \0 byte is stored.
2979*/
2980
2981int
2982_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2983{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002984 PyObject *v;
2985 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002986 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002987 if (!PyBytes_Check(v) || newsize < 0) {
2988 goto error;
2989 }
2990 if (Py_SIZE(v) == newsize) {
2991 /* return early if newsize equals to v->ob_size */
2992 return 0;
2993 }
2994 if (Py_REFCNT(v) != 1) {
2995 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002996 }
2997 /* XXX UNREF/NEWREF interface should be more symmetrical */
2998 _Py_DEC_REFTOTAL;
2999 _Py_ForgetReference(v);
3000 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003001 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 if (*pv == NULL) {
3003 PyObject_Del(v);
3004 PyErr_NoMemory();
3005 return -1;
3006 }
3007 _Py_NewReference(*pv);
3008 sv = (PyBytesObject *) *pv;
3009 Py_SIZE(sv) = newsize;
3010 sv->ob_sval[newsize] = '\0';
3011 sv->ob_shash = -1; /* invalidate cached hash value */
3012 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003013error:
3014 *pv = 0;
3015 Py_DECREF(v);
3016 PyErr_BadInternalCall();
3017 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018}
3019
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003020void
3021PyBytes_Fini(void)
3022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003024 for (i = 0; i < UCHAR_MAX + 1; i++)
3025 Py_CLEAR(characters[i]);
3026 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027}
3028
Benjamin Peterson4116f362008-05-27 00:36:20 +00003029/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030
3031typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 PyObject_HEAD
3033 Py_ssize_t it_index;
3034 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036
3037static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003038striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 _PyObject_GC_UNTRACK(it);
3041 Py_XDECREF(it->it_seq);
3042 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043}
3044
3045static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 Py_VISIT(it->it_seq);
3049 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003050}
3051
3052static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003053striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 PyBytesObject *seq;
3056 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 assert(it != NULL);
3059 seq = it->it_seq;
3060 if (seq == NULL)
3061 return NULL;
3062 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003064 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3065 item = PyLong_FromLong(
3066 (unsigned char)seq->ob_sval[it->it_index]);
3067 if (item != NULL)
3068 ++it->it_index;
3069 return item;
3070 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003072 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003073 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003075}
3076
3077static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303078striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 Py_ssize_t len = 0;
3081 if (it->it_seq)
3082 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3083 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003084}
3085
3086PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003088
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003089static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +05303090striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003091{
3092 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003093 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003094 it->it_seq, it->it_index);
3095 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003096 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003097 }
3098}
3099
3100PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3101
3102static PyObject *
3103striter_setstate(striterobject *it, PyObject *state)
3104{
3105 Py_ssize_t index = PyLong_AsSsize_t(state);
3106 if (index == -1 && PyErr_Occurred())
3107 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003108 if (it->it_seq != NULL) {
3109 if (index < 0)
3110 index = 0;
3111 else if (index > PyBytes_GET_SIZE(it->it_seq))
3112 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3113 it->it_index = index;
3114 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003115 Py_RETURN_NONE;
3116}
3117
3118PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3119
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003120static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3122 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003123 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3124 reduce_doc},
3125 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3126 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128};
3129
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003130PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003131 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3132 "bytes_iterator", /* tp_name */
3133 sizeof(striterobject), /* tp_basicsize */
3134 0, /* tp_itemsize */
3135 /* methods */
3136 (destructor)striter_dealloc, /* tp_dealloc */
3137 0, /* tp_print */
3138 0, /* tp_getattr */
3139 0, /* tp_setattr */
3140 0, /* tp_reserved */
3141 0, /* tp_repr */
3142 0, /* tp_as_number */
3143 0, /* tp_as_sequence */
3144 0, /* tp_as_mapping */
3145 0, /* tp_hash */
3146 0, /* tp_call */
3147 0, /* tp_str */
3148 PyObject_GenericGetAttr, /* tp_getattro */
3149 0, /* tp_setattro */
3150 0, /* tp_as_buffer */
3151 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3152 0, /* tp_doc */
3153 (traverseproc)striter_traverse, /* tp_traverse */
3154 0, /* tp_clear */
3155 0, /* tp_richcompare */
3156 0, /* tp_weaklistoffset */
3157 PyObject_SelfIter, /* tp_iter */
3158 (iternextfunc)striter_next, /* tp_iternext */
3159 striter_methods, /* tp_methods */
3160 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003161};
3162
3163static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003164bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003168 if (!PyBytes_Check(seq)) {
3169 PyErr_BadInternalCall();
3170 return NULL;
3171 }
3172 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3173 if (it == NULL)
3174 return NULL;
3175 it->it_index = 0;
3176 Py_INCREF(seq);
3177 it->it_seq = (PyBytesObject *)seq;
3178 _PyObject_GC_TRACK(it);
3179 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003180}
Victor Stinner00165072015-10-09 01:53:21 +02003181
3182
3183/* _PyBytesWriter API */
3184
/* Divisor used when overallocating: a request of n bytes is grown by
   n / OVERALLOCATE_FACTOR extra bytes. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3192
3193void
3194_PyBytesWriter_Init(_PyBytesWriter *writer)
3195{
Victor Stinner661aacc2015-10-14 09:41:48 +02003196 /* Set all attributes before small_buffer to 0 */
3197 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003198#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003199 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003200#endif
3201}
3202
3203void
3204_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3205{
3206 Py_CLEAR(writer->buffer);
3207}
3208
3209Py_LOCAL_INLINE(char*)
3210_PyBytesWriter_AsString(_PyBytesWriter *writer)
3211{
Victor Stinner661aacc2015-10-14 09:41:48 +02003212 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003213 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003214 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003215 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 else if (writer->use_bytearray) {
3217 assert(writer->buffer != NULL);
3218 return PyByteArray_AS_STRING(writer->buffer);
3219 }
3220 else {
3221 assert(writer->buffer != NULL);
3222 return PyBytes_AS_STRING(writer->buffer);
3223 }
Victor Stinner00165072015-10-09 01:53:21 +02003224}
3225
3226Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003227_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003228{
3229 char *start = _PyBytesWriter_AsString(writer);
3230 assert(str != NULL);
3231 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003232 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003233 return str - start;
3234}
3235
3236Py_LOCAL_INLINE(void)
3237_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3238{
3239#ifdef Py_DEBUG
3240 char *start, *end;
3241
Victor Stinner661aacc2015-10-14 09:41:48 +02003242 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003243 assert(writer->buffer == NULL);
3244 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003245 else {
3246 assert(writer->buffer != NULL);
3247 if (writer->use_bytearray)
3248 assert(PyByteArray_CheckExact(writer->buffer));
3249 else
3250 assert(PyBytes_CheckExact(writer->buffer));
3251 assert(Py_REFCNT(writer->buffer) == 1);
3252 }
Victor Stinner00165072015-10-09 01:53:21 +02003253
Victor Stinner661aacc2015-10-14 09:41:48 +02003254 if (writer->use_bytearray) {
3255 /* bytearray has its own overallocation algorithm,
3256 writer overallocation must be disabled */
3257 assert(!writer->overallocate);
3258 }
3259
3260 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003261 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003262 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003263 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003264 assert(start[writer->allocated] == 0);
3265
3266 end = start + writer->allocated;
3267 assert(str != NULL);
3268 assert(start <= str && str <= end);
3269#endif
3270}
3271
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003272void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003273_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003274{
3275 Py_ssize_t allocated, pos;
3276
3277 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003278 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003279
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003280 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003281 if (writer->overallocate
3282 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3283 /* overallocate to limit the number of realloc() */
3284 allocated += allocated / OVERALLOCATE_FACTOR;
3285 }
3286
Victor Stinner2bf89932015-10-14 11:25:33 +02003287 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003288 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003289 if (writer->use_bytearray) {
3290 if (PyByteArray_Resize(writer->buffer, allocated))
3291 goto error;
3292 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3293 but we cannot use ob_alloc because bytes may need to be moved
3294 to use the whole buffer. bytearray uses an internal optimization
3295 to avoid moving or copying bytes when bytes are removed at the
3296 beginning (ex: del bytearray[:1]). */
3297 }
3298 else {
3299 if (_PyBytes_Resize(&writer->buffer, allocated))
3300 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003301 }
3302 }
3303 else {
3304 /* convert from stack buffer to bytes object buffer */
3305 assert(writer->buffer == NULL);
3306
Victor Stinner661aacc2015-10-14 09:41:48 +02003307 if (writer->use_bytearray)
3308 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3309 else
3310 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003311 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003312 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003313
3314 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003315 char *dest;
3316 if (writer->use_bytearray)
3317 dest = PyByteArray_AS_STRING(writer->buffer);
3318 else
3319 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003320 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003321 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003322 pos);
3323 }
3324
Victor Stinnerb3653a32015-10-09 03:38:24 +02003325 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003326#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003327 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003328#endif
Victor Stinner00165072015-10-09 01:53:21 +02003329 }
3330 writer->allocated = allocated;
3331
3332 str = _PyBytesWriter_AsString(writer) + pos;
3333 _PyBytesWriter_CheckConsistency(writer, str);
3334 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003335
3336error:
3337 _PyBytesWriter_Dealloc(writer);
3338 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003339}
3340
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003341void*
3342_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3343{
3344 Py_ssize_t new_min_size;
3345
3346 _PyBytesWriter_CheckConsistency(writer, str);
3347 assert(size >= 0);
3348
3349 if (size == 0) {
3350 /* nothing to do */
3351 return str;
3352 }
3353
3354 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3355 PyErr_NoMemory();
3356 _PyBytesWriter_Dealloc(writer);
3357 return NULL;
3358 }
3359 new_min_size = writer->min_size + size;
3360
3361 if (new_min_size > writer->allocated)
3362 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3363
3364 writer->min_size = new_min_size;
3365 return str;
3366}
3367
Victor Stinner00165072015-10-09 01:53:21 +02003368/* Allocate the buffer to write size bytes.
3369 Return the pointer to the beginning of buffer data.
3370 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003371void*
Victor Stinner00165072015-10-09 01:53:21 +02003372_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3373{
3374 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003375 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003376 assert(size >= 0);
3377
Victor Stinnerb3653a32015-10-09 03:38:24 +02003378 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003379#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003380 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003381 /* In debug mode, don't use the full small buffer because it is less
3382 efficient than bytes and bytearray objects to detect buffer underflow
3383 and buffer overflow. Use 10 bytes of the small buffer to test also
3384 code using the smaller buffer in debug mode.
3385
3386 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3387 in debug mode to also be able to detect stack overflow when running
3388 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3389 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3390 stack overflow. */
3391 writer->allocated = Py_MIN(writer->allocated, 10);
3392 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3393 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003394 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003395#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003396 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003397#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003398 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003399}
3400
3401PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003402_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003403{
Victor Stinner2bf89932015-10-14 11:25:33 +02003404 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003405 PyObject *result;
3406
3407 _PyBytesWriter_CheckConsistency(writer, str);
3408
Victor Stinner2bf89932015-10-14 11:25:33 +02003409 size = _PyBytesWriter_GetSize(writer, str);
3410 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003411 Py_CLEAR(writer->buffer);
3412 /* Get the empty byte string singleton */
3413 result = PyBytes_FromStringAndSize(NULL, 0);
3414 }
3415 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003416 if (writer->use_bytearray) {
3417 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3418 }
3419 else {
3420 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3421 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003422 }
3423 else {
3424 result = writer->buffer;
3425 writer->buffer = NULL;
3426
Victor Stinner2bf89932015-10-14 11:25:33 +02003427 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003428 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003429 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003430 Py_DECREF(result);
3431 return NULL;
3432 }
3433 }
3434 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003435 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003436 assert(result == NULL);
3437 return NULL;
3438 }
Victor Stinner00165072015-10-09 01:53:21 +02003439 }
3440 }
Victor Stinner00165072015-10-09 01:53:21 +02003441 }
Victor Stinner00165072015-10-09 01:53:21 +02003442 return result;
3443}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003444
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003445void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003446_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003447 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003448{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003449 char *str = (char *)ptr;
3450
Victor Stinnerce179bf2015-10-09 12:57:22 +02003451 str = _PyBytesWriter_Prepare(writer, str, size);
3452 if (str == NULL)
3453 return NULL;
3454
Christian Heimesf051e432016-09-13 20:22:02 +02003455 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003456 str += size;
3457
3458 return str;
3459}