blob: 5b628420937d8daf74092c0f43c33a11b3732896 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006#include "internal/mem.h"
7#include "internal/pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00008
Gregory P. Smith60d241f2007-10-16 06:31:30 +00009#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000010#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000011#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030018#include "clinic/bytesobject.c.h"
19
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000021Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000023
Christian Heimes2c9c7a52008-05-26 13:42:13 +000024static PyBytesObject *characters[UCHAR_MAX + 1];
25static PyBytesObject *nullstring;
26
Mark Dickinsonfd24b322008-12-06 15:33:31 +000027/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
Victor Stinner2bf89932015-10-14 11:25:33 +020035/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
Martin Pantera90a4a92016-05-30 04:04:50 +000043 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000051 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020056 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 Py_INCREF(op);
72 return (PyObject *)op;
73 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000074
Victor Stinner049e5092014-08-17 22:20:00 +020075 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 PyErr_SetString(PyExc_OverflowError,
77 "byte string is too large");
78 return NULL;
79 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020082 if (use_calloc)
83 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84 else
85 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 if (op == NULL)
87 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010088 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 if (!use_calloc)
91 op->ob_sval[size] = '\0';
92 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 if (size == 0) {
94 nullstring = op;
95 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020096 }
97 return (PyObject *) op;
98}
99
100PyObject *
101PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102{
103 PyBytesObject *op;
104 if (size < 0) {
105 PyErr_SetString(PyExc_SystemError,
106 "Negative size passed to PyBytes_FromStringAndSize");
107 return NULL;
108 }
109 if (size == 1 && str != NULL &&
110 (op = characters[*str & UCHAR_MAX]) != NULL)
111 {
112#ifdef COUNT_ALLOCS
113 one_strings++;
114#endif
115 Py_INCREF(op);
116 return (PyObject *)op;
117 }
118
119 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120 if (op == NULL)
121 return NULL;
122 if (str == NULL)
123 return (PyObject *) op;
124
Christian Heimesf051e432016-09-13 20:22:02 +0200125 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200126 /* share short strings */
127 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 characters[*str & UCHAR_MAX] = op;
129 Py_INCREF(op);
130 }
131 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000132}
133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134PyObject *
135PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000136{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200137 size_t size;
138 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 assert(str != NULL);
141 size = strlen(str);
142 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143 PyErr_SetString(PyExc_OverflowError,
144 "byte string is too long");
145 return NULL;
146 }
147 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000150#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000157#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 Py_INCREF(op);
159 return (PyObject *)op;
160 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 /* Inline PyObject_NewVar */
163 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164 if (op == NULL)
165 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100166 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200168 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 /* share short strings */
170 if (size == 0) {
171 nullstring = op;
172 Py_INCREF(op);
173 } else if (size == 1) {
174 characters[*str & UCHAR_MAX] = op;
175 Py_INCREF(op);
176 }
177 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000179
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000180PyObject *
181PyBytes_FromFormatV(const char *format, va_list vargs)
182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200184 const char *f;
185 const char *p;
186 Py_ssize_t prec;
187 int longflag;
188 int size_tflag;
189 /* Longest 64-bit formatted numbers:
190 - "18446744073709551615\0" (21 bytes)
191 - "-9223372036854775808\0" (21 bytes)
192 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194 Longest 64-bit pointer representation:
195 "0xffffffffffffffff\0" (19 bytes). */
196 char buffer[21];
197 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Victor Stinner03dab782015-10-14 00:21:35 +0200201 s = _PyBytesWriter_Alloc(&writer, strlen(format));
202 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200204 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000205
Victor Stinner03dab782015-10-14 00:21:35 +0200206#define WRITE_BYTES(str) \
207 do { \
208 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209 if (s == NULL) \
210 goto error; \
211 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200214 if (*f != '%') {
215 *s++ = *f;
216 continue;
217 }
218
219 p = f++;
220
221 /* ignore the width (ex: 10 in "%10s") */
222 while (Py_ISDIGIT(*f))
223 f++;
224
225 /* parse the precision (ex: 10 in "%.10s") */
226 prec = 0;
227 if (*f == '.') {
228 f++;
229 for (; Py_ISDIGIT(*f); f++) {
230 prec = (prec * 10) + (*f - '0');
231 }
232 }
233
234 while (*f && *f != '%' && !Py_ISALPHA(*f))
235 f++;
236
237 /* handle the long flag ('l'), but only for %ld and %lu.
238 others can be added when necessary. */
239 longflag = 0;
240 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241 longflag = 1;
242 ++f;
243 }
244
245 /* handle the size_t flag ('z'). */
246 size_tflag = 0;
247 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248 size_tflag = 1;
249 ++f;
250 }
251
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700252 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200253 (ex: 2 for "%s") */
254 writer.min_size -= (f - p + 1);
255
256 switch (*f) {
257 case 'c':
258 {
259 int c = va_arg(vargs, int);
260 if (c < 0 || c > 255) {
261 PyErr_SetString(PyExc_OverflowError,
262 "PyBytes_FromFormatV(): %c format "
263 "expects an integer in range [0; 255]");
264 goto error;
265 }
266 writer.min_size++;
267 *s++ = (unsigned char)c;
268 break;
269 }
270
271 case 'd':
272 if (longflag)
273 sprintf(buffer, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(buffer, "%d", va_arg(vargs, int));
279 assert(strlen(buffer) < sizeof(buffer));
280 WRITE_BYTES(buffer);
281 break;
282
283 case 'u':
284 if (longflag)
285 sprintf(buffer, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(buffer, "%u",
292 va_arg(vargs, unsigned int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'i':
298 sprintf(buffer, "%i", va_arg(vargs, int));
299 assert(strlen(buffer) < sizeof(buffer));
300 WRITE_BYTES(buffer);
301 break;
302
303 case 'x':
304 sprintf(buffer, "%x", va_arg(vargs, int));
305 assert(strlen(buffer) < sizeof(buffer));
306 WRITE_BYTES(buffer);
307 break;
308
309 case 's':
310 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200312
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200313 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200314 i = strlen(p);
315 if (prec > 0 && i > prec)
316 i = prec;
317 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
318 if (s == NULL)
319 goto error;
320 break;
321 }
322
323 case 'p':
324 sprintf(buffer, "%p", va_arg(vargs, void*));
325 assert(strlen(buffer) < sizeof(buffer));
326 /* %p is ill-defined: ensure leading 0x. */
327 if (buffer[1] == 'X')
328 buffer[1] = 'x';
329 else if (buffer[1] != 'x') {
330 memmove(buffer+2, buffer, strlen(buffer)+1);
331 buffer[0] = '0';
332 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 }
Victor Stinner03dab782015-10-14 00:21:35 +0200334 WRITE_BYTES(buffer);
335 break;
336
337 case '%':
338 writer.min_size++;
339 *s++ = '%';
340 break;
341
342 default:
343 if (*f == 0) {
344 /* fix min_size if we reached the end of the format string */
345 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348 /* invalid format string: copy unformatted string and exit */
349 WRITE_BYTES(p);
350 return _PyBytesWriter_Finish(&writer, s);
351 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
Victor Stinner03dab782015-10-14 00:21:35 +0200354#undef WRITE_BYTES
355
356 return _PyBytesWriter_Finish(&writer, s);
357
358 error:
359 _PyBytesWriter_Dealloc(&writer);
360 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
404#define F_LJUST (1<<0)
405#define F_SIGN (1<<1)
406#define F_BLANK (1<<2)
407#define F_ALT (1<<3)
408#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200442 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 str += len;
445 return str;
446 }
447
448 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200450 *p_result = result;
Miss Islington (bot)063755c2018-10-02 23:33:42 -0700451 return result != NULL ? str : NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800452}
453
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300454static PyObject *
455formatlong(PyObject *v, int flags, int prec, int type)
456{
457 PyObject *result, *iobj;
458 if (type == 'i')
459 type = 'd';
460 if (PyLong_Check(v))
461 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462 if (PyNumber_Check(v)) {
463 /* make sure number is a type of integer for o, x, and X */
464 if (type == 'o' || type == 'x' || type == 'X')
465 iobj = PyNumber_Index(v);
466 else
467 iobj = PyNumber_Long(v);
468 if (iobj == NULL) {
469 if (!PyErr_ExceptionMatches(PyExc_TypeError))
470 return NULL;
471 }
472 else if (!PyLong_Check(iobj))
473 Py_CLEAR(iobj);
474 if (iobj != NULL) {
475 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
476 Py_DECREF(iobj);
477 return result;
478 }
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486}
487
488static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800490{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800494 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800498 }
499 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300500 PyObject *iobj;
501 long ival;
502 int overflow;
503 /* make sure number is a type of integer */
504 if (PyLong_Check(arg)) {
505 ival = PyLong_AsLongAndOverflow(arg, &overflow);
506 }
507 else {
508 iobj = PyNumber_Index(arg);
509 if (iobj == NULL) {
510 if (!PyErr_ExceptionMatches(PyExc_TypeError))
511 return 0;
512 goto onError;
513 }
514 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
515 Py_DECREF(iobj);
516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 if (!overflow && ival == -1 && PyErr_Occurred())
518 goto onError;
519 if (overflow || !(0 <= ival && ival <= 255)) {
520 PyErr_SetString(PyExc_OverflowError,
521 "%c arg not in range(256)");
522 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 *p = (char)ival;
525 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300527 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 PyErr_SetString(PyExc_TypeError,
529 "%c requires an integer in range(256) or a single byte");
530 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531}
532
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800533static PyObject *_PyBytes_FromBuffer(PyObject *x);
534
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 /* is it a bytes object? */
541 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 *pbuf = PyBytes_AS_STRING(v);
543 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 return v;
546 }
547 if (PyByteArray_Check(v)) {
548 *pbuf = PyByteArray_AS_STRING(v);
549 *plen = PyByteArray_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 }
553 /* does it support __bytes__? */
554 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100556 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800557 Py_DECREF(func);
558 if (result == NULL)
559 return NULL;
560 if (!PyBytes_Check(result)) {
561 PyErr_Format(PyExc_TypeError,
562 "__bytes__ returned non-bytes (type %.200s)",
563 Py_TYPE(result)->tp_name);
564 Py_DECREF(result);
565 return NULL;
566 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 *pbuf = PyBytes_AS_STRING(result);
568 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569 return result;
570 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800571 /* does it support buffer protocol? */
572 if (PyObject_CheckBuffer(v)) {
573 /* maybe we can avoid making a copy of the buffer object here? */
574 result = _PyBytes_FromBuffer(v);
575 if (result == NULL)
576 return NULL;
577 *pbuf = PyBytes_AS_STRING(result);
578 *plen = PyBytes_GET_SIZE(result);
579 return result;
580 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800582 "%%b requires a bytes-like object, "
583 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 Py_TYPE(v)->tp_name);
585 return NULL;
586}
587
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200588/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589
590PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200591_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
592 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800593{
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 const char *fmt;
595 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200597 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600 _PyBytesWriter writer;
601
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 PyErr_BadInternalCall();
604 return NULL;
605 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200606 fmt = format;
607 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200608
609 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200610 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200611
612 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
613 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200615 if (!use_bytearray)
616 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200617
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 if (PyTuple_Check(args)) {
619 arglen = PyTuple_GET_SIZE(args);
620 argidx = 0;
621 }
622 else {
623 arglen = -1;
624 argidx = -2;
625 }
626 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
627 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
628 !PyByteArray_Check(args)) {
629 dict = args;
630 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 while (--fmtcnt >= 0) {
633 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 Py_ssize_t len;
635 char *pos;
636
Xiang Zhangb76ad512017-03-06 17:17:05 +0800637 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 if (pos != NULL)
639 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200640 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800641 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642 assert(len != 0);
643
Christian Heimesf051e432016-09-13 20:22:02 +0200644 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 res += len;
646 fmt += len;
647 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 }
649 else {
650 /* Got a format specifier */
651 int flags = 0;
652 Py_ssize_t width = -1;
653 int prec = -1;
654 int c = '\0';
655 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 PyObject *v = NULL;
657 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200658 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800659 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200660 Py_ssize_t len = 0;
661 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200662 Py_ssize_t alloc;
663#ifdef Py_DEBUG
664 char *before;
665#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200668 if (*fmt == '%') {
669 *res++ = '%';
670 fmt++;
671 fmtcnt--;
672 continue;
673 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200675 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800676 Py_ssize_t keylen;
677 PyObject *key;
678 int pcount = 1;
679
680 if (dict == NULL) {
681 PyErr_SetString(PyExc_TypeError,
682 "format requires a mapping");
683 goto error;
684 }
685 ++fmt;
686 --fmtcnt;
687 keystart = fmt;
688 /* Skip over balanced parentheses */
689 while (pcount > 0 && --fmtcnt >= 0) {
690 if (*fmt == ')')
691 --pcount;
692 else if (*fmt == '(')
693 ++pcount;
694 fmt++;
695 }
696 keylen = fmt - keystart - 1;
697 if (fmtcnt < 0 || pcount > 0) {
698 PyErr_SetString(PyExc_ValueError,
699 "incomplete format key");
700 goto error;
701 }
702 key = PyBytes_FromStringAndSize(keystart,
703 keylen);
704 if (key == NULL)
705 goto error;
706 if (args_owned) {
707 Py_DECREF(args);
708 args_owned = 0;
709 }
710 args = PyObject_GetItem(dict, key);
711 Py_DECREF(key);
712 if (args == NULL) {
713 goto error;
714 }
715 args_owned = 1;
716 arglen = -1;
717 argidx = -2;
718 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200719
720 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800721 while (--fmtcnt >= 0) {
722 switch (c = *fmt++) {
723 case '-': flags |= F_LJUST; continue;
724 case '+': flags |= F_SIGN; continue;
725 case ' ': flags |= F_BLANK; continue;
726 case '#': flags |= F_ALT; continue;
727 case '0': flags |= F_ZERO; continue;
728 }
729 break;
730 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200731
732 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800733 if (c == '*') {
734 v = getnextarg(args, arglen, &argidx);
735 if (v == NULL)
736 goto error;
737 if (!PyLong_Check(v)) {
738 PyErr_SetString(PyExc_TypeError,
739 "* wants int");
740 goto error;
741 }
742 width = PyLong_AsSsize_t(v);
743 if (width == -1 && PyErr_Occurred())
744 goto error;
745 if (width < 0) {
746 flags |= F_LJUST;
747 width = -width;
748 }
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 }
752 else if (c >= 0 && isdigit(c)) {
753 width = c - '0';
754 while (--fmtcnt >= 0) {
755 c = Py_CHARMASK(*fmt++);
756 if (!isdigit(c))
757 break;
758 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
759 PyErr_SetString(
760 PyExc_ValueError,
761 "width too big");
762 goto error;
763 }
764 width = width*10 + (c - '0');
765 }
766 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200767
768 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800769 if (c == '.') {
770 prec = 0;
771 if (--fmtcnt >= 0)
772 c = *fmt++;
773 if (c == '*') {
774 v = getnextarg(args, arglen, &argidx);
775 if (v == NULL)
776 goto error;
777 if (!PyLong_Check(v)) {
778 PyErr_SetString(
779 PyExc_TypeError,
780 "* wants int");
781 goto error;
782 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800784 if (prec == -1 && PyErr_Occurred())
785 goto error;
786 if (prec < 0)
787 prec = 0;
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 else if (c >= 0 && isdigit(c)) {
792 prec = c - '0';
793 while (--fmtcnt >= 0) {
794 c = Py_CHARMASK(*fmt++);
795 if (!isdigit(c))
796 break;
797 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
798 PyErr_SetString(
799 PyExc_ValueError,
800 "prec too big");
801 goto error;
802 }
803 prec = prec*10 + (c - '0');
804 }
805 }
806 } /* prec */
807 if (fmtcnt >= 0) {
808 if (c == 'h' || c == 'l' || c == 'L') {
809 if (--fmtcnt >= 0)
810 c = *fmt++;
811 }
812 }
813 if (fmtcnt < 0) {
814 PyErr_SetString(PyExc_ValueError,
815 "incomplete format");
816 goto error;
817 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200818 v = getnextarg(args, arglen, &argidx);
819 if (v == NULL)
820 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200821
Miss Islington (bot)04208262018-08-23 04:03:02 -0400822 if (fmtcnt == 0) {
823 /* last write: disable writer overallocation */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200824 writer.overallocate = 0;
825 }
826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 sign = 0;
828 fill = ' ';
829 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700830 case 'r':
831 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200833 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (temp == NULL)
835 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 assert(PyUnicode_IS_ASCII(temp));
837 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
838 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 's':
844 // %s is only for 2/3 code; 3 only code should use %b
845 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200846 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 if (temp == NULL)
848 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800849 if (prec >= 0 && len > prec)
850 len = prec;
851 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200852
Ethan Furmanb95b5612015-01-23 20:05:18 -0800853 case 'i':
854 case 'd':
855 case 'u':
856 case 'o':
857 case 'x':
858 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200859 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200860 && width == -1 && prec == -1
861 && !(flags & (F_SIGN | F_BLANK))
862 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 {
864 /* Fast path */
865 int alternate = flags & F_ALT;
866 int base;
867
868 switch(c)
869 {
870 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700871 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 case 'd':
873 case 'i':
874 case 'u':
875 base = 10;
876 break;
877 case 'o':
878 base = 8;
879 break;
880 case 'x':
881 case 'X':
882 base = 16;
883 break;
884 }
885
886 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200887 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200888 res = _PyLong_FormatBytesWriter(&writer, res,
889 v, base, alternate);
890 if (res == NULL)
891 goto error;
892 continue;
893 }
894
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300895 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200896 if (!temp)
897 goto error;
898 assert(PyUnicode_IS_ASCII(temp));
899 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
900 len = PyUnicode_GET_LENGTH(temp);
901 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800902 if (flags & F_ZERO)
903 fill = '0';
904 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200905
Ethan Furmanb95b5612015-01-23 20:05:18 -0800906 case 'e':
907 case 'E':
908 case 'f':
909 case 'F':
910 case 'g':
911 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912 if (width == -1 && prec == -1
913 && !(flags & (F_SIGN | F_BLANK)))
914 {
915 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200916 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200917 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200918 if (res == NULL)
919 goto error;
920 continue;
921 }
922
Victor Stinnerad771582015-10-09 12:38:53 +0200923 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 goto error;
925 pbuf = PyBytes_AS_STRING(temp);
926 len = PyBytes_GET_SIZE(temp);
927 sign = 1;
928 if (flags & F_ZERO)
929 fill = '0';
930 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200931
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200933 pbuf = &onechar;
934 len = byte_converter(v, &onechar);
935 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200937 if (width == -1) {
938 /* Fast path */
939 *res++ = onechar;
940 continue;
941 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200943
Ethan Furmanb95b5612015-01-23 20:05:18 -0800944 default:
945 PyErr_Format(PyExc_ValueError,
946 "unsupported format character '%c' (0x%x) "
947 "at index %zd",
948 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200949 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800950 goto error;
951 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
Ethan Furmanb95b5612015-01-23 20:05:18 -0800953 if (sign) {
954 if (*pbuf == '-' || *pbuf == '+') {
955 sign = *pbuf++;
956 len--;
957 }
958 else if (flags & F_SIGN)
959 sign = '+';
960 else if (flags & F_BLANK)
961 sign = ' ';
962 else
963 sign = 0;
964 }
965 if (width < len)
966 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200967
968 alloc = width;
969 if (sign != 0 && len == width)
970 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200971 /* 2: size preallocated for %s */
972 if (alloc > 2) {
973 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974 if (res == NULL)
975 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977#ifdef Py_DEBUG
978 before = res;
979#endif
980
981 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 if (sign) {
983 if (fill != ' ')
984 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800985 if (width > len)
986 width--;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Write the numeric prefix for "x", "X" and "o" formats
990 if the alternate form is used.
991 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200992 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800993 assert(pbuf[0] == '0');
994 assert(pbuf[1] == c);
995 if (fill != ' ') {
996 *res++ = *pbuf++;
997 *res++ = *pbuf++;
998 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 width -= 2;
1000 if (width < 0)
1001 width = 0;
1002 len -= 2;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001007 memset(res, fill, width - len);
1008 res += (width - len);
1009 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* If padding with spaces: write sign if needed and/or numeric
1013 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 if (fill == ' ') {
1015 if (sign)
1016 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001017 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 assert(pbuf[0] == '0');
1019 assert(pbuf[1] == c);
1020 *res++ = *pbuf++;
1021 *res++ = *pbuf++;
1022 }
1023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
1025 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001026 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029 /* Pad right with the fill character if needed */
1030 if (width > len) {
1031 memset(res, ' ', width - len);
1032 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001035 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 Py_XDECREF(temp);
1039 goto error;
1040 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
1043#ifdef Py_DEBUG
1044 /* check that we computed the exact size for this write */
1045 assert((res - before) == alloc);
1046#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001047 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
1049 /* If overallocation was disabled, ensure that it was the last
1050 write. Otherwise, we missed an optimization */
Miss Islington (bot)04208262018-08-23 04:03:02 -04001051 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 if (argidx < arglen && !dict) {
1055 PyErr_SetString(PyExc_TypeError,
1056 "not all arguments converted during bytes formatting");
1057 goto error;
1058 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001059
Ethan Furmanb95b5612015-01-23 20:05:18 -08001060 if (args_owned) {
1061 Py_DECREF(args);
1062 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001063 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064
1065 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
1070 return NULL;
1071}
1072
1073/* =-= */
1074
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001075static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001076bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001079}
1080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001081/* Unescape a backslash-escaped string. If unicode is non-zero,
1082 the string is a u-literal. If recode_encoding is non-zero,
1083 the string is UTF-8 encoded and should be re-encoded in the
1084 specified encoding. */
1085
Victor Stinner2ec80632015-10-14 13:32:13 +02001086static char *
1087_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1088 const char *errors, const char *recode_encoding,
1089 _PyBytesWriter *writer, char *p)
1090{
1091 PyObject *u, *w;
1092 const char* t;
1093
1094 t = *s;
1095 /* Decode non-ASCII bytes as UTF-8. */
1096 while (t < end && (*t & 0x80))
1097 t++;
1098 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1099 if (u == NULL)
1100 return NULL;
1101
1102 /* Recode them in target encoding. */
1103 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1104 Py_DECREF(u);
1105 if (w == NULL)
1106 return NULL;
1107 assert(PyBytes_Check(w));
1108
1109 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001110 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001111 p = _PyBytesWriter_WriteBytes(writer, p,
1112 PyBytes_AS_STRING(w),
1113 PyBytes_GET_SIZE(w));
1114 Py_DECREF(w);
1115 if (p == NULL)
1116 return NULL;
1117
1118 *s = t;
1119 return p;
1120}
1121
Eric V. Smith42454af2016-10-31 09:22:08 -04001122PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 Py_ssize_t len,
1124 const char *errors,
1125 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001126 const char *recode_encoding,
1127 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 _PyBytesWriter writer;
1133
1134 _PyBytesWriter_Init(&writer);
1135
1136 p = _PyBytesWriter_Alloc(&writer, len);
1137 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001139 writer.overallocate = 1;
1140
Eric V. Smith42454af2016-10-31 09:22:08 -04001141 *first_invalid_escape = NULL;
1142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 end = s + len;
1144 while (s < end) {
1145 if (*s != '\\') {
1146 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 *p++ = *s++;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150 else {
1151 /* non-ASCII character and need to recode */
1152 p = _PyBytes_DecodeEscapeRecode(&s, end,
1153 errors, recode_encoding,
1154 &writer, p);
1155 if (p == NULL)
1156 goto failed;
1157 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 continue;
1159 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001162 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 PyErr_SetString(PyExc_ValueError,
1164 "Trailing \\ in string");
1165 goto failed;
1166 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 switch (*s++) {
1169 /* XXX This assumes ASCII! */
1170 case '\n': break;
1171 case '\\': *p++ = '\\'; break;
1172 case '\'': *p++ = '\''; break;
1173 case '\"': *p++ = '\"'; break;
1174 case 'b': *p++ = '\b'; break;
1175 case 'f': *p++ = '\014'; break; /* FF */
1176 case 't': *p++ = '\t'; break;
1177 case 'n': *p++ = '\n'; break;
1178 case 'r': *p++ = '\r'; break;
1179 case 'v': *p++ = '\013'; break; /* VT */
1180 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1181 case '0': case '1': case '2': case '3':
1182 case '4': case '5': case '6': case '7':
1183 c = s[-1] - '0';
1184 if (s < end && '0' <= *s && *s <= '7') {
1185 c = (c<<3) + *s++ - '0';
1186 if (s < end && '0' <= *s && *s <= '7')
1187 c = (c<<3) + *s++ - '0';
1188 }
1189 *p++ = c;
1190 break;
1191 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001192 if (s+1 < end) {
1193 int digit1, digit2;
1194 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1195 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1196 if (digit1 < 16 && digit2 < 16) {
1197 *p++ = (unsigned char)((digit1 << 4) + digit2);
1198 s += 2;
1199 break;
1200 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001202 /* invalid hexadecimal digits */
1203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001205 PyErr_Format(PyExc_ValueError,
1206 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001207 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 goto failed;
1209 }
1210 if (strcmp(errors, "replace") == 0) {
1211 *p++ = '?';
1212 } else if (strcmp(errors, "ignore") == 0)
1213 /* do nothing */;
1214 else {
1215 PyErr_Format(PyExc_ValueError,
1216 "decoding error; unknown "
1217 "error handling code: %.400s",
1218 errors);
1219 goto failed;
1220 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001221 /* skip \x */
1222 if (s < end && Py_ISXDIGIT(s[0]))
1223 s++; /* and a hexdigit */
1224 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001227 if (*first_invalid_escape == NULL) {
1228 *first_invalid_escape = s-1; /* Back up one char, since we've
1229 already incremented s. */
1230 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001232 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001233 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 UTF-8 bytes may follow. */
1235 }
1236 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001237
1238 return _PyBytesWriter_Finish(&writer, p);
1239
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001241 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
1244
Eric V. Smith42454af2016-10-31 09:22:08 -04001245PyObject *PyBytes_DecodeEscape(const char *s,
1246 Py_ssize_t len,
1247 const char *errors,
1248 Py_ssize_t unicode,
1249 const char *recode_encoding)
1250{
1251 const char* first_invalid_escape;
1252 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1253 recode_encoding,
1254 &first_invalid_escape);
1255 if (result == NULL)
1256 return NULL;
1257 if (first_invalid_escape != NULL) {
1258 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1259 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001260 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001261 Py_DECREF(result);
1262 return NULL;
1263 }
1264 }
1265 return result;
1266
1267}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268/* -------------------------------------------------------------------- */
1269/* object api */
1270
1271Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001272PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 if (!PyBytes_Check(op)) {
1275 PyErr_Format(PyExc_TypeError,
1276 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1277 return -1;
1278 }
1279 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280}
1281
1282char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001283PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 if (!PyBytes_Check(op)) {
1286 PyErr_Format(PyExc_TypeError,
1287 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1288 return NULL;
1289 }
1290 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291}
1292
1293int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294PyBytes_AsStringAndSize(PyObject *obj,
1295 char **s,
1296 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (s == NULL) {
1299 PyErr_BadInternalCall();
1300 return -1;
1301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (!PyBytes_Check(obj)) {
1304 PyErr_Format(PyExc_TypeError,
1305 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1306 return -1;
1307 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 *s = PyBytes_AS_STRING(obj);
1310 if (len != NULL)
1311 *len = PyBytes_GET_SIZE(obj);
1312 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001313 PyErr_SetString(PyExc_ValueError,
1314 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 return -1;
1316 }
1317 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318}
Neal Norwitz6968b052007-02-27 19:02:19 +00001319
1320/* -------------------------------------------------------------------- */
1321/* Methods */
1322
Eric Smith0923d1d2009-04-16 20:16:10 +00001323#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001324
1325#include "stringlib/fastsearch.h"
1326#include "stringlib/count.h"
1327#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001328#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001329#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001330#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001331#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001332
Eric Smith0f78bff2009-11-30 01:01:42 +00001333#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001334
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335PyObject *
1336PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001337{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001338 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001340 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342 unsigned char quote, *s, *p;
1343
1344 /* Compute size of output string */
1345 squotes = dquotes = 0;
1346 newsize = 3; /* b'' */
1347 s = (unsigned char*)op->ob_sval;
1348 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001349 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001351 case '\'': squotes++; break;
1352 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001354 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 default:
1356 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001359 if (newsize > PY_SSIZE_T_MAX - incr)
1360 goto overflow;
1361 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362 }
1363 quote = '\'';
1364 if (smartquotes && squotes && !dquotes)
1365 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001366 if (squotes && quote == '\'') {
1367 if (newsize > PY_SSIZE_T_MAX - squotes)
1368 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371
1372 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (v == NULL) {
1374 return NULL;
1375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 *p++ = 'b', *p++ = quote;
1379 for (i = 0; i < length; i++) {
1380 unsigned char c = op->ob_sval[i];
1381 if (c == quote || c == '\\')
1382 *p++ = '\\', *p++ = c;
1383 else if (c == '\t')
1384 *p++ = '\\', *p++ = 't';
1385 else if (c == '\n')
1386 *p++ = '\\', *p++ = 'n';
1387 else if (c == '\r')
1388 *p++ = '\\', *p++ = 'r';
1389 else if (c < ' ' || c >= 0x7f) {
1390 *p++ = '\\';
1391 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001392 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1393 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001395 else
1396 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001398 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001399 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001401
1402 overflow:
1403 PyErr_SetString(PyExc_OverflowError,
1404 "bytes object is too large to make repr");
1405 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001406}
1407
Neal Norwitz6968b052007-02-27 19:02:19 +00001408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001409bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001412}
1413
Neal Norwitz6968b052007-02-27 19:02:19 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (Py_BytesWarningFlag) {
1418 if (PyErr_WarnEx(PyExc_BytesWarning,
1419 "str() on a bytes instance", 1))
1420 return NULL;
1421 }
1422 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001423}
1424
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001426bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429}
Neal Norwitz6968b052007-02-27 19:02:19 +00001430
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431/* This is also used by PyBytes_Concat() */
1432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001433bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 Py_buffer va, vb;
1436 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 va.len = -1;
1439 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001440 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1441 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001443 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 goto done;
1445 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 /* Optimize end cases */
1448 if (va.len == 0 && PyBytes_CheckExact(b)) {
1449 result = b;
1450 Py_INCREF(result);
1451 goto done;
1452 }
1453 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1454 result = a;
1455 Py_INCREF(result);
1456 goto done;
1457 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001459 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyErr_NoMemory();
1461 goto done;
1462 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001464 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 if (result != NULL) {
1466 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1467 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1468 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469
1470 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (va.len != -1)
1472 PyBuffer_Release(&va);
1473 if (vb.len != -1)
1474 PyBuffer_Release(&vb);
1475 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476}
Neal Norwitz6968b052007-02-27 19:02:19 +00001477
1478static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001480{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001481 Py_ssize_t i;
1482 Py_ssize_t j;
1483 Py_ssize_t size;
1484 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 size_t nbytes;
1486 if (n < 0)
1487 n = 0;
1488 /* watch out for overflows: the size can overflow int,
1489 * and the # of bytes needed can overflow size_t
1490 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001491 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 PyErr_SetString(PyExc_OverflowError,
1493 "repeated bytes are too long");
1494 return NULL;
1495 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001496 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1498 Py_INCREF(a);
1499 return (PyObject *)a;
1500 }
1501 nbytes = (size_t)size;
1502 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1503 PyErr_SetString(PyExc_OverflowError,
1504 "repeated bytes are too long");
1505 return NULL;
1506 }
1507 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1508 if (op == NULL)
1509 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001510 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 op->ob_shash = -1;
1512 op->ob_sval[size] = '\0';
1513 if (Py_SIZE(a) == 1 && n > 0) {
1514 memset(op->ob_sval, a->ob_sval[0] , n);
1515 return (PyObject *) op;
1516 }
1517 i = 0;
1518 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001519 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 i = Py_SIZE(a);
1521 }
1522 while (i < size) {
1523 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001524 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 i += j;
1526 }
1527 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001528}
1529
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001530static int
1531bytes_contains(PyObject *self, PyObject *arg)
1532{
1533 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1534}
1535
Neal Norwitz6968b052007-02-27 19:02:19 +00001536static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001537bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (i < 0 || i >= Py_SIZE(a)) {
1540 PyErr_SetString(PyExc_IndexError, "index out of range");
1541 return NULL;
1542 }
1543 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001544}
1545
Benjamin Peterson621b4302016-09-09 13:54:34 -07001546static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548{
1549 int cmp;
1550 Py_ssize_t len;
1551
1552 len = Py_SIZE(a);
1553 if (Py_SIZE(b) != len)
1554 return 0;
1555
1556 if (a->ob_sval[0] != b->ob_sval[0])
1557 return 0;
1558
1559 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560 return (cmp == 0);
1561}
1562
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001564bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 int c;
1567 Py_ssize_t len_a, len_b;
1568 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001569 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 /* Make sure both arguments are strings. */
1572 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001573 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001574 rc = PyObject_IsInstance((PyObject*)a,
1575 (PyObject*)&PyUnicode_Type);
1576 if (!rc)
1577 rc = PyObject_IsInstance((PyObject*)b,
1578 (PyObject*)&PyUnicode_Type);
1579 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001581 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001582 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001583 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001584 return NULL;
1585 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001586 else {
1587 rc = PyObject_IsInstance((PyObject*)a,
1588 (PyObject*)&PyLong_Type);
1589 if (!rc)
1590 rc = PyObject_IsInstance((PyObject*)b,
1591 (PyObject*)&PyLong_Type);
1592 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001593 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001594 if (rc) {
1595 if (PyErr_WarnEx(PyExc_BytesWarning,
1596 "Comparison between bytes and int", 1))
1597 return NULL;
1598 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001599 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 }
stratakise8b19652017-11-02 11:32:54 +01001601 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001603 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001605 case Py_EQ:
1606 case Py_LE:
1607 case Py_GE:
1608 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001609 Py_RETURN_TRUE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001611 case Py_NE:
1612 case Py_LT:
1613 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001614 Py_RETURN_FALSE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001615 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001616 default:
1617 PyErr_BadArgument();
1618 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 }
1620 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001621 else if (op == Py_EQ || op == Py_NE) {
1622 int eq = bytes_compare_eq(a, b);
1623 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001624 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625 }
1626 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001627 len_a = Py_SIZE(a);
1628 len_b = Py_SIZE(b);
1629 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001630 if (min_len > 0) {
1631 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001632 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001633 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001635 else
1636 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001637 if (c != 0)
1638 Py_RETURN_RICHCOMPARE(c, 0, op);
1639 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001641}
1642
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001643static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001644bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001645{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001646 if (a->ob_shash == -1) {
1647 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001648 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001649 }
1650 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001651}
1652
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 if (PyIndex_Check(item)) {
1657 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1658 if (i == -1 && PyErr_Occurred())
1659 return NULL;
1660 if (i < 0)
1661 i += PyBytes_GET_SIZE(self);
1662 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1663 PyErr_SetString(PyExc_IndexError,
1664 "index out of range");
1665 return NULL;
1666 }
1667 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1668 }
1669 else if (PySlice_Check(item)) {
1670 Py_ssize_t start, stop, step, slicelength, cur, i;
1671 char* source_buf;
1672 char* result_buf;
1673 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001674
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001675 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return NULL;
1677 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001678 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1679 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 if (slicelength <= 0) {
1682 return PyBytes_FromStringAndSize("", 0);
1683 }
1684 else if (start == 0 && step == 1 &&
1685 slicelength == PyBytes_GET_SIZE(self) &&
1686 PyBytes_CheckExact(self)) {
1687 Py_INCREF(self);
1688 return (PyObject *)self;
1689 }
1690 else if (step == 1) {
1691 return PyBytes_FromStringAndSize(
1692 PyBytes_AS_STRING(self) + start,
1693 slicelength);
1694 }
1695 else {
1696 source_buf = PyBytes_AS_STRING(self);
1697 result = PyBytes_FromStringAndSize(NULL, slicelength);
1698 if (result == NULL)
1699 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 result_buf = PyBytes_AS_STRING(result);
1702 for (cur = start, i = 0; i < slicelength;
1703 cur += step, i++) {
1704 result_buf[i] = source_buf[cur];
1705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return result;
1708 }
1709 }
1710 else {
1711 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001712 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 Py_TYPE(item)->tp_name);
1714 return NULL;
1715 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716}
1717
1718static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001719bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1722 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723}
1724
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001725static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 (lenfunc)bytes_length, /*sq_length*/
1727 (binaryfunc)bytes_concat, /*sq_concat*/
1728 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1729 (ssizeargfunc)bytes_item, /*sq_item*/
1730 0, /*sq_slice*/
1731 0, /*sq_ass_item*/
1732 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001733 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734};
1735
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001736static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 (lenfunc)bytes_length,
1738 (binaryfunc)bytes_subscript,
1739 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740};
1741
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001742static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 (getbufferproc)bytes_buffer_getbuffer,
1744 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745};
1746
1747
/* Values for the "striptype" argument of the strip helpers below:
   which end(s) of the bytes object to strip. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1751
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752/*[clinic input]
1753bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001755 sep: object = None
1756 The delimiter according which to split the bytes.
1757 None (the default value) means split on ASCII whitespace characters
1758 (space, tab, return, newline, formfeed, vertical tab).
1759 maxsplit: Py_ssize_t = -1
1760 Maximum number of splits to do.
1761 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763Return a list of the sections in the bytes, using sep as the delimiter.
1764[clinic start generated code]*/
1765
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001766static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001767bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1768/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001769{
1770 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 const char *s = PyBytes_AS_STRING(self), *sub;
1772 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 if (maxsplit < 0)
1776 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001779 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 return NULL;
1781 sub = vsub.buf;
1782 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1785 PyBuffer_Release(&vsub);
1786 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001787}
1788
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789/*[clinic input]
1790bytes.partition
1791
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001792 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001793 /
1794
1795Partition the bytes into three parts using the given separator.
1796
1797This will search for the separator sep in the bytes. If the separator is found,
1798returns a 3-tuple containing the part before the separator, the separator
1799itself, and the part after it.
1800
1801If the separator is not found, returns a 3-tuple containing the original bytes
1802object and two empty bytes objects.
1803[clinic start generated code]*/
1804
Neal Norwitz6968b052007-02-27 19:02:19 +00001805static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001807/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001808{
Neal Norwitz6968b052007-02-27 19:02:19 +00001809 return stringlib_partition(
1810 (PyObject*) self,
1811 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001812 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001813 );
1814}
1815
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001816/*[clinic input]
1817bytes.rpartition
1818
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001819 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001820 /
1821
1822Partition the bytes into three parts using the given separator.
1823
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001824This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825the separator is found, returns a 3-tuple containing the part before the
1826separator, the separator itself, and the part after it.
1827
1828If the separator is not found, returns a 3-tuple containing two empty bytes
1829objects and the original bytes object.
1830[clinic start generated code]*/
1831
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001832static PyObject *
1833bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001834/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 return stringlib_rpartition(
1837 (PyObject*) self,
1838 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001839 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001841}
1842
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843/*[clinic input]
1844bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846Return a list of the sections in the bytes, using sep as the delimiter.
1847
1848Splitting is done starting at the end of the bytes and working to the front.
1849[clinic start generated code]*/
1850
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001852bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1853/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854{
1855 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 const char *s = PyBytes_AS_STRING(self), *sub;
1857 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001858 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 if (maxsplit < 0)
1861 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001864 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 return NULL;
1866 sub = vsub.buf;
1867 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1870 PyBuffer_Release(&vsub);
1871 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001872}
1873
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001875/*[clinic input]
1876bytes.join
1877
1878 iterable_of_bytes: object
1879 /
1880
1881Concatenate any number of bytes objects.
1882
1883The bytes whose method is called is inserted in between each pair.
1884
1885The result is returned as a new bytes object.
1886
1887Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1888[clinic start generated code]*/
1889
Neal Norwitz6968b052007-02-27 19:02:19 +00001890static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001891bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1892/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001893{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001894 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001895}
1896
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001897PyObject *
1898_PyBytes_Join(PyObject *sep, PyObject *x)
1899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 assert(sep != NULL && PyBytes_Check(sep));
1901 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001902 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903}
1904
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001905static PyObject *
1906bytes_find(PyBytesObject *self, PyObject *args)
1907{
1908 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1909}
1910
1911static PyObject *
1912bytes_index(PyBytesObject *self, PyObject *args)
1913{
1914 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1915}
1916
1917
1918static PyObject *
1919bytes_rfind(PyBytesObject *self, PyObject *args)
1920{
1921 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1922}
1923
1924
1925static PyObject *
1926bytes_rindex(PyBytesObject *self, PyObject *args)
1927{
1928 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1929}
1930
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
1932Py_LOCAL_INLINE(PyObject *)
1933do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001934{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 Py_buffer vsep;
1936 char *s = PyBytes_AS_STRING(self);
1937 Py_ssize_t len = PyBytes_GET_SIZE(self);
1938 char *sep;
1939 Py_ssize_t seplen;
1940 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001942 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 return NULL;
1944 sep = vsep.buf;
1945 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 i = 0;
1948 if (striptype != RIGHTSTRIP) {
1949 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1950 i++;
1951 }
1952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 j = len;
1955 if (striptype != LEFTSTRIP) {
1956 do {
1957 j--;
1958 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1959 j++;
1960 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1965 Py_INCREF(self);
1966 return (PyObject*)self;
1967 }
1968 else
1969 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001970}
1971
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
1973Py_LOCAL_INLINE(PyObject *)
1974do_strip(PyBytesObject *self, int striptype)
1975{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001976 char *s = PyBytes_AS_STRING(self);
1977 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001981 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 i++;
1983 }
1984 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
David Malcolm96960882010-11-05 17:23:41 +00001990 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 j++;
1992 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000}
2001
2002
2003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 if (bytes != NULL && bytes != Py_None) {
2007 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 }
2009 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010}
2011
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012/*[clinic input]
2013bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015 bytes: object = None
2016 /
2017
2018Strip leading and trailing bytes contained in the argument.
2019
2020If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2021[clinic start generated code]*/
2022
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002023static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002025/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002028}
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030/*[clinic input]
2031bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002032
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033 bytes: object = None
2034 /
2035
2036Strip leading bytes contained in the argument.
2037
2038If the argument is omitted or None, strip leading ASCII whitespace.
2039[clinic start generated code]*/
2040
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041static PyObject *
2042bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002043/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044{
2045 return do_argstrip(self, LEFTSTRIP, bytes);
2046}
2047
2048/*[clinic input]
2049bytes.rstrip
2050
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002051 bytes: object = None
2052 /
2053
2054Strip trailing bytes contained in the argument.
2055
2056If the argument is omitted or None, strip trailing ASCII whitespace.
2057[clinic start generated code]*/
2058
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002059static PyObject *
2060bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002061/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002062{
2063 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002064}
Neal Norwitz6968b052007-02-27 19:02:19 +00002065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002067static PyObject *
2068bytes_count(PyBytesObject *self, PyObject *args)
2069{
2070 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2071}
2072
2073
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002074/*[clinic input]
2075bytes.translate
2076
Victor Stinner049e5092014-08-17 22:20:00 +02002077 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002078 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002080 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002081
2082Return a copy with each character mapped by the given translation table.
2083
Martin Panter1b6c6da2016-08-27 08:35:02 +00002084All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085The remaining characters are mapped through the given translation table.
2086[clinic start generated code]*/
2087
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002088static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002089bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002090 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002091/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002093 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002094 Py_buffer table_view = {NULL, NULL};
2095 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002097 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 Py_ssize_t inlen, tablen, dellen = 0;
2101 PyObject *result;
2102 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 if (PyBytes_Check(table)) {
2105 table_chars = PyBytes_AS_STRING(table);
2106 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002108 else if (table == Py_None) {
2109 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 tablen = 256;
2111 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002112 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002113 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002115 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002116 tablen = table_view.len;
2117 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 if (tablen != 256) {
2120 PyErr_SetString(PyExc_ValueError,
2121 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 return NULL;
2124 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 if (deletechars != NULL) {
2127 if (PyBytes_Check(deletechars)) {
2128 del_table_chars = PyBytes_AS_STRING(deletechars);
2129 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002131 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002132 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 PyBuffer_Release(&table_view);
2134 return NULL;
2135 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002136 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002137 dellen = del_table_view.len;
2138 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 }
2140 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 dellen = 0;
2143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 inlen = PyBytes_GET_SIZE(input_obj);
2146 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002147 if (result == NULL) {
2148 PyBuffer_Release(&del_table_view);
2149 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002151 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002152 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002155 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 /* If no deletions are required, use faster code */
2157 for (i = inlen; --i >= 0; ) {
2158 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 changed = 1;
2161 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002162 if (!changed && PyBytes_CheckExact(input_obj)) {
2163 Py_INCREF(input_obj);
2164 Py_DECREF(result);
2165 result = input_obj;
2166 }
2167 PyBuffer_Release(&del_table_view);
2168 PyBuffer_Release(&table_view);
2169 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002172 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 for (i = 0; i < 256; i++)
2174 trans_table[i] = Py_CHARMASK(i);
2175 } else {
2176 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002179 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002183 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 for (i = inlen; --i >= 0; ) {
2186 c = Py_CHARMASK(*input++);
2187 if (trans_table[c] != -1)
2188 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2189 continue;
2190 changed = 1;
2191 }
2192 if (!changed && PyBytes_CheckExact(input_obj)) {
2193 Py_DECREF(result);
2194 Py_INCREF(input_obj);
2195 return input_obj;
2196 }
2197 /* Fix the size of the resulting string */
2198 if (inlen > 0)
2199 _PyBytes_Resize(&result, output - output_start);
2200 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201}
2202
2203
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002204/*[clinic input]
2205
2206@staticmethod
2207bytes.maketrans
2208
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002209 frm: Py_buffer
2210 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002211 /
2212
2213Return a translation table useable for the bytes or bytearray translate method.
2214
2215The returned table will be one where each byte in frm is mapped to the byte at
2216the same position in to.
2217
2218The bytes objects frm and to must be of the same length.
2219[clinic start generated code]*/
2220
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002222bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002223/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002224{
2225 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002226}
2227
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002228
2229/*[clinic input]
2230bytes.replace
2231
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002232 old: Py_buffer
2233 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234 count: Py_ssize_t = -1
2235 Maximum number of occurrences to replace.
2236 -1 (the default value) means replace all occurrences.
2237 /
2238
2239Return a copy with all occurrences of substring old replaced by new.
2240
2241If the optional argument count is given, only the first count occurrences are
2242replaced.
2243[clinic start generated code]*/
2244
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002245static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002246bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002247 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002248/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002250 return stringlib_replace((PyObject *)self,
2251 (const char *)old->buf, old->len,
2252 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253}
2254
2255/** End DALKE **/
2256
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002258static PyObject *
2259bytes_startswith(PyBytesObject *self, PyObject *args)
2260{
2261 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2262}
2263
2264static PyObject *
2265bytes_endswith(PyBytesObject *self, PyObject *args)
2266{
2267 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2268}
2269
2270
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271/*[clinic input]
2272bytes.decode
2273
2274 encoding: str(c_default="NULL") = 'utf-8'
2275 The encoding with which to decode the bytes.
2276 errors: str(c_default="NULL") = 'strict'
2277 The error handling scheme to use for the handling of decoding errors.
2278 The default is 'strict' meaning that decoding errors raise a
2279 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2280 as well as any other name registered with codecs.register_error that
2281 can handle UnicodeDecodeErrors.
2282
2283Decode the bytes using the codec registered for encoding.
2284[clinic start generated code]*/
2285
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002287bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002288 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002289/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002290{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002291 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002292}
2293
Guido van Rossum20188312006-05-05 15:15:40 +00002294
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002295/*[clinic input]
2296bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002297
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002298 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002299
2300Return a list of the lines in the bytes, breaking at line boundaries.
2301
2302Line breaks are not included in the resulting list unless keepends is given and
2303true.
2304[clinic start generated code]*/
2305
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002306static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002307bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002308/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002309{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002310 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002311 (PyObject*) self, PyBytes_AS_STRING(self),
2312 PyBytes_GET_SIZE(self), keepends
2313 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002314}
2315
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002316/*[clinic input]
2317@classmethod
2318bytes.fromhex
2319
2320 string: unicode
2321 /
2322
2323Create a bytes object from a string of hexadecimal numbers.
2324
2325Spaces between two numbers are accepted.
2326Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2327[clinic start generated code]*/
2328
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002329static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002330bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002331/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002332{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002333 PyObject *result = _PyBytes_FromHex(string, 0);
2334 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002335 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2336 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002337 }
2338 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002339}
2340
2341PyObject*
2342_PyBytes_FromHex(PyObject *string, int use_bytearray)
2343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002345 Py_ssize_t hexlen, invalid_char;
2346 unsigned int top, bot;
2347 Py_UCS1 *str, *end;
2348 _PyBytesWriter writer;
2349
2350 _PyBytesWriter_Init(&writer);
2351 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002353 assert(PyUnicode_Check(string));
2354 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002357
Victor Stinner2bf89932015-10-14 11:25:33 +02002358 if (!PyUnicode_IS_ASCII(string)) {
2359 void *data = PyUnicode_DATA(string);
2360 unsigned int kind = PyUnicode_KIND(string);
2361 Py_ssize_t i;
2362
2363 /* search for the first non-ASCII character */
2364 for (i = 0; i < hexlen; i++) {
2365 if (PyUnicode_READ(kind, data, i) >= 128)
2366 break;
2367 }
2368 invalid_char = i;
2369 goto error;
2370 }
2371
2372 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2373 str = PyUnicode_1BYTE_DATA(string);
2374
2375 /* This overestimates if there are spaces */
2376 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2377 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002379
2380 end = str + hexlen;
2381 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002383 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002384 do {
2385 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002386 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002387 if (str >= end)
2388 break;
2389 }
2390
2391 top = _PyLong_DigitValue[*str];
2392 if (top >= 16) {
2393 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 goto error;
2395 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002396 str++;
2397
2398 bot = _PyLong_DigitValue[*str];
2399 if (bot >= 16) {
2400 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2401 goto error;
2402 }
2403 str++;
2404
2405 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002407
2408 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409
2410 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002411 PyErr_Format(PyExc_ValueError,
2412 "non-hexadecimal number found in "
2413 "fromhex() arg at position %zd", invalid_char);
2414 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416}
2417
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002418PyDoc_STRVAR(hex__doc__,
2419"B.hex() -> string\n\
2420\n\
2421Create a string of hexadecimal numbers from a bytes object.\n\
2422Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2423
2424static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002425bytes_hex(PyBytesObject *self)
2426{
2427 char* argbuf = PyBytes_AS_STRING(self);
2428 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2429 return _Py_strhex(argbuf, arglen);
2430}
2431
2432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002433bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436}
2437
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002438
2439static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002440bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2442 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2443 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002444 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2445 _Py_center__doc__},
2446 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002447 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002448 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002449 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002450 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002451 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002452 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002453 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002454 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002455 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002456 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2457 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2459 _Py_isalnum__doc__},
2460 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2461 _Py_isalpha__doc__},
INADA Naokia49ac992018-01-27 14:06:21 +09002462 {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
2463 _Py_isascii__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2465 _Py_isdigit__doc__},
2466 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2467 _Py_islower__doc__},
2468 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2469 _Py_isspace__doc__},
2470 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2471 _Py_istitle__doc__},
2472 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2473 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002474 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002475 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002477 BYTES_LSTRIP_METHODDEF
2478 BYTES_MAKETRANS_METHODDEF
2479 BYTES_PARTITION_METHODDEF
2480 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002481 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2482 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002483 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002484 BYTES_RPARTITION_METHODDEF
2485 BYTES_RSPLIT_METHODDEF
2486 BYTES_RSTRIP_METHODDEF
2487 BYTES_SPLIT_METHODDEF
2488 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002489 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002490 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002491 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2493 _Py_swapcase__doc__},
2494 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002497 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002499};
2500
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002502bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002503{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002504 if (!PyBytes_Check(self)) {
2505 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002506 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002507 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002508 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002509}
2510
2511static PyNumberMethods bytes_as_number = {
2512 0, /*nb_add*/
2513 0, /*nb_subtract*/
2514 0, /*nb_multiply*/
2515 bytes_mod, /*nb_remainder*/
2516};
2517
2518static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002519bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520
2521static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002522bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 PyObject *x = NULL;
2525 const char *encoding = NULL;
2526 const char *errors = NULL;
2527 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002528 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 Py_ssize_t size;
2530 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002531 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002534 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2536 &encoding, &errors))
2537 return NULL;
2538 if (x == NULL) {
2539 if (encoding != NULL || errors != NULL) {
2540 PyErr_SetString(PyExc_TypeError,
2541 "encoding or errors without sequence "
2542 "argument");
2543 return NULL;
2544 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002545 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002548 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002550 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002552 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 return NULL;
2554 }
2555 new = PyUnicode_AsEncodedString(x, encoding, errors);
2556 if (new == NULL)
2557 return NULL;
2558 assert(PyBytes_Check(new));
2559 return new;
2560 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002561
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002562 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002563 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002564 PyUnicode_Check(x) ?
2565 "string argument without an encoding" :
2566 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002567 return NULL;
2568 }
2569
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002570 /* We'd like to call PyObject_Bytes here, but we need to check for an
2571 integer argument before deferring to PyBytes_FromObject, something
2572 PyObject_Bytes doesn't do. */
2573 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2574 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002575 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002576 Py_DECREF(func);
2577 if (new == NULL)
2578 return NULL;
2579 if (!PyBytes_Check(new)) {
2580 PyErr_Format(PyExc_TypeError,
2581 "__bytes__ returned non-bytes (type %.200s)",
2582 Py_TYPE(new)->tp_name);
2583 Py_DECREF(new);
2584 return NULL;
2585 }
2586 return new;
2587 }
2588 else if (PyErr_Occurred())
2589 return NULL;
2590
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002591 if (PyUnicode_Check(x)) {
2592 PyErr_SetString(PyExc_TypeError,
2593 "string argument without an encoding");
2594 return NULL;
2595 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002597 if (PyIndex_Check(x)) {
2598 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2599 if (size == -1 && PyErr_Occurred()) {
Miss Islington (bot)13708322018-10-14 14:26:28 -07002600 if (!PyErr_ExceptionMatches(PyExc_TypeError))
INADA Naokia634e232017-01-06 17:32:01 +09002601 return NULL;
2602 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002603 }
INADA Naokia634e232017-01-06 17:32:01 +09002604 else {
2605 if (size < 0) {
2606 PyErr_SetString(PyExc_ValueError, "negative count");
2607 return NULL;
2608 }
2609 new = _PyBytes_FromSize(size, 1);
2610 if (new == NULL)
2611 return NULL;
2612 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002613 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002616 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002617}
2618
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002619static PyObject*
2620_PyBytes_FromBuffer(PyObject *x)
2621{
2622 PyObject *new;
2623 Py_buffer view;
2624
2625 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2626 return NULL;
2627
2628 new = PyBytes_FromStringAndSize(NULL, view.len);
2629 if (!new)
2630 goto fail;
2631 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2632 &view, view.len, 'C') < 0)
2633 goto fail;
2634 PyBuffer_Release(&view);
2635 return new;
2636
2637fail:
2638 Py_XDECREF(new);
2639 PyBuffer_Release(&view);
2640 return NULL;
2641}
2642
Victor Stinner3c50ce32015-10-14 13:50:40 +02002643#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2644 do { \
2645 PyObject *bytes; \
2646 Py_ssize_t i; \
2647 Py_ssize_t value; \
2648 char *str; \
2649 PyObject *item; \
2650 \
2651 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2652 if (bytes == NULL) \
2653 return NULL; \
2654 str = ((PyBytesObject *)bytes)->ob_sval; \
2655 \
2656 for (i = 0; i < Py_SIZE(x); i++) { \
2657 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002658 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002659 if (value == -1 && PyErr_Occurred()) \
2660 goto error; \
2661 \
2662 if (value < 0 || value >= 256) { \
2663 PyErr_SetString(PyExc_ValueError, \
2664 "bytes must be in range(0, 256)"); \
2665 goto error; \
2666 } \
2667 *str++ = (char) value; \
2668 } \
2669 return bytes; \
2670 \
2671 error: \
2672 Py_DECREF(bytes); \
2673 return NULL; \
2674 } while (0)
2675
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002676static PyObject*
2677_PyBytes_FromList(PyObject *x)
2678{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002679 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002680}
2681
2682static PyObject*
2683_PyBytes_FromTuple(PyObject *x)
2684{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002685 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002686}
2687
2688static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002689_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002690{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002691 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002693 _PyBytesWriter writer;
2694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002696 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 if (size == -1 && PyErr_Occurred())
2698 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002699
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002700 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002701 str = _PyBytesWriter_Alloc(&writer, size);
2702 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002704 writer.overallocate = 1;
2705 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 /* Run the iterator to exhaustion */
2708 for (i = 0; ; i++) {
2709 PyObject *item;
2710 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Get the next item */
2713 item = PyIter_Next(it);
2714 if (item == NULL) {
2715 if (PyErr_Occurred())
2716 goto error;
2717 break;
2718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002721 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 Py_DECREF(item);
2723 if (value == -1 && PyErr_Occurred())
2724 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 /* Range check */
2727 if (value < 0 || value >= 256) {
2728 PyErr_SetString(PyExc_ValueError,
2729 "bytes must be in range(0, 256)");
2730 goto error;
2731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 /* Append the byte */
2734 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002735 str = _PyBytesWriter_Resize(&writer, str, size+1);
2736 if (str == NULL)
2737 return NULL;
2738 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002740 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002741 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002742
2743 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744
2745 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002746 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748}
2749
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002750PyObject *
2751PyBytes_FromObject(PyObject *x)
2752{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002753 PyObject *it, *result;
2754
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002755 if (x == NULL) {
2756 PyErr_BadInternalCall();
2757 return NULL;
2758 }
2759
2760 if (PyBytes_CheckExact(x)) {
2761 Py_INCREF(x);
2762 return x;
2763 }
2764
2765 /* Use the modern buffer interface */
2766 if (PyObject_CheckBuffer(x))
2767 return _PyBytes_FromBuffer(x);
2768
2769 if (PyList_CheckExact(x))
2770 return _PyBytes_FromList(x);
2771
2772 if (PyTuple_CheckExact(x))
2773 return _PyBytes_FromTuple(x);
2774
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002775 if (!PyUnicode_Check(x)) {
2776 it = PyObject_GetIter(x);
2777 if (it != NULL) {
2778 result = _PyBytes_FromIterator(it, x);
2779 Py_DECREF(it);
2780 return result;
2781 }
Miss Islington (bot)13708322018-10-14 14:26:28 -07002782 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2783 return NULL;
2784 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002785 }
2786
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002787 PyErr_Format(PyExc_TypeError,
2788 "cannot convert '%.200s' object to bytes",
2789 x->ob_type->tp_name);
2790 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002791}
2792
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002794bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 PyObject *tmp, *pnew;
2797 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 assert(PyType_IsSubtype(type, &PyBytes_Type));
2800 tmp = bytes_new(&PyBytes_Type, args, kwds);
2801 if (tmp == NULL)
2802 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002803 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 n = PyBytes_GET_SIZE(tmp);
2805 pnew = type->tp_alloc(type, n);
2806 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002807 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 PyBytes_AS_STRING(tmp), n+1);
2809 ((PyBytesObject *)pnew)->ob_shash =
2810 ((PyBytesObject *)tmp)->ob_shash;
2811 }
2812 Py_DECREF(tmp);
2813 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814}
2815
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002816PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002817"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002818bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002819bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002820bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2821bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002822\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002824 - an iterable yielding integers in range(256)\n\
2825 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002826 - any object implementing the buffer API.\n\
2827 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002828
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002829static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002830
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002831PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002832 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2833 "bytes",
2834 PyBytesObject_SIZE,
2835 sizeof(char),
2836 bytes_dealloc, /* tp_dealloc */
2837 0, /* tp_print */
2838 0, /* tp_getattr */
2839 0, /* tp_setattr */
2840 0, /* tp_reserved */
2841 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002842 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 &bytes_as_sequence, /* tp_as_sequence */
2844 &bytes_as_mapping, /* tp_as_mapping */
2845 (hashfunc)bytes_hash, /* tp_hash */
2846 0, /* tp_call */
2847 bytes_str, /* tp_str */
2848 PyObject_GenericGetAttr, /* tp_getattro */
2849 0, /* tp_setattro */
2850 &bytes_as_buffer, /* tp_as_buffer */
2851 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2852 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2853 bytes_doc, /* tp_doc */
2854 0, /* tp_traverse */
2855 0, /* tp_clear */
2856 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2857 0, /* tp_weaklistoffset */
2858 bytes_iter, /* tp_iter */
2859 0, /* tp_iternext */
2860 bytes_methods, /* tp_methods */
2861 0, /* tp_members */
2862 0, /* tp_getset */
2863 &PyBaseObject_Type, /* tp_base */
2864 0, /* tp_dict */
2865 0, /* tp_descr_get */
2866 0, /* tp_descr_set */
2867 0, /* tp_dictoffset */
2868 0, /* tp_init */
2869 0, /* tp_alloc */
2870 bytes_new, /* tp_new */
2871 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002872};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002873
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002875PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 assert(pv != NULL);
2878 if (*pv == NULL)
2879 return;
2880 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002881 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 return;
2883 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002884
2885 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2886 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002887 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002888 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002889
Antoine Pitrou161d6952014-05-01 14:36:20 +02002890 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002891 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002892 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2893 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2894 Py_CLEAR(*pv);
2895 return;
2896 }
2897
2898 oldsize = PyBytes_GET_SIZE(*pv);
2899 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2900 PyErr_NoMemory();
2901 goto error;
2902 }
2903 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2904 goto error;
2905
2906 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2907 PyBuffer_Release(&wb);
2908 return;
2909
2910 error:
2911 PyBuffer_Release(&wb);
2912 Py_CLEAR(*pv);
2913 return;
2914 }
2915
2916 else {
2917 /* Multiple references, need to create new object */
2918 PyObject *v;
2919 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002920 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922}
2923
2924void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002925PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 PyBytes_Concat(pv, w);
2928 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929}
2930
2931
Ethan Furmanb95b5612015-01-23 20:05:18 -08002932/* The following function breaks the notion that bytes are immutable:
2933 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002935 as creating a new bytes object and destroying the old one, only
2936 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002938 Note that if there's not enough memory to resize the bytes object, the
2939 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002940 memory" exception is set, and -1 is returned. Else (on success) 0 is
2941 returned, and the value in *pv may or may not be the same as on input.
2942 As always, an extra byte is allocated for a trailing \0 byte (newsize
2943 does *not* include that), and a trailing \0 byte is stored.
2944*/
2945
2946int
2947_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2948{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002949 PyObject *v;
2950 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002952 if (!PyBytes_Check(v) || newsize < 0) {
2953 goto error;
2954 }
2955 if (Py_SIZE(v) == newsize) {
2956 /* return early if newsize equals to v->ob_size */
2957 return 0;
2958 }
2959 if (Py_REFCNT(v) != 1) {
2960 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002961 }
2962 /* XXX UNREF/NEWREF interface should be more symmetrical */
2963 _Py_DEC_REFTOTAL;
2964 _Py_ForgetReference(v);
2965 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002966 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 if (*pv == NULL) {
2968 PyObject_Del(v);
2969 PyErr_NoMemory();
2970 return -1;
2971 }
2972 _Py_NewReference(*pv);
2973 sv = (PyBytesObject *) *pv;
2974 Py_SIZE(sv) = newsize;
2975 sv->ob_sval[newsize] = '\0';
2976 sv->ob_shash = -1; /* invalidate cached hash value */
2977 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002978error:
2979 *pv = 0;
2980 Py_DECREF(v);
2981 PyErr_BadInternalCall();
2982 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983}
2984
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985void
2986PyBytes_Fini(void)
2987{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002989 for (i = 0; i < UCHAR_MAX + 1; i++)
2990 Py_CLEAR(characters[i]);
2991 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992}
2993
Benjamin Peterson4116f362008-05-27 00:36:20 +00002994/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002995
2996typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 PyObject_HEAD
2998 Py_ssize_t it_index;
2999 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001
3002static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 _PyObject_GC_UNTRACK(it);
3006 Py_XDECREF(it->it_seq);
3007 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008}
3009
3010static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 Py_VISIT(it->it_seq);
3014 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015}
3016
3017static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 PyBytesObject *seq;
3021 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 assert(it != NULL);
3024 seq = it->it_seq;
3025 if (seq == NULL)
3026 return NULL;
3027 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3030 item = PyLong_FromLong(
3031 (unsigned char)seq->ob_sval[it->it_index]);
3032 if (item != NULL)
3033 ++it->it_index;
3034 return item;
3035 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003038 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003039 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040}
3041
3042static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 Py_ssize_t len = 0;
3046 if (it->it_seq)
3047 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3048 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049}
3050
3051PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003053
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003054static PyObject *
3055striter_reduce(striterobject *it)
3056{
3057 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003058 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003059 it->it_seq, it->it_index);
3060 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003061 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003062 }
3063}
3064
3065PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3066
3067static PyObject *
3068striter_setstate(striterobject *it, PyObject *state)
3069{
3070 Py_ssize_t index = PyLong_AsSsize_t(state);
3071 if (index == -1 && PyErr_Occurred())
3072 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003073 if (it->it_seq != NULL) {
3074 if (index < 0)
3075 index = 0;
3076 else if (index > PyBytes_GET_SIZE(it->it_seq))
3077 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3078 it->it_index = index;
3079 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003080 Py_RETURN_NONE;
3081}
3082
3083PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3084
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003085static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003086 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3087 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003088 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3089 reduce_doc},
3090 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3091 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003093};
3094
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003095PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003096 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3097 "bytes_iterator", /* tp_name */
3098 sizeof(striterobject), /* tp_basicsize */
3099 0, /* tp_itemsize */
3100 /* methods */
3101 (destructor)striter_dealloc, /* tp_dealloc */
3102 0, /* tp_print */
3103 0, /* tp_getattr */
3104 0, /* tp_setattr */
3105 0, /* tp_reserved */
3106 0, /* tp_repr */
3107 0, /* tp_as_number */
3108 0, /* tp_as_sequence */
3109 0, /* tp_as_mapping */
3110 0, /* tp_hash */
3111 0, /* tp_call */
3112 0, /* tp_str */
3113 PyObject_GenericGetAttr, /* tp_getattro */
3114 0, /* tp_setattro */
3115 0, /* tp_as_buffer */
3116 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3117 0, /* tp_doc */
3118 (traverseproc)striter_traverse, /* tp_traverse */
3119 0, /* tp_clear */
3120 0, /* tp_richcompare */
3121 0, /* tp_weaklistoffset */
3122 PyObject_SelfIter, /* tp_iter */
3123 (iternextfunc)striter_next, /* tp_iternext */
3124 striter_methods, /* tp_methods */
3125 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126};
3127
3128static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003129bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003131 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 if (!PyBytes_Check(seq)) {
3134 PyErr_BadInternalCall();
3135 return NULL;
3136 }
3137 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3138 if (it == NULL)
3139 return NULL;
3140 it->it_index = 0;
3141 Py_INCREF(seq);
3142 it->it_seq = (PyBytesObject *)seq;
3143 _PyObject_GC_TRACK(it);
3144 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003145}
Victor Stinner00165072015-10-09 01:53:21 +02003146
3147
3148/* _PyBytesWriter API */
3149
/* When overallocation is enabled, _PyBytesWriter_Resize() requests
   size + size / OVERALLOCATE_FACTOR bytes. */
#ifndef MS_WINDOWS
   /* Overallocating by 25% was found to be the best factor on Linux;
      the same value is used on every non-Windows platform. */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor. */
#  define OVERALLOCATE_FACTOR 2
#endif
3157
3158void
3159_PyBytesWriter_Init(_PyBytesWriter *writer)
3160{
Victor Stinner661aacc2015-10-14 09:41:48 +02003161 /* Set all attributes before small_buffer to 0 */
3162 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003163#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003164 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003165#endif
3166}
3167
3168void
3169_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3170{
3171 Py_CLEAR(writer->buffer);
3172}
3173
3174Py_LOCAL_INLINE(char*)
3175_PyBytesWriter_AsString(_PyBytesWriter *writer)
3176{
Victor Stinner661aacc2015-10-14 09:41:48 +02003177 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003178 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003179 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003180 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003181 else if (writer->use_bytearray) {
3182 assert(writer->buffer != NULL);
3183 return PyByteArray_AS_STRING(writer->buffer);
3184 }
3185 else {
3186 assert(writer->buffer != NULL);
3187 return PyBytes_AS_STRING(writer->buffer);
3188 }
Victor Stinner00165072015-10-09 01:53:21 +02003189}
3190
3191Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003192_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003193{
3194 char *start = _PyBytesWriter_AsString(writer);
3195 assert(str != NULL);
3196 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003197 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003198 return str - start;
3199}
3200
3201Py_LOCAL_INLINE(void)
3202_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3203{
3204#ifdef Py_DEBUG
3205 char *start, *end;
3206
Victor Stinner661aacc2015-10-14 09:41:48 +02003207 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003208 assert(writer->buffer == NULL);
3209 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003210 else {
3211 assert(writer->buffer != NULL);
3212 if (writer->use_bytearray)
3213 assert(PyByteArray_CheckExact(writer->buffer));
3214 else
3215 assert(PyBytes_CheckExact(writer->buffer));
3216 assert(Py_REFCNT(writer->buffer) == 1);
3217 }
Victor Stinner00165072015-10-09 01:53:21 +02003218
Victor Stinner661aacc2015-10-14 09:41:48 +02003219 if (writer->use_bytearray) {
3220 /* bytearray has its own overallocation algorithm,
3221 writer overallocation must be disabled */
3222 assert(!writer->overallocate);
3223 }
3224
3225 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003226 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003227 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003228 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003229 assert(start[writer->allocated] == 0);
3230
3231 end = start + writer->allocated;
3232 assert(str != NULL);
3233 assert(start <= str && str <= end);
3234#endif
3235}
3236
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003237void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003238_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003239{
3240 Py_ssize_t allocated, pos;
3241
3242 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003243 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003244
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003245 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003246 if (writer->overallocate
3247 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3248 /* overallocate to limit the number of realloc() */
3249 allocated += allocated / OVERALLOCATE_FACTOR;
3250 }
3251
Victor Stinner2bf89932015-10-14 11:25:33 +02003252 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003253 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003254 if (writer->use_bytearray) {
3255 if (PyByteArray_Resize(writer->buffer, allocated))
3256 goto error;
3257 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3258 but we cannot use ob_alloc because bytes may need to be moved
3259 to use the whole buffer. bytearray uses an internal optimization
3260 to avoid moving or copying bytes when bytes are removed at the
3261 beginning (ex: del bytearray[:1]). */
3262 }
3263 else {
3264 if (_PyBytes_Resize(&writer->buffer, allocated))
3265 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003266 }
3267 }
3268 else {
3269 /* convert from stack buffer to bytes object buffer */
3270 assert(writer->buffer == NULL);
3271
Victor Stinner661aacc2015-10-14 09:41:48 +02003272 if (writer->use_bytearray)
3273 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3274 else
3275 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003276 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003277 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003278
3279 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003280 char *dest;
3281 if (writer->use_bytearray)
3282 dest = PyByteArray_AS_STRING(writer->buffer);
3283 else
3284 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003285 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003286 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003287 pos);
3288 }
3289
Victor Stinnerb3653a32015-10-09 03:38:24 +02003290 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003291#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003292 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003293#endif
Victor Stinner00165072015-10-09 01:53:21 +02003294 }
3295 writer->allocated = allocated;
3296
3297 str = _PyBytesWriter_AsString(writer) + pos;
3298 _PyBytesWriter_CheckConsistency(writer, str);
3299 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003300
3301error:
3302 _PyBytesWriter_Dealloc(writer);
3303 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003304}
3305
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003306void*
3307_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3308{
3309 Py_ssize_t new_min_size;
3310
3311 _PyBytesWriter_CheckConsistency(writer, str);
3312 assert(size >= 0);
3313
3314 if (size == 0) {
3315 /* nothing to do */
3316 return str;
3317 }
3318
3319 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3320 PyErr_NoMemory();
3321 _PyBytesWriter_Dealloc(writer);
3322 return NULL;
3323 }
3324 new_min_size = writer->min_size + size;
3325
3326 if (new_min_size > writer->allocated)
3327 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3328
3329 writer->min_size = new_min_size;
3330 return str;
3331}
3332
Victor Stinner00165072015-10-09 01:53:21 +02003333/* Allocate the buffer to write size bytes.
3334 Return the pointer to the beginning of buffer data.
3335 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003336void*
Victor Stinner00165072015-10-09 01:53:21 +02003337_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3338{
3339 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003340 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003341 assert(size >= 0);
3342
Victor Stinnerb3653a32015-10-09 03:38:24 +02003343 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003344#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003345 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003346 /* In debug mode, don't use the full small buffer because it is less
3347 efficient than bytes and bytearray objects to detect buffer underflow
3348 and buffer overflow. Use 10 bytes of the small buffer to test also
3349 code using the smaller buffer in debug mode.
3350
3351 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3352 in debug mode to also be able to detect stack overflow when running
3353 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3354 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3355 stack overflow. */
3356 writer->allocated = Py_MIN(writer->allocated, 10);
3357 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3358 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003359 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003360#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003361 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003362#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003363 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003364}
3365
3366PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003367_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003368{
Victor Stinner2bf89932015-10-14 11:25:33 +02003369 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003370 PyObject *result;
3371
3372 _PyBytesWriter_CheckConsistency(writer, str);
3373
Victor Stinner2bf89932015-10-14 11:25:33 +02003374 size = _PyBytesWriter_GetSize(writer, str);
3375 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003376 Py_CLEAR(writer->buffer);
3377 /* Get the empty byte string singleton */
3378 result = PyBytes_FromStringAndSize(NULL, 0);
3379 }
3380 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003381 if (writer->use_bytearray) {
3382 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3383 }
3384 else {
3385 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3386 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003387 }
3388 else {
3389 result = writer->buffer;
3390 writer->buffer = NULL;
3391
Victor Stinner2bf89932015-10-14 11:25:33 +02003392 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003393 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003394 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003395 Py_DECREF(result);
3396 return NULL;
3397 }
3398 }
3399 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 assert(result == NULL);
3402 return NULL;
3403 }
Victor Stinner00165072015-10-09 01:53:21 +02003404 }
3405 }
Victor Stinner00165072015-10-09 01:53:21 +02003406 }
Victor Stinner00165072015-10-09 01:53:21 +02003407 return result;
3408}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003409
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003410void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003411_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003412 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003413{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003414 char *str = (char *)ptr;
3415
Victor Stinnerce179bf2015-10-09 12:57:22 +02003416 str = _PyBytesWriter_Prepare(writer, str, size);
3417 if (str == NULL)
3418 return NULL;
3419
Christian Heimesf051e432016-09-13 20:22:02 +02003420 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421 str += size;
3422
3423 return str;
3424}