blob: c358756bfea8e65de342b1c148fe76eb76671db6 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006#include "internal/mem.h"
7#include "internal/pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00008
Gregory P. Smith60d241f2007-10-16 06:31:30 +00009#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000010#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000011#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030018#include "clinic/bytesobject.c.h"
19
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000021Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000023
Christian Heimes2c9c7a52008-05-26 13:42:13 +000024static PyBytesObject *characters[UCHAR_MAX + 1];
25static PyBytesObject *nullstring;
26
Mark Dickinsonfd24b322008-12-06 15:33:31 +000027/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
Victor Stinner2bf89932015-10-14 11:25:33 +020035/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the new object is the length of that string, not
   counting the terminating null character.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 Py_INCREF(op);
72 return (PyObject *)op;
73 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000074
Victor Stinner049e5092014-08-17 22:20:00 +020075 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 PyErr_SetString(PyExc_OverflowError,
77 "byte string is too large");
78 return NULL;
79 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020082 if (use_calloc)
83 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84 else
85 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 if (op == NULL)
87 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010088 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 if (!use_calloc)
91 op->ob_sval[size] = '\0';
92 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 if (size == 0) {
94 nullstring = op;
95 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020096 }
97 return (PyObject *) op;
98}
99
100PyObject *
101PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102{
103 PyBytesObject *op;
104 if (size < 0) {
105 PyErr_SetString(PyExc_SystemError,
106 "Negative size passed to PyBytes_FromStringAndSize");
107 return NULL;
108 }
109 if (size == 1 && str != NULL &&
110 (op = characters[*str & UCHAR_MAX]) != NULL)
111 {
112#ifdef COUNT_ALLOCS
113 one_strings++;
114#endif
115 Py_INCREF(op);
116 return (PyObject *)op;
117 }
118
119 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120 if (op == NULL)
121 return NULL;
122 if (str == NULL)
123 return (PyObject *) op;
124
Christian Heimesf051e432016-09-13 20:22:02 +0200125 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200126 /* share short strings */
127 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 characters[*str & UCHAR_MAX] = op;
129 Py_INCREF(op);
130 }
131 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000132}
133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134PyObject *
135PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000136{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200137 size_t size;
138 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 assert(str != NULL);
141 size = strlen(str);
142 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143 PyErr_SetString(PyExc_OverflowError,
144 "byte string is too long");
145 return NULL;
146 }
147 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000150#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000157#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 Py_INCREF(op);
159 return (PyObject *)op;
160 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 /* Inline PyObject_NewVar */
163 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164 if (op == NULL)
165 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100166 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200168 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 /* share short strings */
170 if (size == 0) {
171 nullstring = op;
172 Py_INCREF(op);
173 } else if (size == 1) {
174 characters[*str & UCHAR_MAX] = op;
175 Py_INCREF(op);
176 }
177 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000179
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000180PyObject *
181PyBytes_FromFormatV(const char *format, va_list vargs)
182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200184 const char *f;
185 const char *p;
186 Py_ssize_t prec;
187 int longflag;
188 int size_tflag;
189 /* Longest 64-bit formatted numbers:
190 - "18446744073709551615\0" (21 bytes)
191 - "-9223372036854775808\0" (21 bytes)
192 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194 Longest 64-bit pointer representation:
195 "0xffffffffffffffff\0" (19 bytes). */
196 char buffer[21];
197 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Victor Stinner03dab782015-10-14 00:21:35 +0200201 s = _PyBytesWriter_Alloc(&writer, strlen(format));
202 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200204 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000205
Victor Stinner03dab782015-10-14 00:21:35 +0200206#define WRITE_BYTES(str) \
207 do { \
208 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209 if (s == NULL) \
210 goto error; \
211 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200214 if (*f != '%') {
215 *s++ = *f;
216 continue;
217 }
218
219 p = f++;
220
221 /* ignore the width (ex: 10 in "%10s") */
222 while (Py_ISDIGIT(*f))
223 f++;
224
225 /* parse the precision (ex: 10 in "%.10s") */
226 prec = 0;
227 if (*f == '.') {
228 f++;
229 for (; Py_ISDIGIT(*f); f++) {
230 prec = (prec * 10) + (*f - '0');
231 }
232 }
233
234 while (*f && *f != '%' && !Py_ISALPHA(*f))
235 f++;
236
237 /* handle the long flag ('l'), but only for %ld and %lu.
238 others can be added when necessary. */
239 longflag = 0;
240 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241 longflag = 1;
242 ++f;
243 }
244
245 /* handle the size_t flag ('z'). */
246 size_tflag = 0;
247 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248 size_tflag = 1;
249 ++f;
250 }
251
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700252 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200253 (ex: 2 for "%s") */
254 writer.min_size -= (f - p + 1);
255
256 switch (*f) {
257 case 'c':
258 {
259 int c = va_arg(vargs, int);
260 if (c < 0 || c > 255) {
261 PyErr_SetString(PyExc_OverflowError,
262 "PyBytes_FromFormatV(): %c format "
263 "expects an integer in range [0; 255]");
264 goto error;
265 }
266 writer.min_size++;
267 *s++ = (unsigned char)c;
268 break;
269 }
270
271 case 'd':
272 if (longflag)
273 sprintf(buffer, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(buffer, "%d", va_arg(vargs, int));
279 assert(strlen(buffer) < sizeof(buffer));
280 WRITE_BYTES(buffer);
281 break;
282
283 case 'u':
284 if (longflag)
285 sprintf(buffer, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(buffer, "%u",
292 va_arg(vargs, unsigned int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'i':
298 sprintf(buffer, "%i", va_arg(vargs, int));
299 assert(strlen(buffer) < sizeof(buffer));
300 WRITE_BYTES(buffer);
301 break;
302
303 case 'x':
304 sprintf(buffer, "%x", va_arg(vargs, int));
305 assert(strlen(buffer) < sizeof(buffer));
306 WRITE_BYTES(buffer);
307 break;
308
309 case 's':
310 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200312
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200313 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200314 i = strlen(p);
315 if (prec > 0 && i > prec)
316 i = prec;
317 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
318 if (s == NULL)
319 goto error;
320 break;
321 }
322
323 case 'p':
324 sprintf(buffer, "%p", va_arg(vargs, void*));
325 assert(strlen(buffer) < sizeof(buffer));
326 /* %p is ill-defined: ensure leading 0x. */
327 if (buffer[1] == 'X')
328 buffer[1] = 'x';
329 else if (buffer[1] != 'x') {
330 memmove(buffer+2, buffer, strlen(buffer)+1);
331 buffer[0] = '0';
332 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 }
Victor Stinner03dab782015-10-14 00:21:35 +0200334 WRITE_BYTES(buffer);
335 break;
336
337 case '%':
338 writer.min_size++;
339 *s++ = '%';
340 break;
341
342 default:
343 if (*f == 0) {
344 /* fix min_size if we reached the end of the format string */
345 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348 /* invalid format string: copy unformatted string and exit */
349 WRITE_BYTES(p);
350 return _PyBytesWriter_Finish(&writer, s);
351 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
Victor Stinner03dab782015-10-14 00:21:35 +0200354#undef WRITE_BYTES
355
356 return _PyBytesWriter_Finish(&writer, s);
357
358 error:
359 _PyBytesWriter_Dealloc(&writer);
360 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Flag bits set while parsing the flag characters of a conversion. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200442 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 str += len;
445 return str;
446 }
447
448 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200450 *p_result = result;
451 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800452}
453
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300454static PyObject *
455formatlong(PyObject *v, int flags, int prec, int type)
456{
457 PyObject *result, *iobj;
458 if (type == 'i')
459 type = 'd';
460 if (PyLong_Check(v))
461 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462 if (PyNumber_Check(v)) {
463 /* make sure number is a type of integer for o, x, and X */
464 if (type == 'o' || type == 'x' || type == 'X')
465 iobj = PyNumber_Index(v);
466 else
467 iobj = PyNumber_Long(v);
468 if (iobj == NULL) {
469 if (!PyErr_ExceptionMatches(PyExc_TypeError))
470 return NULL;
471 }
472 else if (!PyLong_Check(iobj))
473 Py_CLEAR(iobj);
474 if (iobj != NULL) {
475 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
476 Py_DECREF(iobj);
477 return result;
478 }
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486}
487
488static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800490{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800494 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800498 }
499 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300500 PyObject *iobj;
501 long ival;
502 int overflow;
503 /* make sure number is a type of integer */
504 if (PyLong_Check(arg)) {
505 ival = PyLong_AsLongAndOverflow(arg, &overflow);
506 }
507 else {
508 iobj = PyNumber_Index(arg);
509 if (iobj == NULL) {
510 if (!PyErr_ExceptionMatches(PyExc_TypeError))
511 return 0;
512 goto onError;
513 }
514 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
515 Py_DECREF(iobj);
516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 if (!overflow && ival == -1 && PyErr_Occurred())
518 goto onError;
519 if (overflow || !(0 <= ival && ival <= 255)) {
520 PyErr_SetString(PyExc_OverflowError,
521 "%c arg not in range(256)");
522 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 *p = (char)ival;
525 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300527 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 PyErr_SetString(PyExc_TypeError,
529 "%c requires an integer in range(256) or a single byte");
530 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531}
532
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800533static PyObject *_PyBytes_FromBuffer(PyObject *x);
534
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 /* is it a bytes object? */
541 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 *pbuf = PyBytes_AS_STRING(v);
543 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 return v;
546 }
547 if (PyByteArray_Check(v)) {
548 *pbuf = PyByteArray_AS_STRING(v);
549 *plen = PyByteArray_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 }
553 /* does it support __bytes__? */
554 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100556 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800557 Py_DECREF(func);
558 if (result == NULL)
559 return NULL;
560 if (!PyBytes_Check(result)) {
561 PyErr_Format(PyExc_TypeError,
562 "__bytes__ returned non-bytes (type %.200s)",
563 Py_TYPE(result)->tp_name);
564 Py_DECREF(result);
565 return NULL;
566 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 *pbuf = PyBytes_AS_STRING(result);
568 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569 return result;
570 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800571 /* does it support buffer protocol? */
572 if (PyObject_CheckBuffer(v)) {
573 /* maybe we can avoid making a copy of the buffer object here? */
574 result = _PyBytes_FromBuffer(v);
575 if (result == NULL)
576 return NULL;
577 *pbuf = PyBytes_AS_STRING(result);
578 *plen = PyBytes_GET_SIZE(result);
579 return result;
580 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800582 "%%b requires a bytes-like object, "
583 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 Py_TYPE(v)->tp_name);
585 return NULL;
586}
587
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200588/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589
590PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200591_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
592 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800593{
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 const char *fmt;
595 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200597 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600 _PyBytesWriter writer;
601
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 PyErr_BadInternalCall();
604 return NULL;
605 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200606 fmt = format;
607 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200608
609 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200610 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200611
612 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
613 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200615 if (!use_bytearray)
616 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200617
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 if (PyTuple_Check(args)) {
619 arglen = PyTuple_GET_SIZE(args);
620 argidx = 0;
621 }
622 else {
623 arglen = -1;
624 argidx = -2;
625 }
626 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
627 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
628 !PyByteArray_Check(args)) {
629 dict = args;
630 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 while (--fmtcnt >= 0) {
633 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 Py_ssize_t len;
635 char *pos;
636
Xiang Zhangb76ad512017-03-06 17:17:05 +0800637 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 if (pos != NULL)
639 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200640 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800641 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642 assert(len != 0);
643
Christian Heimesf051e432016-09-13 20:22:02 +0200644 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 res += len;
646 fmt += len;
647 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 }
649 else {
650 /* Got a format specifier */
651 int flags = 0;
652 Py_ssize_t width = -1;
653 int prec = -1;
654 int c = '\0';
655 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 PyObject *v = NULL;
657 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200658 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800659 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200660 Py_ssize_t len = 0;
661 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200662 Py_ssize_t alloc;
663#ifdef Py_DEBUG
664 char *before;
665#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200668 if (*fmt == '%') {
669 *res++ = '%';
670 fmt++;
671 fmtcnt--;
672 continue;
673 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200675 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800676 Py_ssize_t keylen;
677 PyObject *key;
678 int pcount = 1;
679
680 if (dict == NULL) {
681 PyErr_SetString(PyExc_TypeError,
682 "format requires a mapping");
683 goto error;
684 }
685 ++fmt;
686 --fmtcnt;
687 keystart = fmt;
688 /* Skip over balanced parentheses */
689 while (pcount > 0 && --fmtcnt >= 0) {
690 if (*fmt == ')')
691 --pcount;
692 else if (*fmt == '(')
693 ++pcount;
694 fmt++;
695 }
696 keylen = fmt - keystart - 1;
697 if (fmtcnt < 0 || pcount > 0) {
698 PyErr_SetString(PyExc_ValueError,
699 "incomplete format key");
700 goto error;
701 }
702 key = PyBytes_FromStringAndSize(keystart,
703 keylen);
704 if (key == NULL)
705 goto error;
706 if (args_owned) {
707 Py_DECREF(args);
708 args_owned = 0;
709 }
710 args = PyObject_GetItem(dict, key);
711 Py_DECREF(key);
712 if (args == NULL) {
713 goto error;
714 }
715 args_owned = 1;
716 arglen = -1;
717 argidx = -2;
718 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200719
720 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800721 while (--fmtcnt >= 0) {
722 switch (c = *fmt++) {
723 case '-': flags |= F_LJUST; continue;
724 case '+': flags |= F_SIGN; continue;
725 case ' ': flags |= F_BLANK; continue;
726 case '#': flags |= F_ALT; continue;
727 case '0': flags |= F_ZERO; continue;
728 }
729 break;
730 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200731
732 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800733 if (c == '*') {
734 v = getnextarg(args, arglen, &argidx);
735 if (v == NULL)
736 goto error;
737 if (!PyLong_Check(v)) {
738 PyErr_SetString(PyExc_TypeError,
739 "* wants int");
740 goto error;
741 }
742 width = PyLong_AsSsize_t(v);
743 if (width == -1 && PyErr_Occurred())
744 goto error;
745 if (width < 0) {
746 flags |= F_LJUST;
747 width = -width;
748 }
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 }
752 else if (c >= 0 && isdigit(c)) {
753 width = c - '0';
754 while (--fmtcnt >= 0) {
755 c = Py_CHARMASK(*fmt++);
756 if (!isdigit(c))
757 break;
758 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
759 PyErr_SetString(
760 PyExc_ValueError,
761 "width too big");
762 goto error;
763 }
764 width = width*10 + (c - '0');
765 }
766 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200767
768 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800769 if (c == '.') {
770 prec = 0;
771 if (--fmtcnt >= 0)
772 c = *fmt++;
773 if (c == '*') {
774 v = getnextarg(args, arglen, &argidx);
775 if (v == NULL)
776 goto error;
777 if (!PyLong_Check(v)) {
778 PyErr_SetString(
779 PyExc_TypeError,
780 "* wants int");
781 goto error;
782 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800784 if (prec == -1 && PyErr_Occurred())
785 goto error;
786 if (prec < 0)
787 prec = 0;
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 else if (c >= 0 && isdigit(c)) {
792 prec = c - '0';
793 while (--fmtcnt >= 0) {
794 c = Py_CHARMASK(*fmt++);
795 if (!isdigit(c))
796 break;
797 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
798 PyErr_SetString(
799 PyExc_ValueError,
800 "prec too big");
801 goto error;
802 }
803 prec = prec*10 + (c - '0');
804 }
805 }
806 } /* prec */
807 if (fmtcnt >= 0) {
808 if (c == 'h' || c == 'l' || c == 'L') {
809 if (--fmtcnt >= 0)
810 c = *fmt++;
811 }
812 }
813 if (fmtcnt < 0) {
814 PyErr_SetString(PyExc_ValueError,
815 "incomplete format");
816 goto error;
817 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200818 v = getnextarg(args, arglen, &argidx);
819 if (v == NULL)
820 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200821
822 if (fmtcnt < 0) {
823 /* last writer: disable writer overallocation */
824 writer.overallocate = 0;
825 }
826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 sign = 0;
828 fill = ' ';
829 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700830 case 'r':
831 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200833 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (temp == NULL)
835 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 assert(PyUnicode_IS_ASCII(temp));
837 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
838 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 's':
844 // %s is only for 2/3 code; 3 only code should use %b
845 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200846 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 if (temp == NULL)
848 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800849 if (prec >= 0 && len > prec)
850 len = prec;
851 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200852
Ethan Furmanb95b5612015-01-23 20:05:18 -0800853 case 'i':
854 case 'd':
855 case 'u':
856 case 'o':
857 case 'x':
858 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200859 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200860 && width == -1 && prec == -1
861 && !(flags & (F_SIGN | F_BLANK))
862 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 {
864 /* Fast path */
865 int alternate = flags & F_ALT;
866 int base;
867
868 switch(c)
869 {
870 default:
Barry Warsawb2e57942017-09-14 18:13:16 -0700871 Py_UNREACHABLE();
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 case 'd':
873 case 'i':
874 case 'u':
875 base = 10;
876 break;
877 case 'o':
878 base = 8;
879 break;
880 case 'x':
881 case 'X':
882 base = 16;
883 break;
884 }
885
886 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200887 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200888 res = _PyLong_FormatBytesWriter(&writer, res,
889 v, base, alternate);
890 if (res == NULL)
891 goto error;
892 continue;
893 }
894
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300895 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200896 if (!temp)
897 goto error;
898 assert(PyUnicode_IS_ASCII(temp));
899 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
900 len = PyUnicode_GET_LENGTH(temp);
901 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800902 if (flags & F_ZERO)
903 fill = '0';
904 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200905
Ethan Furmanb95b5612015-01-23 20:05:18 -0800906 case 'e':
907 case 'E':
908 case 'f':
909 case 'F':
910 case 'g':
911 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912 if (width == -1 && prec == -1
913 && !(flags & (F_SIGN | F_BLANK)))
914 {
915 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200916 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200917 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200918 if (res == NULL)
919 goto error;
920 continue;
921 }
922
Victor Stinnerad771582015-10-09 12:38:53 +0200923 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 goto error;
925 pbuf = PyBytes_AS_STRING(temp);
926 len = PyBytes_GET_SIZE(temp);
927 sign = 1;
928 if (flags & F_ZERO)
929 fill = '0';
930 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200931
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200933 pbuf = &onechar;
934 len = byte_converter(v, &onechar);
935 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200937 if (width == -1) {
938 /* Fast path */
939 *res++ = onechar;
940 continue;
941 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200943
Ethan Furmanb95b5612015-01-23 20:05:18 -0800944 default:
945 PyErr_Format(PyExc_ValueError,
946 "unsupported format character '%c' (0x%x) "
947 "at index %zd",
948 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200949 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800950 goto error;
951 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
Ethan Furmanb95b5612015-01-23 20:05:18 -0800953 if (sign) {
954 if (*pbuf == '-' || *pbuf == '+') {
955 sign = *pbuf++;
956 len--;
957 }
958 else if (flags & F_SIGN)
959 sign = '+';
960 else if (flags & F_BLANK)
961 sign = ' ';
962 else
963 sign = 0;
964 }
965 if (width < len)
966 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200967
968 alloc = width;
969 if (sign != 0 && len == width)
970 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200971 /* 2: size preallocated for %s */
972 if (alloc > 2) {
973 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974 if (res == NULL)
975 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977#ifdef Py_DEBUG
978 before = res;
979#endif
980
981 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 if (sign) {
983 if (fill != ' ')
984 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800985 if (width > len)
986 width--;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Write the numeric prefix for "x", "X" and "o" formats
990 if the alternate form is used.
991 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200992 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800993 assert(pbuf[0] == '0');
994 assert(pbuf[1] == c);
995 if (fill != ' ') {
996 *res++ = *pbuf++;
997 *res++ = *pbuf++;
998 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 width -= 2;
1000 if (width < 0)
1001 width = 0;
1002 len -= 2;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001007 memset(res, fill, width - len);
1008 res += (width - len);
1009 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* If padding with spaces: write sign if needed and/or numeric
1013 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 if (fill == ' ') {
1015 if (sign)
1016 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001017 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 assert(pbuf[0] == '0');
1019 assert(pbuf[1] == c);
1020 *res++ = *pbuf++;
1021 *res++ = *pbuf++;
1022 }
1023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
1025 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001026 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029 /* Pad right with the fill character if needed */
1030 if (width > len) {
1031 memset(res, ' ', width - len);
1032 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001035 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 Py_XDECREF(temp);
1039 goto error;
1040 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
1043#ifdef Py_DEBUG
1044 /* check that we computed the exact size for this write */
1045 assert((res - before) == alloc);
1046#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001047 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
1049 /* If overallocation was disabled, ensure that it was the last
1050 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001051 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 if (argidx < arglen && !dict) {
1055 PyErr_SetString(PyExc_TypeError,
1056 "not all arguments converted during bytes formatting");
1057 goto error;
1058 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001059
Ethan Furmanb95b5612015-01-23 20:05:18 -08001060 if (args_owned) {
1061 Py_DECREF(args);
1062 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001063 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064
1065 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
1070 return NULL;
1071}
1072
1073/* =-= */
1074
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001075static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001076bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001079}
1080
/* Unescape a backslash-escaped string. If unicode is non-zero,
   the string is a u-literal. If recode_encoding is non-NULL,
   the string is UTF-8 encoded and should be re-encoded in the
   specified encoding. */
1085
Victor Stinner2ec80632015-10-14 13:32:13 +02001086static char *
1087_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1088 const char *errors, const char *recode_encoding,
1089 _PyBytesWriter *writer, char *p)
1090{
1091 PyObject *u, *w;
1092 const char* t;
1093
1094 t = *s;
1095 /* Decode non-ASCII bytes as UTF-8. */
1096 while (t < end && (*t & 0x80))
1097 t++;
1098 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1099 if (u == NULL)
1100 return NULL;
1101
1102 /* Recode them in target encoding. */
1103 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1104 Py_DECREF(u);
1105 if (w == NULL)
1106 return NULL;
1107 assert(PyBytes_Check(w));
1108
1109 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001110 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001111 p = _PyBytesWriter_WriteBytes(writer, p,
1112 PyBytes_AS_STRING(w),
1113 PyBytes_GET_SIZE(w));
1114 Py_DECREF(w);
1115 if (p == NULL)
1116 return NULL;
1117
1118 *s = t;
1119 return p;
1120}
1121
Eric V. Smith42454af2016-10-31 09:22:08 -04001122PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 Py_ssize_t len,
1124 const char *errors,
1125 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001126 const char *recode_encoding,
1127 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 _PyBytesWriter writer;
1133
1134 _PyBytesWriter_Init(&writer);
1135
1136 p = _PyBytesWriter_Alloc(&writer, len);
1137 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001139 writer.overallocate = 1;
1140
Eric V. Smith42454af2016-10-31 09:22:08 -04001141 *first_invalid_escape = NULL;
1142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 end = s + len;
1144 while (s < end) {
1145 if (*s != '\\') {
1146 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 *p++ = *s++;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150 else {
1151 /* non-ASCII character and need to recode */
1152 p = _PyBytes_DecodeEscapeRecode(&s, end,
1153 errors, recode_encoding,
1154 &writer, p);
1155 if (p == NULL)
1156 goto failed;
1157 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 continue;
1159 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001162 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 PyErr_SetString(PyExc_ValueError,
1164 "Trailing \\ in string");
1165 goto failed;
1166 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 switch (*s++) {
1169 /* XXX This assumes ASCII! */
1170 case '\n': break;
1171 case '\\': *p++ = '\\'; break;
1172 case '\'': *p++ = '\''; break;
1173 case '\"': *p++ = '\"'; break;
1174 case 'b': *p++ = '\b'; break;
1175 case 'f': *p++ = '\014'; break; /* FF */
1176 case 't': *p++ = '\t'; break;
1177 case 'n': *p++ = '\n'; break;
1178 case 'r': *p++ = '\r'; break;
1179 case 'v': *p++ = '\013'; break; /* VT */
1180 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1181 case '0': case '1': case '2': case '3':
1182 case '4': case '5': case '6': case '7':
1183 c = s[-1] - '0';
1184 if (s < end && '0' <= *s && *s <= '7') {
1185 c = (c<<3) + *s++ - '0';
1186 if (s < end && '0' <= *s && *s <= '7')
1187 c = (c<<3) + *s++ - '0';
1188 }
1189 *p++ = c;
1190 break;
1191 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001192 if (s+1 < end) {
1193 int digit1, digit2;
1194 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1195 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1196 if (digit1 < 16 && digit2 < 16) {
1197 *p++ = (unsigned char)((digit1 << 4) + digit2);
1198 s += 2;
1199 break;
1200 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001202 /* invalid hexadecimal digits */
1203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001205 PyErr_Format(PyExc_ValueError,
1206 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001207 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 goto failed;
1209 }
1210 if (strcmp(errors, "replace") == 0) {
1211 *p++ = '?';
1212 } else if (strcmp(errors, "ignore") == 0)
1213 /* do nothing */;
1214 else {
1215 PyErr_Format(PyExc_ValueError,
1216 "decoding error; unknown "
1217 "error handling code: %.400s",
1218 errors);
1219 goto failed;
1220 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001221 /* skip \x */
1222 if (s < end && Py_ISXDIGIT(s[0]))
1223 s++; /* and a hexdigit */
1224 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001227 if (*first_invalid_escape == NULL) {
1228 *first_invalid_escape = s-1; /* Back up one char, since we've
1229 already incremented s. */
1230 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001232 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001233 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 UTF-8 bytes may follow. */
1235 }
1236 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001237
1238 return _PyBytesWriter_Finish(&writer, p);
1239
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001241 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
1244
Eric V. Smith42454af2016-10-31 09:22:08 -04001245PyObject *PyBytes_DecodeEscape(const char *s,
1246 Py_ssize_t len,
1247 const char *errors,
1248 Py_ssize_t unicode,
1249 const char *recode_encoding)
1250{
1251 const char* first_invalid_escape;
1252 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1253 recode_encoding,
1254 &first_invalid_escape);
1255 if (result == NULL)
1256 return NULL;
1257 if (first_invalid_escape != NULL) {
1258 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1259 "invalid escape sequence '\\%c'",
Serhiy Storchaka56cb4652017-10-20 17:08:15 +03001260 (unsigned char)*first_invalid_escape) < 0) {
Eric V. Smith42454af2016-10-31 09:22:08 -04001261 Py_DECREF(result);
1262 return NULL;
1263 }
1264 }
1265 return result;
1266
1267}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268/* -------------------------------------------------------------------- */
1269/* object api */
1270
1271Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001272PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 if (!PyBytes_Check(op)) {
1275 PyErr_Format(PyExc_TypeError,
1276 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1277 return -1;
1278 }
1279 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280}
1281
1282char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001283PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 if (!PyBytes_Check(op)) {
1286 PyErr_Format(PyExc_TypeError,
1287 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1288 return NULL;
1289 }
1290 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291}
1292
1293int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294PyBytes_AsStringAndSize(PyObject *obj,
1295 char **s,
1296 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (s == NULL) {
1299 PyErr_BadInternalCall();
1300 return -1;
1301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (!PyBytes_Check(obj)) {
1304 PyErr_Format(PyExc_TypeError,
1305 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1306 return -1;
1307 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 *s = PyBytes_AS_STRING(obj);
1310 if (len != NULL)
1311 *len = PyBytes_GET_SIZE(obj);
1312 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001313 PyErr_SetString(PyExc_ValueError,
1314 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 return -1;
1316 }
1317 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318}
Neal Norwitz6968b052007-02-27 19:02:19 +00001319
1320/* -------------------------------------------------------------------- */
1321/* Methods */
1322
Eric Smith0923d1d2009-04-16 20:16:10 +00001323#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001324
1325#include "stringlib/fastsearch.h"
1326#include "stringlib/count.h"
1327#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001328#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001329#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001330#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001331#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001332
Eric Smith0f78bff2009-11-30 01:01:42 +00001333#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001334
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335PyObject *
1336PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001337{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001338 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001340 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342 unsigned char quote, *s, *p;
1343
1344 /* Compute size of output string */
1345 squotes = dquotes = 0;
1346 newsize = 3; /* b'' */
1347 s = (unsigned char*)op->ob_sval;
1348 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001349 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001351 case '\'': squotes++; break;
1352 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001354 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 default:
1356 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001359 if (newsize > PY_SSIZE_T_MAX - incr)
1360 goto overflow;
1361 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362 }
1363 quote = '\'';
1364 if (smartquotes && squotes && !dquotes)
1365 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001366 if (squotes && quote == '\'') {
1367 if (newsize > PY_SSIZE_T_MAX - squotes)
1368 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371
1372 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (v == NULL) {
1374 return NULL;
1375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 *p++ = 'b', *p++ = quote;
1379 for (i = 0; i < length; i++) {
1380 unsigned char c = op->ob_sval[i];
1381 if (c == quote || c == '\\')
1382 *p++ = '\\', *p++ = c;
1383 else if (c == '\t')
1384 *p++ = '\\', *p++ = 't';
1385 else if (c == '\n')
1386 *p++ = '\\', *p++ = 'n';
1387 else if (c == '\r')
1388 *p++ = '\\', *p++ = 'r';
1389 else if (c < ' ' || c >= 0x7f) {
1390 *p++ = '\\';
1391 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001392 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1393 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001395 else
1396 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001398 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001399 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001401
1402 overflow:
1403 PyErr_SetString(PyExc_OverflowError,
1404 "bytes object is too large to make repr");
1405 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001406}
1407
Neal Norwitz6968b052007-02-27 19:02:19 +00001408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001409bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001412}
1413
Neal Norwitz6968b052007-02-27 19:02:19 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (Py_BytesWarningFlag) {
1418 if (PyErr_WarnEx(PyExc_BytesWarning,
1419 "str() on a bytes instance", 1))
1420 return NULL;
1421 }
1422 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001423}
1424
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001426bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429}
Neal Norwitz6968b052007-02-27 19:02:19 +00001430
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431/* This is also used by PyBytes_Concat() */
1432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001433bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 Py_buffer va, vb;
1436 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 va.len = -1;
1439 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001440 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1441 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001443 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 goto done;
1445 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 /* Optimize end cases */
1448 if (va.len == 0 && PyBytes_CheckExact(b)) {
1449 result = b;
1450 Py_INCREF(result);
1451 goto done;
1452 }
1453 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1454 result = a;
1455 Py_INCREF(result);
1456 goto done;
1457 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001459 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyErr_NoMemory();
1461 goto done;
1462 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001464 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 if (result != NULL) {
1466 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1467 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1468 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469
1470 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (va.len != -1)
1472 PyBuffer_Release(&va);
1473 if (vb.len != -1)
1474 PyBuffer_Release(&vb);
1475 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476}
Neal Norwitz6968b052007-02-27 19:02:19 +00001477
1478static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001480{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001481 Py_ssize_t i;
1482 Py_ssize_t j;
1483 Py_ssize_t size;
1484 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 size_t nbytes;
1486 if (n < 0)
1487 n = 0;
1488 /* watch out for overflows: the size can overflow int,
1489 * and the # of bytes needed can overflow size_t
1490 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001491 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 PyErr_SetString(PyExc_OverflowError,
1493 "repeated bytes are too long");
1494 return NULL;
1495 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001496 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1498 Py_INCREF(a);
1499 return (PyObject *)a;
1500 }
1501 nbytes = (size_t)size;
1502 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1503 PyErr_SetString(PyExc_OverflowError,
1504 "repeated bytes are too long");
1505 return NULL;
1506 }
1507 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1508 if (op == NULL)
1509 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001510 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 op->ob_shash = -1;
1512 op->ob_sval[size] = '\0';
1513 if (Py_SIZE(a) == 1 && n > 0) {
1514 memset(op->ob_sval, a->ob_sval[0] , n);
1515 return (PyObject *) op;
1516 }
1517 i = 0;
1518 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001519 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 i = Py_SIZE(a);
1521 }
1522 while (i < size) {
1523 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001524 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 i += j;
1526 }
1527 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001528}
1529
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001530static int
1531bytes_contains(PyObject *self, PyObject *arg)
1532{
1533 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1534}
1535
Neal Norwitz6968b052007-02-27 19:02:19 +00001536static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001537bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (i < 0 || i >= Py_SIZE(a)) {
1540 PyErr_SetString(PyExc_IndexError, "index out of range");
1541 return NULL;
1542 }
1543 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001544}
1545
Benjamin Peterson621b4302016-09-09 13:54:34 -07001546static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548{
1549 int cmp;
1550 Py_ssize_t len;
1551
1552 len = Py_SIZE(a);
1553 if (Py_SIZE(b) != len)
1554 return 0;
1555
1556 if (a->ob_sval[0] != b->ob_sval[0])
1557 return 0;
1558
1559 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560 return (cmp == 0);
1561}
1562
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001564bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 int c;
1567 Py_ssize_t len_a, len_b;
1568 Py_ssize_t min_len;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001569 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 /* Make sure both arguments are strings. */
1572 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001573 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001574 rc = PyObject_IsInstance((PyObject*)a,
1575 (PyObject*)&PyUnicode_Type);
1576 if (!rc)
1577 rc = PyObject_IsInstance((PyObject*)b,
1578 (PyObject*)&PyUnicode_Type);
1579 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001581 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001582 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001583 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001584 return NULL;
1585 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001586 else {
1587 rc = PyObject_IsInstance((PyObject*)a,
1588 (PyObject*)&PyLong_Type);
1589 if (!rc)
1590 rc = PyObject_IsInstance((PyObject*)b,
1591 (PyObject*)&PyLong_Type);
1592 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001593 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001594 if (rc) {
1595 if (PyErr_WarnEx(PyExc_BytesWarning,
1596 "Comparison between bytes and int", 1))
1597 return NULL;
1598 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001599 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 }
stratakise8b19652017-11-02 11:32:54 +01001601 Py_RETURN_NOTIMPLEMENTED;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001603 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001605 case Py_EQ:
1606 case Py_LE:
1607 case Py_GE:
1608 /* a string is equal to itself */
stratakise8b19652017-11-02 11:32:54 +01001609 Py_RETURN_TRUE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001611 case Py_NE:
1612 case Py_LT:
1613 case Py_GT:
stratakise8b19652017-11-02 11:32:54 +01001614 Py_RETURN_FALSE;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001615 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001616 default:
1617 PyErr_BadArgument();
1618 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 }
1620 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001621 else if (op == Py_EQ || op == Py_NE) {
1622 int eq = bytes_compare_eq(a, b);
1623 eq ^= (op == Py_NE);
stratakise8b19652017-11-02 11:32:54 +01001624 return PyBool_FromLong(eq);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625 }
1626 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001627 len_a = Py_SIZE(a);
1628 len_b = Py_SIZE(b);
1629 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001630 if (min_len > 0) {
1631 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001632 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001633 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001635 else
1636 c = 0;
stratakise8b19652017-11-02 11:32:54 +01001637 if (c != 0)
1638 Py_RETURN_RICHCOMPARE(c, 0, op);
1639 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001641}
1642
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001643static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001644bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001645{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001646 if (a->ob_shash == -1) {
1647 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001648 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001649 }
1650 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001651}
1652
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 if (PyIndex_Check(item)) {
1657 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1658 if (i == -1 && PyErr_Occurred())
1659 return NULL;
1660 if (i < 0)
1661 i += PyBytes_GET_SIZE(self);
1662 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1663 PyErr_SetString(PyExc_IndexError,
1664 "index out of range");
1665 return NULL;
1666 }
1667 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1668 }
1669 else if (PySlice_Check(item)) {
1670 Py_ssize_t start, stop, step, slicelength, cur, i;
1671 char* source_buf;
1672 char* result_buf;
1673 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001674
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001675 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return NULL;
1677 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001678 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1679 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 if (slicelength <= 0) {
1682 return PyBytes_FromStringAndSize("", 0);
1683 }
1684 else if (start == 0 && step == 1 &&
1685 slicelength == PyBytes_GET_SIZE(self) &&
1686 PyBytes_CheckExact(self)) {
1687 Py_INCREF(self);
1688 return (PyObject *)self;
1689 }
1690 else if (step == 1) {
1691 return PyBytes_FromStringAndSize(
1692 PyBytes_AS_STRING(self) + start,
1693 slicelength);
1694 }
1695 else {
1696 source_buf = PyBytes_AS_STRING(self);
1697 result = PyBytes_FromStringAndSize(NULL, slicelength);
1698 if (result == NULL)
1699 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 result_buf = PyBytes_AS_STRING(result);
1702 for (cur = start, i = 0; i < slicelength;
1703 cur += step, i++) {
1704 result_buf[i] = source_buf[cur];
1705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return result;
1708 }
1709 }
1710 else {
1711 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001712 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 Py_TYPE(item)->tp_name);
1714 return NULL;
1715 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716}
1717
1718static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001719bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1722 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723}
1724
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001725static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 (lenfunc)bytes_length, /*sq_length*/
1727 (binaryfunc)bytes_concat, /*sq_concat*/
1728 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1729 (ssizeargfunc)bytes_item, /*sq_item*/
1730 0, /*sq_slice*/
1731 0, /*sq_ass_item*/
1732 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001733 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734};
1735
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001736static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 (lenfunc)bytes_length,
1738 (binaryfunc)bytes_subscript,
1739 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740};
1741
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001742static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 (getbufferproc)bytes_buffer_getbuffer,
1744 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745};
1746
1747
/* Selectors passed to the strip helpers to choose which end(s) to trim. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
1751
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001752/*[clinic input]
1753bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001755 sep: object = None
1756 The delimiter according which to split the bytes.
1757 None (the default value) means split on ASCII whitespace characters
1758 (space, tab, return, newline, formfeed, vertical tab).
1759 maxsplit: Py_ssize_t = -1
1760 Maximum number of splits to do.
1761 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763Return a list of the sections in the bytes, using sep as the delimiter.
1764[clinic start generated code]*/
1765
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001766static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001767bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1768/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001769{
1770 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 const char *s = PyBytes_AS_STRING(self), *sub;
1772 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001773 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 if (maxsplit < 0)
1776 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001779 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 return NULL;
1781 sub = vsub.buf;
1782 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1785 PyBuffer_Release(&vsub);
1786 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001787}
1788
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789/*[clinic input]
1790bytes.partition
1791
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001792 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001793 /
1794
1795Partition the bytes into three parts using the given separator.
1796
1797This will search for the separator sep in the bytes. If the separator is found,
1798returns a 3-tuple containing the part before the separator, the separator
1799itself, and the part after it.
1800
1801If the separator is not found, returns a 3-tuple containing the original bytes
1802object and two empty bytes objects.
1803[clinic start generated code]*/
1804
Neal Norwitz6968b052007-02-27 19:02:19 +00001805static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001807/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001808{
Neal Norwitz6968b052007-02-27 19:02:19 +00001809 return stringlib_partition(
1810 (PyObject*) self,
1811 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001812 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001813 );
1814}
1815
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001816/*[clinic input]
1817bytes.rpartition
1818
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001819 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001820 /
1821
1822Partition the bytes into three parts using the given separator.
1823
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001824This will search for the separator sep in the bytes, starting at the end. If
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825the separator is found, returns a 3-tuple containing the part before the
1826separator, the separator itself, and the part after it.
1827
1828If the separator is not found, returns a 3-tuple containing two empty bytes
1829objects and the original bytes object.
1830[clinic start generated code]*/
1831
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001832static PyObject *
1833bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchakaa2314282017-10-29 02:11:54 +03001834/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 return stringlib_rpartition(
1837 (PyObject*) self,
1838 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001839 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001841}
1842
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843/*[clinic input]
1844bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846Return a list of the sections in the bytes, using sep as the delimiter.
1847
1848Splitting is done starting at the end of the bytes and working to the front.
1849[clinic start generated code]*/
1850
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001852bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1853/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854{
1855 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 const char *s = PyBytes_AS_STRING(self), *sub;
1857 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001858 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 if (maxsplit < 0)
1861 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001864 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 return NULL;
1866 sub = vsub.buf;
1867 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1870 PyBuffer_Release(&vsub);
1871 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001872}
1873
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001875/*[clinic input]
1876bytes.join
1877
1878 iterable_of_bytes: object
1879 /
1880
1881Concatenate any number of bytes objects.
1882
1883The bytes whose method is called is inserted in between each pair.
1884
1885The result is returned as a new bytes object.
1886
1887Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1888[clinic start generated code]*/
1889
Neal Norwitz6968b052007-02-27 19:02:19 +00001890static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001891bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1892/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001893{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001894 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001895}
1896
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001897PyObject *
1898_PyBytes_Join(PyObject *sep, PyObject *x)
1899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 assert(sep != NULL && PyBytes_Check(sep));
1901 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001902 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903}
1904
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001905static PyObject *
1906bytes_find(PyBytesObject *self, PyObject *args)
1907{
1908 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1909}
1910
1911static PyObject *
1912bytes_index(PyBytesObject *self, PyObject *args)
1913{
1914 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1915}
1916
1917
1918static PyObject *
1919bytes_rfind(PyBytesObject *self, PyObject *args)
1920{
1921 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1922}
1923
1924
1925static PyObject *
1926bytes_rindex(PyBytesObject *self, PyObject *args)
1927{
1928 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1929}
1930
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
1932Py_LOCAL_INLINE(PyObject *)
1933do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001934{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 Py_buffer vsep;
1936 char *s = PyBytes_AS_STRING(self);
1937 Py_ssize_t len = PyBytes_GET_SIZE(self);
1938 char *sep;
1939 Py_ssize_t seplen;
1940 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001942 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 return NULL;
1944 sep = vsep.buf;
1945 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 i = 0;
1948 if (striptype != RIGHTSTRIP) {
1949 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1950 i++;
1951 }
1952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 j = len;
1955 if (striptype != LEFTSTRIP) {
1956 do {
1957 j--;
1958 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1959 j++;
1960 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1965 Py_INCREF(self);
1966 return (PyObject*)self;
1967 }
1968 else
1969 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001970}
1971
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
1973Py_LOCAL_INLINE(PyObject *)
1974do_strip(PyBytesObject *self, int striptype)
1975{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001976 char *s = PyBytes_AS_STRING(self);
1977 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001981 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 i++;
1983 }
1984 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
David Malcolm96960882010-11-05 17:23:41 +00001990 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 j++;
1992 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000}
2001
2002
2003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 if (bytes != NULL && bytes != Py_None) {
2007 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 }
2009 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010}
2011
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012/*[clinic input]
2013bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015 bytes: object = None
2016 /
2017
2018Strip leading and trailing bytes contained in the argument.
2019
2020If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2021[clinic start generated code]*/
2022
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002023static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002025/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002028}
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030/*[clinic input]
2031bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002032
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033 bytes: object = None
2034 /
2035
2036Strip leading bytes contained in the argument.
2037
2038If the argument is omitted or None, strip leading ASCII whitespace.
2039[clinic start generated code]*/
2040
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041static PyObject *
2042bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002043/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044{
2045 return do_argstrip(self, LEFTSTRIP, bytes);
2046}
2047
2048/*[clinic input]
2049bytes.rstrip
2050
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002051 bytes: object = None
2052 /
2053
2054Strip trailing bytes contained in the argument.
2055
2056If the argument is omitted or None, strip trailing ASCII whitespace.
2057[clinic start generated code]*/
2058
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002059static PyObject *
2060bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002061/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002062{
2063 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002064}
Neal Norwitz6968b052007-02-27 19:02:19 +00002065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002067static PyObject *
2068bytes_count(PyBytesObject *self, PyObject *args)
2069{
2070 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2071}
2072
2073
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002074/*[clinic input]
2075bytes.translate
2076
Victor Stinner049e5092014-08-17 22:20:00 +02002077 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002078 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002080 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002081
2082Return a copy with each character mapped by the given translation table.
2083
Martin Panter1b6c6da2016-08-27 08:35:02 +00002084All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085The remaining characters are mapped through the given translation table.
2086[clinic start generated code]*/
2087
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002088static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002089bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002090 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002091/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002093 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002094 Py_buffer table_view = {NULL, NULL};
2095 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002097 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 Py_ssize_t inlen, tablen, dellen = 0;
2101 PyObject *result;
2102 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 if (PyBytes_Check(table)) {
2105 table_chars = PyBytes_AS_STRING(table);
2106 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002108 else if (table == Py_None) {
2109 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 tablen = 256;
2111 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002112 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002113 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002114 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002115 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002116 tablen = table_view.len;
2117 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 if (tablen != 256) {
2120 PyErr_SetString(PyExc_ValueError,
2121 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002122 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 return NULL;
2124 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 if (deletechars != NULL) {
2127 if (PyBytes_Check(deletechars)) {
2128 del_table_chars = PyBytes_AS_STRING(deletechars);
2129 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002131 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002132 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 PyBuffer_Release(&table_view);
2134 return NULL;
2135 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002136 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002137 dellen = del_table_view.len;
2138 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 }
2140 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 dellen = 0;
2143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 inlen = PyBytes_GET_SIZE(input_obj);
2146 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002147 if (result == NULL) {
2148 PyBuffer_Release(&del_table_view);
2149 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002151 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002152 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002155 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 /* If no deletions are required, use faster code */
2157 for (i = inlen; --i >= 0; ) {
2158 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 changed = 1;
2161 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002162 if (!changed && PyBytes_CheckExact(input_obj)) {
2163 Py_INCREF(input_obj);
2164 Py_DECREF(result);
2165 result = input_obj;
2166 }
2167 PyBuffer_Release(&del_table_view);
2168 PyBuffer_Release(&table_view);
2169 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002172 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 for (i = 0; i < 256; i++)
2174 trans_table[i] = Py_CHARMASK(i);
2175 } else {
2176 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002179 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002183 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 for (i = inlen; --i >= 0; ) {
2186 c = Py_CHARMASK(*input++);
2187 if (trans_table[c] != -1)
2188 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2189 continue;
2190 changed = 1;
2191 }
2192 if (!changed && PyBytes_CheckExact(input_obj)) {
2193 Py_DECREF(result);
2194 Py_INCREF(input_obj);
2195 return input_obj;
2196 }
2197 /* Fix the size of the resulting string */
2198 if (inlen > 0)
2199 _PyBytes_Resize(&result, output - output_start);
2200 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201}
2202
2203
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002204/*[clinic input]
2205
2206@staticmethod
2207bytes.maketrans
2208
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002209 frm: Py_buffer
2210 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002211 /
2212
2213Return a translation table useable for the bytes or bytearray translate method.
2214
2215The returned table will be one where each byte in frm is mapped to the byte at
2216the same position in to.
2217
2218The bytes objects frm and to must be of the same length.
2219[clinic start generated code]*/
2220
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002222bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002223/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002224{
2225 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002226}
2227
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002228
2229/*[clinic input]
2230bytes.replace
2231
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002232 old: Py_buffer
2233 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234 count: Py_ssize_t = -1
2235 Maximum number of occurrences to replace.
2236 -1 (the default value) means replace all occurrences.
2237 /
2238
2239Return a copy with all occurrences of substring old replaced by new.
2240
2241If the optional argument count is given, only the first count occurrences are
2242replaced.
2243[clinic start generated code]*/
2244
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002245static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002246bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002247 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002248/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002250 return stringlib_replace((PyObject *)self,
2251 (const char *)old->buf, old->len,
2252 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253}
2254
2255/** End DALKE **/
2256
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002258static PyObject *
2259bytes_startswith(PyBytesObject *self, PyObject *args)
2260{
2261 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2262}
2263
2264static PyObject *
2265bytes_endswith(PyBytesObject *self, PyObject *args)
2266{
2267 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2268}
2269
2270
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271/*[clinic input]
2272bytes.decode
2273
2274 encoding: str(c_default="NULL") = 'utf-8'
2275 The encoding with which to decode the bytes.
2276 errors: str(c_default="NULL") = 'strict'
2277 The error handling scheme to use for the handling of decoding errors.
2278 The default is 'strict' meaning that decoding errors raise a
2279 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2280 as well as any other name registered with codecs.register_error that
2281 can handle UnicodeDecodeErrors.
2282
2283Decode the bytes using the codec registered for encoding.
2284[clinic start generated code]*/
2285
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002287bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002288 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002289/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002290{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002291 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002292}
2293
Guido van Rossum20188312006-05-05 15:15:40 +00002294
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002295/*[clinic input]
2296bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002297
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002298 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002299
2300Return a list of the lines in the bytes, breaking at line boundaries.
2301
2302Line breaks are not included in the resulting list unless keepends is given and
2303true.
2304[clinic start generated code]*/
2305
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002306static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002307bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002308/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002309{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002310 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002311 (PyObject*) self, PyBytes_AS_STRING(self),
2312 PyBytes_GET_SIZE(self), keepends
2313 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002314}
2315
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002316/*[clinic input]
2317@classmethod
2318bytes.fromhex
2319
2320 string: unicode
2321 /
2322
2323Create a bytes object from a string of hexadecimal numbers.
2324
2325Spaces between two numbers are accepted.
2326Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2327[clinic start generated code]*/
2328
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002329static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002330bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002331/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002332{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002333 PyObject *result = _PyBytes_FromHex(string, 0);
2334 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002335 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2336 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002337 }
2338 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002339}
2340
2341PyObject*
2342_PyBytes_FromHex(PyObject *string, int use_bytearray)
2343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002345 Py_ssize_t hexlen, invalid_char;
2346 unsigned int top, bot;
2347 Py_UCS1 *str, *end;
2348 _PyBytesWriter writer;
2349
2350 _PyBytesWriter_Init(&writer);
2351 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002353 assert(PyUnicode_Check(string));
2354 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002357
Victor Stinner2bf89932015-10-14 11:25:33 +02002358 if (!PyUnicode_IS_ASCII(string)) {
2359 void *data = PyUnicode_DATA(string);
2360 unsigned int kind = PyUnicode_KIND(string);
2361 Py_ssize_t i;
2362
2363 /* search for the first non-ASCII character */
2364 for (i = 0; i < hexlen; i++) {
2365 if (PyUnicode_READ(kind, data, i) >= 128)
2366 break;
2367 }
2368 invalid_char = i;
2369 goto error;
2370 }
2371
2372 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2373 str = PyUnicode_1BYTE_DATA(string);
2374
2375 /* This overestimates if there are spaces */
2376 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2377 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002379
2380 end = str + hexlen;
2381 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002383 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002384 do {
2385 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002386 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002387 if (str >= end)
2388 break;
2389 }
2390
2391 top = _PyLong_DigitValue[*str];
2392 if (top >= 16) {
2393 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 goto error;
2395 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002396 str++;
2397
2398 bot = _PyLong_DigitValue[*str];
2399 if (bot >= 16) {
2400 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2401 goto error;
2402 }
2403 str++;
2404
2405 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002407
2408 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409
2410 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002411 PyErr_Format(PyExc_ValueError,
2412 "non-hexadecimal number found in "
2413 "fromhex() arg at position %zd", invalid_char);
2414 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416}
2417
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002418PyDoc_STRVAR(hex__doc__,
2419"B.hex() -> string\n\
2420\n\
2421Create a string of hexadecimal numbers from a bytes object.\n\
2422Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2423
2424static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002425bytes_hex(PyBytesObject *self)
2426{
2427 char* argbuf = PyBytes_AS_STRING(self);
2428 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2429 return _Py_strhex(argbuf, arglen);
2430}
2431
2432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002433bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436}
2437
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002438
2439static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002440bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2442 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2443 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002444 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2445 _Py_center__doc__},
2446 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002447 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002448 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002449 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002450 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002451 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002452 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002453 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002454 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002455 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002456 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2457 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2459 _Py_isalnum__doc__},
2460 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2461 _Py_isalpha__doc__},
INADA Naokia49ac992018-01-27 14:06:21 +09002462 {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
2463 _Py_isascii__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2465 _Py_isdigit__doc__},
2466 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2467 _Py_islower__doc__},
2468 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2469 _Py_isspace__doc__},
2470 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2471 _Py_istitle__doc__},
2472 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2473 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002474 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002475 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002477 BYTES_LSTRIP_METHODDEF
2478 BYTES_MAKETRANS_METHODDEF
2479 BYTES_PARTITION_METHODDEF
2480 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002481 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2482 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002483 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002484 BYTES_RPARTITION_METHODDEF
2485 BYTES_RSPLIT_METHODDEF
2486 BYTES_RSTRIP_METHODDEF
2487 BYTES_SPLIT_METHODDEF
2488 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002489 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002490 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002491 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2493 _Py_swapcase__doc__},
2494 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002497 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002499};
2500
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002502bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002503{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002504 if (!PyBytes_Check(self)) {
2505 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002506 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002507 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002508 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002509}
2510
2511static PyNumberMethods bytes_as_number = {
2512 0, /*nb_add*/
2513 0, /*nb_subtract*/
2514 0, /*nb_multiply*/
2515 bytes_mod, /*nb_remainder*/
2516};
2517
2518static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002519bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520
2521static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002522bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 PyObject *x = NULL;
2525 const char *encoding = NULL;
2526 const char *errors = NULL;
2527 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002528 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 Py_ssize_t size;
2530 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002531 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002534 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2536 &encoding, &errors))
2537 return NULL;
2538 if (x == NULL) {
2539 if (encoding != NULL || errors != NULL) {
2540 PyErr_SetString(PyExc_TypeError,
2541 "encoding or errors without sequence "
2542 "argument");
2543 return NULL;
2544 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002545 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002548 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002550 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002552 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 return NULL;
2554 }
2555 new = PyUnicode_AsEncodedString(x, encoding, errors);
2556 if (new == NULL)
2557 return NULL;
2558 assert(PyBytes_Check(new));
2559 return new;
2560 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002561
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002562 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002563 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002564 PyUnicode_Check(x) ?
2565 "string argument without an encoding" :
2566 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002567 return NULL;
2568 }
2569
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002570 /* We'd like to call PyObject_Bytes here, but we need to check for an
2571 integer argument before deferring to PyBytes_FromObject, something
2572 PyObject_Bytes doesn't do. */
2573 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2574 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002575 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002576 Py_DECREF(func);
2577 if (new == NULL)
2578 return NULL;
2579 if (!PyBytes_Check(new)) {
2580 PyErr_Format(PyExc_TypeError,
2581 "__bytes__ returned non-bytes (type %.200s)",
2582 Py_TYPE(new)->tp_name);
2583 Py_DECREF(new);
2584 return NULL;
2585 }
2586 return new;
2587 }
2588 else if (PyErr_Occurred())
2589 return NULL;
2590
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002591 if (PyUnicode_Check(x)) {
2592 PyErr_SetString(PyExc_TypeError,
2593 "string argument without an encoding");
2594 return NULL;
2595 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002597 if (PyIndex_Check(x)) {
2598 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2599 if (size == -1 && PyErr_Occurred()) {
INADA Naokia634e232017-01-06 17:32:01 +09002600 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2601 return NULL;
2602 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002603 }
INADA Naokia634e232017-01-06 17:32:01 +09002604 else {
2605 if (size < 0) {
2606 PyErr_SetString(PyExc_ValueError, "negative count");
2607 return NULL;
2608 }
2609 new = _PyBytes_FromSize(size, 1);
2610 if (new == NULL)
2611 return NULL;
2612 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002613 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002616 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002617}
2618
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002619static PyObject*
2620_PyBytes_FromBuffer(PyObject *x)
2621{
2622 PyObject *new;
2623 Py_buffer view;
2624
2625 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2626 return NULL;
2627
2628 new = PyBytes_FromStringAndSize(NULL, view.len);
2629 if (!new)
2630 goto fail;
2631 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2632 &view, view.len, 'C') < 0)
2633 goto fail;
2634 PyBuffer_Release(&view);
2635 return new;
2636
2637fail:
2638 Py_XDECREF(new);
2639 PyBuffer_Release(&view);
2640 return NULL;
2641}
2642
Victor Stinner3c50ce32015-10-14 13:50:40 +02002643#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2644 do { \
2645 PyObject *bytes; \
2646 Py_ssize_t i; \
2647 Py_ssize_t value; \
2648 char *str; \
2649 PyObject *item; \
2650 \
2651 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2652 if (bytes == NULL) \
2653 return NULL; \
2654 str = ((PyBytesObject *)bytes)->ob_sval; \
2655 \
2656 for (i = 0; i < Py_SIZE(x); i++) { \
2657 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002658 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002659 if (value == -1 && PyErr_Occurred()) \
2660 goto error; \
2661 \
2662 if (value < 0 || value >= 256) { \
2663 PyErr_SetString(PyExc_ValueError, \
2664 "bytes must be in range(0, 256)"); \
2665 goto error; \
2666 } \
2667 *str++ = (char) value; \
2668 } \
2669 return bytes; \
2670 \
2671 error: \
2672 Py_DECREF(bytes); \
2673 return NULL; \
2674 } while (0)
2675
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002676static PyObject*
2677_PyBytes_FromList(PyObject *x)
2678{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002679 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002680}
2681
2682static PyObject*
2683_PyBytes_FromTuple(PyObject *x)
2684{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002685 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002686}
2687
2688static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002689_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002690{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002691 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002693 _PyBytesWriter writer;
2694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002696 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 if (size == -1 && PyErr_Occurred())
2698 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002699
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002700 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002701 str = _PyBytesWriter_Alloc(&writer, size);
2702 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002704 writer.overallocate = 1;
2705 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 /* Run the iterator to exhaustion */
2708 for (i = 0; ; i++) {
2709 PyObject *item;
2710 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Get the next item */
2713 item = PyIter_Next(it);
2714 if (item == NULL) {
2715 if (PyErr_Occurred())
2716 goto error;
2717 break;
2718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002721 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 Py_DECREF(item);
2723 if (value == -1 && PyErr_Occurred())
2724 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 /* Range check */
2727 if (value < 0 || value >= 256) {
2728 PyErr_SetString(PyExc_ValueError,
2729 "bytes must be in range(0, 256)");
2730 goto error;
2731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 /* Append the byte */
2734 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002735 str = _PyBytesWriter_Resize(&writer, str, size+1);
2736 if (str == NULL)
2737 return NULL;
2738 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002740 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002741 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002742
2743 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744
2745 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002746 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748}
2749
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002750PyObject *
2751PyBytes_FromObject(PyObject *x)
2752{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002753 PyObject *it, *result;
2754
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002755 if (x == NULL) {
2756 PyErr_BadInternalCall();
2757 return NULL;
2758 }
2759
2760 if (PyBytes_CheckExact(x)) {
2761 Py_INCREF(x);
2762 return x;
2763 }
2764
2765 /* Use the modern buffer interface */
2766 if (PyObject_CheckBuffer(x))
2767 return _PyBytes_FromBuffer(x);
2768
2769 if (PyList_CheckExact(x))
2770 return _PyBytes_FromList(x);
2771
2772 if (PyTuple_CheckExact(x))
2773 return _PyBytes_FromTuple(x);
2774
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002775 if (!PyUnicode_Check(x)) {
2776 it = PyObject_GetIter(x);
2777 if (it != NULL) {
2778 result = _PyBytes_FromIterator(it, x);
2779 Py_DECREF(it);
2780 return result;
2781 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002782 }
2783
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002784 PyErr_Format(PyExc_TypeError,
2785 "cannot convert '%.200s' object to bytes",
2786 x->ob_type->tp_name);
2787 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002788}
2789
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002791bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 PyObject *tmp, *pnew;
2794 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 assert(PyType_IsSubtype(type, &PyBytes_Type));
2797 tmp = bytes_new(&PyBytes_Type, args, kwds);
2798 if (tmp == NULL)
2799 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002800 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002801 n = PyBytes_GET_SIZE(tmp);
2802 pnew = type->tp_alloc(type, n);
2803 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002804 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 PyBytes_AS_STRING(tmp), n+1);
2806 ((PyBytesObject *)pnew)->ob_shash =
2807 ((PyBytesObject *)tmp)->ob_shash;
2808 }
2809 Py_DECREF(tmp);
2810 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002811}
2812
/* Docstring of the bytes type; installed in the tp_doc slot of PyBytes_Type.
   It lists the accepted constructor signatures followed by the kinds of
   source object a bytes value can be built from. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002813PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002814"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002816bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002817bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2818bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002819\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002820Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002821 - an iterable yielding integers in range(256)\n\
2822 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002823 - any object implementing the buffer API.\n\
2824 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002825
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002826static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002827
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002828PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002829 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2830 "bytes",
2831 PyBytesObject_SIZE,
2832 sizeof(char),
2833 bytes_dealloc, /* tp_dealloc */
2834 0, /* tp_print */
2835 0, /* tp_getattr */
2836 0, /* tp_setattr */
2837 0, /* tp_reserved */
2838 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002839 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 &bytes_as_sequence, /* tp_as_sequence */
2841 &bytes_as_mapping, /* tp_as_mapping */
2842 (hashfunc)bytes_hash, /* tp_hash */
2843 0, /* tp_call */
2844 bytes_str, /* tp_str */
2845 PyObject_GenericGetAttr, /* tp_getattro */
2846 0, /* tp_setattro */
2847 &bytes_as_buffer, /* tp_as_buffer */
2848 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2849 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2850 bytes_doc, /* tp_doc */
2851 0, /* tp_traverse */
2852 0, /* tp_clear */
2853 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2854 0, /* tp_weaklistoffset */
2855 bytes_iter, /* tp_iter */
2856 0, /* tp_iternext */
2857 bytes_methods, /* tp_methods */
2858 0, /* tp_members */
2859 0, /* tp_getset */
2860 &PyBaseObject_Type, /* tp_base */
2861 0, /* tp_dict */
2862 0, /* tp_descr_get */
2863 0, /* tp_descr_set */
2864 0, /* tp_dictoffset */
2865 0, /* tp_init */
2866 0, /* tp_alloc */
2867 bytes_new, /* tp_new */
2868 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002869};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002870
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002872PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 assert(pv != NULL);
2875 if (*pv == NULL)
2876 return;
2877 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002878 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002879 return;
2880 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002881
2882 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2883 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002884 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002885 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002886
Antoine Pitrou161d6952014-05-01 14:36:20 +02002887 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002888 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002889 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2890 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2891 Py_CLEAR(*pv);
2892 return;
2893 }
2894
2895 oldsize = PyBytes_GET_SIZE(*pv);
2896 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2897 PyErr_NoMemory();
2898 goto error;
2899 }
2900 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2901 goto error;
2902
2903 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2904 PyBuffer_Release(&wb);
2905 return;
2906
2907 error:
2908 PyBuffer_Release(&wb);
2909 Py_CLEAR(*pv);
2910 return;
2911 }
2912
2913 else {
2914 /* Multiple references, need to create new object */
2915 PyObject *v;
2916 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002917 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002918 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919}
2920
2921void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002922PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002923{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 PyBytes_Concat(pv, w);
2925 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926}
2927
2928
Ethan Furmanb95b5612015-01-23 20:05:18 -08002929/* The following function breaks the notion that bytes are immutable:
2930 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002931 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002932 as creating a new bytes object and destroying the old one, only
2933 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002935 Note that if there's not enough memory to resize the bytes object, the
2936 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937 memory" exception is set, and -1 is returned. Else (on success) 0 is
2938 returned, and the value in *pv may or may not be the same as on input.
2939 As always, an extra byte is allocated for a trailing \0 byte (newsize
2940 does *not* include that), and a trailing \0 byte is stored.
2941*/
2942
2943int
2944_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2945{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002946 PyObject *v;
2947 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002949 if (!PyBytes_Check(v) || newsize < 0) {
2950 goto error;
2951 }
2952 if (Py_SIZE(v) == newsize) {
2953 /* return early if newsize equals to v->ob_size */
2954 return 0;
2955 }
2956 if (Py_REFCNT(v) != 1) {
2957 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 }
2959 /* XXX UNREF/NEWREF interface should be more symmetrical */
2960 _Py_DEC_REFTOTAL;
2961 _Py_ForgetReference(v);
2962 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002963 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 if (*pv == NULL) {
2965 PyObject_Del(v);
2966 PyErr_NoMemory();
2967 return -1;
2968 }
2969 _Py_NewReference(*pv);
2970 sv = (PyBytesObject *) *pv;
2971 Py_SIZE(sv) = newsize;
2972 sv->ob_sval[newsize] = '\0';
2973 sv->ob_shash = -1; /* invalidate cached hash value */
2974 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002975error:
2976 *pv = 0;
2977 Py_DECREF(v);
2978 PyErr_BadInternalCall();
2979 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980}
2981
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002982void
2983PyBytes_Fini(void)
2984{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002986 for (i = 0; i < UCHAR_MAX + 1; i++)
2987 Py_CLEAR(characters[i]);
2988 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989}
2990
Benjamin Peterson4116f362008-05-27 00:36:20 +00002991/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002992
2993typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 PyObject_HEAD
2995 Py_ssize_t it_index;
2996 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002997} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002998
2999static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 _PyObject_GC_UNTRACK(it);
3003 Py_XDECREF(it->it_seq);
3004 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005}
3006
3007static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 Py_VISIT(it->it_seq);
3011 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012}
3013
3014static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 PyBytesObject *seq;
3018 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 assert(it != NULL);
3021 seq = it->it_seq;
3022 if (seq == NULL)
3023 return NULL;
3024 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3027 item = PyLong_FromLong(
3028 (unsigned char)seq->ob_sval[it->it_index]);
3029 if (item != NULL)
3030 ++it->it_index;
3031 return item;
3032 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003035 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037}
3038
3039static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 Py_ssize_t len = 0;
3043 if (it->it_seq)
3044 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3045 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046}
3047
3048PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003050
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003051static PyObject *
3052striter_reduce(striterobject *it)
3053{
3054 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003055 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003056 it->it_seq, it->it_index);
3057 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003058 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003059 }
3060}
3061
3062PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3063
3064static PyObject *
3065striter_setstate(striterobject *it, PyObject *state)
3066{
3067 Py_ssize_t index = PyLong_AsSsize_t(state);
3068 if (index == -1 && PyErr_Occurred())
3069 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003070 if (it->it_seq != NULL) {
3071 if (index < 0)
3072 index = 0;
3073 else if (index > PyBytes_GET_SIZE(it->it_seq))
3074 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3075 it->it_index = index;
3076 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003077 Py_RETURN_NONE;
3078}
3079
3080PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3084 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003085 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3086 reduce_doc},
3087 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3088 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003090};
3091
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003092PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3094 "bytes_iterator", /* tp_name */
3095 sizeof(striterobject), /* tp_basicsize */
3096 0, /* tp_itemsize */
3097 /* methods */
3098 (destructor)striter_dealloc, /* tp_dealloc */
3099 0, /* tp_print */
3100 0, /* tp_getattr */
3101 0, /* tp_setattr */
3102 0, /* tp_reserved */
3103 0, /* tp_repr */
3104 0, /* tp_as_number */
3105 0, /* tp_as_sequence */
3106 0, /* tp_as_mapping */
3107 0, /* tp_hash */
3108 0, /* tp_call */
3109 0, /* tp_str */
3110 PyObject_GenericGetAttr, /* tp_getattro */
3111 0, /* tp_setattro */
3112 0, /* tp_as_buffer */
3113 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3114 0, /* tp_doc */
3115 (traverseproc)striter_traverse, /* tp_traverse */
3116 0, /* tp_clear */
3117 0, /* tp_richcompare */
3118 0, /* tp_weaklistoffset */
3119 PyObject_SelfIter, /* tp_iter */
3120 (iternextfunc)striter_next, /* tp_iternext */
3121 striter_methods, /* tp_methods */
3122 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123};
3124
3125static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003126bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003130 if (!PyBytes_Check(seq)) {
3131 PyErr_BadInternalCall();
3132 return NULL;
3133 }
3134 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3135 if (it == NULL)
3136 return NULL;
3137 it->it_index = 0;
3138 Py_INCREF(seq);
3139 it->it_seq = (PyBytesObject *)seq;
3140 _PyObject_GC_TRACK(it);
3141 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003142}
Victor Stinner00165072015-10-09 01:53:21 +02003143
3144
3145/* _PyBytesWriter API */
3146
/* Divisor applied by _PyBytesWriter_Resize() when overallocation is
   enabled: the buffer grows by an extra size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3154
3155void
3156_PyBytesWriter_Init(_PyBytesWriter *writer)
3157{
Victor Stinner661aacc2015-10-14 09:41:48 +02003158 /* Set all attributes before small_buffer to 0 */
3159 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003160#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003161 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003162#endif
3163}
3164
3165void
3166_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3167{
3168 Py_CLEAR(writer->buffer);
3169}
3170
3171Py_LOCAL_INLINE(char*)
3172_PyBytesWriter_AsString(_PyBytesWriter *writer)
3173{
Victor Stinner661aacc2015-10-14 09:41:48 +02003174 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003175 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003176 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003177 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003178 else if (writer->use_bytearray) {
3179 assert(writer->buffer != NULL);
3180 return PyByteArray_AS_STRING(writer->buffer);
3181 }
3182 else {
3183 assert(writer->buffer != NULL);
3184 return PyBytes_AS_STRING(writer->buffer);
3185 }
Victor Stinner00165072015-10-09 01:53:21 +02003186}
3187
3188Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003189_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003190{
3191 char *start = _PyBytesWriter_AsString(writer);
3192 assert(str != NULL);
3193 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003194 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003195 return str - start;
3196}
3197
3198Py_LOCAL_INLINE(void)
3199_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3200{
3201#ifdef Py_DEBUG
3202 char *start, *end;
3203
Victor Stinner661aacc2015-10-14 09:41:48 +02003204 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003205 assert(writer->buffer == NULL);
3206 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003207 else {
3208 assert(writer->buffer != NULL);
3209 if (writer->use_bytearray)
3210 assert(PyByteArray_CheckExact(writer->buffer));
3211 else
3212 assert(PyBytes_CheckExact(writer->buffer));
3213 assert(Py_REFCNT(writer->buffer) == 1);
3214 }
Victor Stinner00165072015-10-09 01:53:21 +02003215
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 if (writer->use_bytearray) {
3217 /* bytearray has its own overallocation algorithm,
3218 writer overallocation must be disabled */
3219 assert(!writer->overallocate);
3220 }
3221
3222 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003223 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003224 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003225 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003226 assert(start[writer->allocated] == 0);
3227
3228 end = start + writer->allocated;
3229 assert(str != NULL);
3230 assert(start <= str && str <= end);
3231#endif
3232}
3233
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003234void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003235_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003236{
3237 Py_ssize_t allocated, pos;
3238
3239 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003240 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003241
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003242 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003243 if (writer->overallocate
3244 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3245 /* overallocate to limit the number of realloc() */
3246 allocated += allocated / OVERALLOCATE_FACTOR;
3247 }
3248
Victor Stinner2bf89932015-10-14 11:25:33 +02003249 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003250 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003251 if (writer->use_bytearray) {
3252 if (PyByteArray_Resize(writer->buffer, allocated))
3253 goto error;
3254 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3255 but we cannot use ob_alloc because bytes may need to be moved
3256 to use the whole buffer. bytearray uses an internal optimization
3257 to avoid moving or copying bytes when bytes are removed at the
3258 beginning (ex: del bytearray[:1]). */
3259 }
3260 else {
3261 if (_PyBytes_Resize(&writer->buffer, allocated))
3262 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003263 }
3264 }
3265 else {
3266 /* convert from stack buffer to bytes object buffer */
3267 assert(writer->buffer == NULL);
3268
Victor Stinner661aacc2015-10-14 09:41:48 +02003269 if (writer->use_bytearray)
3270 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3271 else
3272 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003273 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003274 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003275
3276 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003277 char *dest;
3278 if (writer->use_bytearray)
3279 dest = PyByteArray_AS_STRING(writer->buffer);
3280 else
3281 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003282 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003283 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003284 pos);
3285 }
3286
Victor Stinnerb3653a32015-10-09 03:38:24 +02003287 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003288#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003289 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003290#endif
Victor Stinner00165072015-10-09 01:53:21 +02003291 }
3292 writer->allocated = allocated;
3293
3294 str = _PyBytesWriter_AsString(writer) + pos;
3295 _PyBytesWriter_CheckConsistency(writer, str);
3296 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003297
3298error:
3299 _PyBytesWriter_Dealloc(writer);
3300 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003301}
3302
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003303void*
3304_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3305{
3306 Py_ssize_t new_min_size;
3307
3308 _PyBytesWriter_CheckConsistency(writer, str);
3309 assert(size >= 0);
3310
3311 if (size == 0) {
3312 /* nothing to do */
3313 return str;
3314 }
3315
3316 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3317 PyErr_NoMemory();
3318 _PyBytesWriter_Dealloc(writer);
3319 return NULL;
3320 }
3321 new_min_size = writer->min_size + size;
3322
3323 if (new_min_size > writer->allocated)
3324 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3325
3326 writer->min_size = new_min_size;
3327 return str;
3328}
3329
Victor Stinner00165072015-10-09 01:53:21 +02003330/* Allocate the buffer to write size bytes.
3331 Return the pointer to the beginning of buffer data.
3332 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003333void*
Victor Stinner00165072015-10-09 01:53:21 +02003334_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3335{
3336 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003337 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003338 assert(size >= 0);
3339
Victor Stinnerb3653a32015-10-09 03:38:24 +02003340 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003341#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003342 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003343 /* In debug mode, don't use the full small buffer because it is less
3344 efficient than bytes and bytearray objects to detect buffer underflow
3345 and buffer overflow. Use 10 bytes of the small buffer to test also
3346 code using the smaller buffer in debug mode.
3347
3348 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3349 in debug mode to also be able to detect stack overflow when running
3350 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3351 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3352 stack overflow. */
3353 writer->allocated = Py_MIN(writer->allocated, 10);
3354 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3355 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003356 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003357#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003358 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003359#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003360 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003361}
3362
3363PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003364_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003365{
Victor Stinner2bf89932015-10-14 11:25:33 +02003366 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003367 PyObject *result;
3368
3369 _PyBytesWriter_CheckConsistency(writer, str);
3370
Victor Stinner2bf89932015-10-14 11:25:33 +02003371 size = _PyBytesWriter_GetSize(writer, str);
3372 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003373 Py_CLEAR(writer->buffer);
3374 /* Get the empty byte string singleton */
3375 result = PyBytes_FromStringAndSize(NULL, 0);
3376 }
3377 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003378 if (writer->use_bytearray) {
3379 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3380 }
3381 else {
3382 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3383 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 }
3385 else {
3386 result = writer->buffer;
3387 writer->buffer = NULL;
3388
Victor Stinner2bf89932015-10-14 11:25:33 +02003389 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003390 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003391 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003392 Py_DECREF(result);
3393 return NULL;
3394 }
3395 }
3396 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003397 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003398 assert(result == NULL);
3399 return NULL;
3400 }
Victor Stinner00165072015-10-09 01:53:21 +02003401 }
3402 }
Victor Stinner00165072015-10-09 01:53:21 +02003403 }
Victor Stinner00165072015-10-09 01:53:21 +02003404 return result;
3405}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003406
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003407void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003408_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003409 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003410{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003411 char *str = (char *)ptr;
3412
Victor Stinnerce179bf2015-10-09 12:57:22 +02003413 str = _PyBytesWriter_Prepare(writer, str, size);
3414 if (str == NULL)
3415 return NULL;
3416
Christian Heimesf051e432016-09-13 20:22:02 +02003417 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003418 str += size;
3419
3420 return str;
3421}