blob: d91cb7d87245f7c2d5deaae818c8e9915df10d20 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006#include "internal/mem.h"
7#include "internal/pystate.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00008
Gregory P. Smith60d241f2007-10-16 06:31:30 +00009#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +000010#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +000011#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000012
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030016/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020017
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030018#include "clinic/bytesobject.c.h"
19
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000021Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000023
Christian Heimes2c9c7a52008-05-26 13:42:13 +000024static PyBytesObject *characters[UCHAR_MAX + 1];
25static PyBytesObject *nullstring;
26
Mark Dickinsonfd24b322008-12-06 15:33:31 +000027/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
Victor Stinner2bf89932015-10-14 11:25:33 +020035/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000040 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
Martin Pantera90a4a92016-05-30 04:04:50 +000043 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000051 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020056 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020061static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000063{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020064 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020065 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 Py_INCREF(op);
72 return (PyObject *)op;
73 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000074
Victor Stinner049e5092014-08-17 22:20:00 +020075 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 PyErr_SetString(PyExc_OverflowError,
77 "byte string is too large");
78 return NULL;
79 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020082 if (use_calloc)
83 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84 else
85 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 if (op == NULL)
87 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010088 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 if (!use_calloc)
91 op->ob_sval[size] = '\0';
92 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 if (size == 0) {
94 nullstring = op;
95 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020096 }
97 return (PyObject *) op;
98}
99
100PyObject *
101PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102{
103 PyBytesObject *op;
104 if (size < 0) {
105 PyErr_SetString(PyExc_SystemError,
106 "Negative size passed to PyBytes_FromStringAndSize");
107 return NULL;
108 }
109 if (size == 1 && str != NULL &&
110 (op = characters[*str & UCHAR_MAX]) != NULL)
111 {
112#ifdef COUNT_ALLOCS
113 one_strings++;
114#endif
115 Py_INCREF(op);
116 return (PyObject *)op;
117 }
118
119 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120 if (op == NULL)
121 return NULL;
122 if (str == NULL)
123 return (PyObject *) op;
124
Christian Heimesf051e432016-09-13 20:22:02 +0200125 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200126 /* share short strings */
127 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 characters[*str & UCHAR_MAX] = op;
129 Py_INCREF(op);
130 }
131 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000132}
133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134PyObject *
135PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000136{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200137 size_t size;
138 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 assert(str != NULL);
141 size = strlen(str);
142 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143 PyErr_SetString(PyExc_OverflowError,
144 "byte string is too long");
145 return NULL;
146 }
147 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000150#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000157#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 Py_INCREF(op);
159 return (PyObject *)op;
160 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 /* Inline PyObject_NewVar */
163 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164 if (op == NULL)
165 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100166 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200168 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 /* share short strings */
170 if (size == 0) {
171 nullstring = op;
172 Py_INCREF(op);
173 } else if (size == 1) {
174 characters[*str & UCHAR_MAX] = op;
175 Py_INCREF(op);
176 }
177 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000179
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000180PyObject *
181PyBytes_FromFormatV(const char *format, va_list vargs)
182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200184 const char *f;
185 const char *p;
186 Py_ssize_t prec;
187 int longflag;
188 int size_tflag;
189 /* Longest 64-bit formatted numbers:
190 - "18446744073709551615\0" (21 bytes)
191 - "-9223372036854775808\0" (21 bytes)
192 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000193
Victor Stinner03dab782015-10-14 00:21:35 +0200194 Longest 64-bit pointer representation:
195 "0xffffffffffffffff\0" (19 bytes). */
196 char buffer[21];
197 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200
Victor Stinner03dab782015-10-14 00:21:35 +0200201 s = _PyBytesWriter_Alloc(&writer, strlen(format));
202 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200204 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000205
Victor Stinner03dab782015-10-14 00:21:35 +0200206#define WRITE_BYTES(str) \
207 do { \
208 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209 if (s == NULL) \
210 goto error; \
211 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200214 if (*f != '%') {
215 *s++ = *f;
216 continue;
217 }
218
219 p = f++;
220
221 /* ignore the width (ex: 10 in "%10s") */
222 while (Py_ISDIGIT(*f))
223 f++;
224
225 /* parse the precision (ex: 10 in "%.10s") */
226 prec = 0;
227 if (*f == '.') {
228 f++;
229 for (; Py_ISDIGIT(*f); f++) {
230 prec = (prec * 10) + (*f - '0');
231 }
232 }
233
234 while (*f && *f != '%' && !Py_ISALPHA(*f))
235 f++;
236
237 /* handle the long flag ('l'), but only for %ld and %lu.
238 others can be added when necessary. */
239 longflag = 0;
240 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241 longflag = 1;
242 ++f;
243 }
244
245 /* handle the size_t flag ('z'). */
246 size_tflag = 0;
247 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248 size_tflag = 1;
249 ++f;
250 }
251
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700252 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200253 (ex: 2 for "%s") */
254 writer.min_size -= (f - p + 1);
255
256 switch (*f) {
257 case 'c':
258 {
259 int c = va_arg(vargs, int);
260 if (c < 0 || c > 255) {
261 PyErr_SetString(PyExc_OverflowError,
262 "PyBytes_FromFormatV(): %c format "
263 "expects an integer in range [0; 255]");
264 goto error;
265 }
266 writer.min_size++;
267 *s++ = (unsigned char)c;
268 break;
269 }
270
271 case 'd':
272 if (longflag)
273 sprintf(buffer, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(buffer, "%d", va_arg(vargs, int));
279 assert(strlen(buffer) < sizeof(buffer));
280 WRITE_BYTES(buffer);
281 break;
282
283 case 'u':
284 if (longflag)
285 sprintf(buffer, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(buffer, "%u",
292 va_arg(vargs, unsigned int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'i':
298 sprintf(buffer, "%i", va_arg(vargs, int));
299 assert(strlen(buffer) < sizeof(buffer));
300 WRITE_BYTES(buffer);
301 break;
302
303 case 'x':
304 sprintf(buffer, "%x", va_arg(vargs, int));
305 assert(strlen(buffer) < sizeof(buffer));
306 WRITE_BYTES(buffer);
307 break;
308
309 case 's':
310 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200312
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200313 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200314 i = strlen(p);
315 if (prec > 0 && i > prec)
316 i = prec;
317 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
318 if (s == NULL)
319 goto error;
320 break;
321 }
322
323 case 'p':
324 sprintf(buffer, "%p", va_arg(vargs, void*));
325 assert(strlen(buffer) < sizeof(buffer));
326 /* %p is ill-defined: ensure leading 0x. */
327 if (buffer[1] == 'X')
328 buffer[1] = 'x';
329 else if (buffer[1] != 'x') {
330 memmove(buffer+2, buffer, strlen(buffer)+1);
331 buffer[0] = '0';
332 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 }
Victor Stinner03dab782015-10-14 00:21:35 +0200334 WRITE_BYTES(buffer);
335 break;
336
337 case '%':
338 writer.min_size++;
339 *s++ = '%';
340 break;
341
342 default:
343 if (*f == 0) {
344 /* fix min_size if we reached the end of the format string */
345 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348 /* invalid format string: copy unformatted string and exit */
349 WRITE_BYTES(p);
350 return _PyBytesWriter_Finish(&writer, s);
351 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
Victor Stinner03dab782015-10-14 00:21:35 +0200354#undef WRITE_BYTES
355
356 return _PyBytesWriter_Finish(&writer, s);
357
358 error:
359 _PyBytesWriter_Dealloc(&writer);
360 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
404#define F_LJUST (1<<0)
405#define F_SIGN (1<<1)
406#define F_BLANK (1<<2)
407#define F_ALT (1<<3)
408#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200442 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200443 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200444 str += len;
445 return str;
446 }
447
448 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200450 *p_result = result;
451 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800452}
453
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300454static PyObject *
455formatlong(PyObject *v, int flags, int prec, int type)
456{
457 PyObject *result, *iobj;
458 if (type == 'i')
459 type = 'd';
460 if (PyLong_Check(v))
461 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462 if (PyNumber_Check(v)) {
463 /* make sure number is a type of integer for o, x, and X */
464 if (type == 'o' || type == 'x' || type == 'X')
465 iobj = PyNumber_Index(v);
466 else
467 iobj = PyNumber_Long(v);
468 if (iobj == NULL) {
469 if (!PyErr_ExceptionMatches(PyExc_TypeError))
470 return NULL;
471 }
472 else if (!PyLong_Check(iobj))
473 Py_CLEAR(iobj);
474 if (iobj != NULL) {
475 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
476 Py_DECREF(iobj);
477 return result;
478 }
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486}
487
488static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200489byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800490{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800494 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800498 }
499 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300500 PyObject *iobj;
501 long ival;
502 int overflow;
503 /* make sure number is a type of integer */
504 if (PyLong_Check(arg)) {
505 ival = PyLong_AsLongAndOverflow(arg, &overflow);
506 }
507 else {
508 iobj = PyNumber_Index(arg);
509 if (iobj == NULL) {
510 if (!PyErr_ExceptionMatches(PyExc_TypeError))
511 return 0;
512 goto onError;
513 }
514 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
515 Py_DECREF(iobj);
516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 if (!overflow && ival == -1 && PyErr_Occurred())
518 goto onError;
519 if (overflow || !(0 <= ival && ival <= 255)) {
520 PyErr_SetString(PyExc_OverflowError,
521 "%c arg not in range(256)");
522 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300524 *p = (char)ival;
525 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300527 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 PyErr_SetString(PyExc_TypeError,
529 "%c requires an integer in range(256) or a single byte");
530 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531}
532
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800533static PyObject *_PyBytes_FromBuffer(PyObject *x);
534
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 /* is it a bytes object? */
541 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 *pbuf = PyBytes_AS_STRING(v);
543 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 return v;
546 }
547 if (PyByteArray_Check(v)) {
548 *pbuf = PyByteArray_AS_STRING(v);
549 *plen = PyByteArray_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 }
553 /* does it support __bytes__? */
554 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100556 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800557 Py_DECREF(func);
558 if (result == NULL)
559 return NULL;
560 if (!PyBytes_Check(result)) {
561 PyErr_Format(PyExc_TypeError,
562 "__bytes__ returned non-bytes (type %.200s)",
563 Py_TYPE(result)->tp_name);
564 Py_DECREF(result);
565 return NULL;
566 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 *pbuf = PyBytes_AS_STRING(result);
568 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569 return result;
570 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800571 /* does it support buffer protocol? */
572 if (PyObject_CheckBuffer(v)) {
573 /* maybe we can avoid making a copy of the buffer object here? */
574 result = _PyBytes_FromBuffer(v);
575 if (result == NULL)
576 return NULL;
577 *pbuf = PyBytes_AS_STRING(result);
578 *plen = PyBytes_GET_SIZE(result);
579 return result;
580 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800582 "%%b requires a bytes-like object, "
583 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 Py_TYPE(v)->tp_name);
585 return NULL;
586}
587
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200588/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589
590PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200591_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
592 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800593{
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 const char *fmt;
595 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200597 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200600 _PyBytesWriter writer;
601
Victor Stinner772b2b02015-10-14 09:56:53 +0200602 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 PyErr_BadInternalCall();
604 return NULL;
605 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200606 fmt = format;
607 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200608
609 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200610 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200611
612 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
613 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200615 if (!use_bytearray)
616 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200617
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 if (PyTuple_Check(args)) {
619 arglen = PyTuple_GET_SIZE(args);
620 argidx = 0;
621 }
622 else {
623 arglen = -1;
624 argidx = -2;
625 }
626 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
627 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
628 !PyByteArray_Check(args)) {
629 dict = args;
630 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200631
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 while (--fmtcnt >= 0) {
633 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200634 Py_ssize_t len;
635 char *pos;
636
Xiang Zhangb76ad512017-03-06 17:17:05 +0800637 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200638 if (pos != NULL)
639 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200640 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800641 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642 assert(len != 0);
643
Christian Heimesf051e432016-09-13 20:22:02 +0200644 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200645 res += len;
646 fmt += len;
647 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800648 }
649 else {
650 /* Got a format specifier */
651 int flags = 0;
652 Py_ssize_t width = -1;
653 int prec = -1;
654 int c = '\0';
655 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800656 PyObject *v = NULL;
657 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200658 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800659 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200660 Py_ssize_t len = 0;
661 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200662 Py_ssize_t alloc;
663#ifdef Py_DEBUG
664 char *before;
665#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800666
Ethan Furmanb95b5612015-01-23 20:05:18 -0800667 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200668 if (*fmt == '%') {
669 *res++ = '%';
670 fmt++;
671 fmtcnt--;
672 continue;
673 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200675 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800676 Py_ssize_t keylen;
677 PyObject *key;
678 int pcount = 1;
679
680 if (dict == NULL) {
681 PyErr_SetString(PyExc_TypeError,
682 "format requires a mapping");
683 goto error;
684 }
685 ++fmt;
686 --fmtcnt;
687 keystart = fmt;
688 /* Skip over balanced parentheses */
689 while (pcount > 0 && --fmtcnt >= 0) {
690 if (*fmt == ')')
691 --pcount;
692 else if (*fmt == '(')
693 ++pcount;
694 fmt++;
695 }
696 keylen = fmt - keystart - 1;
697 if (fmtcnt < 0 || pcount > 0) {
698 PyErr_SetString(PyExc_ValueError,
699 "incomplete format key");
700 goto error;
701 }
702 key = PyBytes_FromStringAndSize(keystart,
703 keylen);
704 if (key == NULL)
705 goto error;
706 if (args_owned) {
707 Py_DECREF(args);
708 args_owned = 0;
709 }
710 args = PyObject_GetItem(dict, key);
711 Py_DECREF(key);
712 if (args == NULL) {
713 goto error;
714 }
715 args_owned = 1;
716 arglen = -1;
717 argidx = -2;
718 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200719
720 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800721 while (--fmtcnt >= 0) {
722 switch (c = *fmt++) {
723 case '-': flags |= F_LJUST; continue;
724 case '+': flags |= F_SIGN; continue;
725 case ' ': flags |= F_BLANK; continue;
726 case '#': flags |= F_ALT; continue;
727 case '0': flags |= F_ZERO; continue;
728 }
729 break;
730 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200731
732 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800733 if (c == '*') {
734 v = getnextarg(args, arglen, &argidx);
735 if (v == NULL)
736 goto error;
737 if (!PyLong_Check(v)) {
738 PyErr_SetString(PyExc_TypeError,
739 "* wants int");
740 goto error;
741 }
742 width = PyLong_AsSsize_t(v);
743 if (width == -1 && PyErr_Occurred())
744 goto error;
745 if (width < 0) {
746 flags |= F_LJUST;
747 width = -width;
748 }
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 }
752 else if (c >= 0 && isdigit(c)) {
753 width = c - '0';
754 while (--fmtcnt >= 0) {
755 c = Py_CHARMASK(*fmt++);
756 if (!isdigit(c))
757 break;
758 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
759 PyErr_SetString(
760 PyExc_ValueError,
761 "width too big");
762 goto error;
763 }
764 width = width*10 + (c - '0');
765 }
766 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200767
768 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800769 if (c == '.') {
770 prec = 0;
771 if (--fmtcnt >= 0)
772 c = *fmt++;
773 if (c == '*') {
774 v = getnextarg(args, arglen, &argidx);
775 if (v == NULL)
776 goto error;
777 if (!PyLong_Check(v)) {
778 PyErr_SetString(
779 PyExc_TypeError,
780 "* wants int");
781 goto error;
782 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800784 if (prec == -1 && PyErr_Occurred())
785 goto error;
786 if (prec < 0)
787 prec = 0;
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 else if (c >= 0 && isdigit(c)) {
792 prec = c - '0';
793 while (--fmtcnt >= 0) {
794 c = Py_CHARMASK(*fmt++);
795 if (!isdigit(c))
796 break;
797 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
798 PyErr_SetString(
799 PyExc_ValueError,
800 "prec too big");
801 goto error;
802 }
803 prec = prec*10 + (c - '0');
804 }
805 }
806 } /* prec */
807 if (fmtcnt >= 0) {
808 if (c == 'h' || c == 'l' || c == 'L') {
809 if (--fmtcnt >= 0)
810 c = *fmt++;
811 }
812 }
813 if (fmtcnt < 0) {
814 PyErr_SetString(PyExc_ValueError,
815 "incomplete format");
816 goto error;
817 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200818 v = getnextarg(args, arglen, &argidx);
819 if (v == NULL)
820 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200821
822 if (fmtcnt < 0) {
823 /* last writer: disable writer overallocation */
824 writer.overallocate = 0;
825 }
826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 sign = 0;
828 fill = ' ';
829 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700830 case 'r':
831 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200833 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (temp == NULL)
835 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200836 assert(PyUnicode_IS_ASCII(temp));
837 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
838 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200842
Ethan Furmanb95b5612015-01-23 20:05:18 -0800843 case 's':
844 // %s is only for 2/3 code; 3 only code should use %b
845 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200846 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 if (temp == NULL)
848 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800849 if (prec >= 0 && len > prec)
850 len = prec;
851 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200852
Ethan Furmanb95b5612015-01-23 20:05:18 -0800853 case 'i':
854 case 'd':
855 case 'u':
856 case 'o':
857 case 'x':
858 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200859 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200860 && width == -1 && prec == -1
861 && !(flags & (F_SIGN | F_BLANK))
862 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200863 {
864 /* Fast path */
865 int alternate = flags & F_ALT;
866 int base;
867
868 switch(c)
869 {
870 default:
871 assert(0 && "'type' not in [diuoxX]");
872 case 'd':
873 case 'i':
874 case 'u':
875 base = 10;
876 break;
877 case 'o':
878 base = 8;
879 break;
880 case 'x':
881 case 'X':
882 base = 16;
883 break;
884 }
885
886 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200887 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200888 res = _PyLong_FormatBytesWriter(&writer, res,
889 v, base, alternate);
890 if (res == NULL)
891 goto error;
892 continue;
893 }
894
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300895 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200896 if (!temp)
897 goto error;
898 assert(PyUnicode_IS_ASCII(temp));
899 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
900 len = PyUnicode_GET_LENGTH(temp);
901 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800902 if (flags & F_ZERO)
903 fill = '0';
904 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200905
Ethan Furmanb95b5612015-01-23 20:05:18 -0800906 case 'e':
907 case 'E':
908 case 'f':
909 case 'F':
910 case 'g':
911 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912 if (width == -1 && prec == -1
913 && !(flags & (F_SIGN | F_BLANK)))
914 {
915 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200916 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200917 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200918 if (res == NULL)
919 goto error;
920 continue;
921 }
922
Victor Stinnerad771582015-10-09 12:38:53 +0200923 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 goto error;
925 pbuf = PyBytes_AS_STRING(temp);
926 len = PyBytes_GET_SIZE(temp);
927 sign = 1;
928 if (flags & F_ZERO)
929 fill = '0';
930 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200931
Ethan Furmanb95b5612015-01-23 20:05:18 -0800932 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200933 pbuf = &onechar;
934 len = byte_converter(v, &onechar);
935 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200937 if (width == -1) {
938 /* Fast path */
939 *res++ = onechar;
940 continue;
941 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200943
Ethan Furmanb95b5612015-01-23 20:05:18 -0800944 default:
945 PyErr_Format(PyExc_ValueError,
946 "unsupported format character '%c' (0x%x) "
947 "at index %zd",
948 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200949 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800950 goto error;
951 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
Ethan Furmanb95b5612015-01-23 20:05:18 -0800953 if (sign) {
954 if (*pbuf == '-' || *pbuf == '+') {
955 sign = *pbuf++;
956 len--;
957 }
958 else if (flags & F_SIGN)
959 sign = '+';
960 else if (flags & F_BLANK)
961 sign = ' ';
962 else
963 sign = 0;
964 }
965 if (width < len)
966 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200967
968 alloc = width;
969 if (sign != 0 && len == width)
970 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200971 /* 2: size preallocated for %s */
972 if (alloc > 2) {
973 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200974 if (res == NULL)
975 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200977#ifdef Py_DEBUG
978 before = res;
979#endif
980
981 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 if (sign) {
983 if (fill != ' ')
984 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800985 if (width > len)
986 width--;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Write the numeric prefix for "x", "X" and "o" formats
990 if the alternate form is used.
991 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200992 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800993 assert(pbuf[0] == '0');
994 assert(pbuf[1] == c);
995 if (fill != ' ') {
996 *res++ = *pbuf++;
997 *res++ = *pbuf++;
998 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 width -= 2;
1000 if (width < 0)
1001 width = 0;
1002 len -= 2;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
1005 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001006 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001007 memset(res, fill, width - len);
1008 res += (width - len);
1009 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001010 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011
1012 /* If padding with spaces: write sign if needed and/or numeric
1013 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 if (fill == ' ') {
1015 if (sign)
1016 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001017 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 assert(pbuf[0] == '0');
1019 assert(pbuf[1] == c);
1020 *res++ = *pbuf++;
1021 *res++ = *pbuf++;
1022 }
1023 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001024
1025 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001026 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029 /* Pad right with the fill character if needed */
1030 if (width > len) {
1031 memset(res, ' ', width - len);
1032 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001035 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 Py_XDECREF(temp);
1039 goto error;
1040 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001042
1043#ifdef Py_DEBUG
1044 /* check that we computed the exact size for this write */
1045 assert((res - before) == alloc);
1046#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001047 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048
1049 /* If overallocation was disabled, ensure that it was the last
1050 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001051 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001053
Ethan Furmanb95b5612015-01-23 20:05:18 -08001054 if (argidx < arglen && !dict) {
1055 PyErr_SetString(PyExc_TypeError,
1056 "not all arguments converted during bytes formatting");
1057 goto error;
1058 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001059
Ethan Furmanb95b5612015-01-23 20:05:18 -08001060 if (args_owned) {
1061 Py_DECREF(args);
1062 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001063 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001064
1065 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001066 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
1070 return NULL;
1071}
1072
1073/* =-= */
1074
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001075static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001076bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001079}
1080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001081/* Unescape a backslash-escaped string. If unicode is non-zero,
1082 the string is a u-literal. If recode_encoding is non-zero,
1083 the string is UTF-8 encoded and should be re-encoded in the
1084 specified encoding. */
1085
Victor Stinner2ec80632015-10-14 13:32:13 +02001086static char *
1087_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1088 const char *errors, const char *recode_encoding,
1089 _PyBytesWriter *writer, char *p)
1090{
1091 PyObject *u, *w;
1092 const char* t;
1093
1094 t = *s;
1095 /* Decode non-ASCII bytes as UTF-8. */
1096 while (t < end && (*t & 0x80))
1097 t++;
1098 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1099 if (u == NULL)
1100 return NULL;
1101
1102 /* Recode them in target encoding. */
1103 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1104 Py_DECREF(u);
1105 if (w == NULL)
1106 return NULL;
1107 assert(PyBytes_Check(w));
1108
1109 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001110 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001111 p = _PyBytesWriter_WriteBytes(writer, p,
1112 PyBytes_AS_STRING(w),
1113 PyBytes_GET_SIZE(w));
1114 Py_DECREF(w);
1115 if (p == NULL)
1116 return NULL;
1117
1118 *s = t;
1119 return p;
1120}
1121
Eric V. Smith42454af2016-10-31 09:22:08 -04001122PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 Py_ssize_t len,
1124 const char *errors,
1125 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001126 const char *recode_encoding,
1127 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 _PyBytesWriter writer;
1133
1134 _PyBytesWriter_Init(&writer);
1135
1136 p = _PyBytesWriter_Alloc(&writer, len);
1137 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001139 writer.overallocate = 1;
1140
Eric V. Smith42454af2016-10-31 09:22:08 -04001141 *first_invalid_escape = NULL;
1142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 end = s + len;
1144 while (s < end) {
1145 if (*s != '\\') {
1146 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001147 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 *p++ = *s++;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150 else {
1151 /* non-ASCII character and need to recode */
1152 p = _PyBytes_DecodeEscapeRecode(&s, end,
1153 errors, recode_encoding,
1154 &writer, p);
1155 if (p == NULL)
1156 goto failed;
1157 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 continue;
1159 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001162 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 PyErr_SetString(PyExc_ValueError,
1164 "Trailing \\ in string");
1165 goto failed;
1166 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 switch (*s++) {
1169 /* XXX This assumes ASCII! */
1170 case '\n': break;
1171 case '\\': *p++ = '\\'; break;
1172 case '\'': *p++ = '\''; break;
1173 case '\"': *p++ = '\"'; break;
1174 case 'b': *p++ = '\b'; break;
1175 case 'f': *p++ = '\014'; break; /* FF */
1176 case 't': *p++ = '\t'; break;
1177 case 'n': *p++ = '\n'; break;
1178 case 'r': *p++ = '\r'; break;
1179 case 'v': *p++ = '\013'; break; /* VT */
1180 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1181 case '0': case '1': case '2': case '3':
1182 case '4': case '5': case '6': case '7':
1183 c = s[-1] - '0';
1184 if (s < end && '0' <= *s && *s <= '7') {
1185 c = (c<<3) + *s++ - '0';
1186 if (s < end && '0' <= *s && *s <= '7')
1187 c = (c<<3) + *s++ - '0';
1188 }
1189 *p++ = c;
1190 break;
1191 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001192 if (s+1 < end) {
1193 int digit1, digit2;
1194 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1195 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1196 if (digit1 < 16 && digit2 < 16) {
1197 *p++ = (unsigned char)((digit1 << 4) + digit2);
1198 s += 2;
1199 break;
1200 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001202 /* invalid hexadecimal digits */
1203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001205 PyErr_Format(PyExc_ValueError,
1206 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001207 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 goto failed;
1209 }
1210 if (strcmp(errors, "replace") == 0) {
1211 *p++ = '?';
1212 } else if (strcmp(errors, "ignore") == 0)
1213 /* do nothing */;
1214 else {
1215 PyErr_Format(PyExc_ValueError,
1216 "decoding error; unknown "
1217 "error handling code: %.400s",
1218 errors);
1219 goto failed;
1220 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001221 /* skip \x */
1222 if (s < end && Py_ISXDIGIT(s[0]))
1223 s++; /* and a hexdigit */
1224 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001227 if (*first_invalid_escape == NULL) {
1228 *first_invalid_escape = s-1; /* Back up one char, since we've
1229 already incremented s. */
1230 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001232 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001233 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 UTF-8 bytes may follow. */
1235 }
1236 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001237
1238 return _PyBytesWriter_Finish(&writer, p);
1239
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001241 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243}
1244
Eric V. Smith42454af2016-10-31 09:22:08 -04001245PyObject *PyBytes_DecodeEscape(const char *s,
1246 Py_ssize_t len,
1247 const char *errors,
1248 Py_ssize_t unicode,
1249 const char *recode_encoding)
1250{
1251 const char* first_invalid_escape;
1252 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1253 recode_encoding,
1254 &first_invalid_escape);
1255 if (result == NULL)
1256 return NULL;
1257 if (first_invalid_escape != NULL) {
1258 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1259 "invalid escape sequence '\\%c'",
1260 *first_invalid_escape) < 0) {
1261 Py_DECREF(result);
1262 return NULL;
1263 }
1264 }
1265 return result;
1266
1267}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268/* -------------------------------------------------------------------- */
1269/* object api */
1270
1271Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001272PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 if (!PyBytes_Check(op)) {
1275 PyErr_Format(PyExc_TypeError,
1276 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1277 return -1;
1278 }
1279 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280}
1281
1282char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001283PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 if (!PyBytes_Check(op)) {
1286 PyErr_Format(PyExc_TypeError,
1287 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1288 return NULL;
1289 }
1290 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291}
1292
1293int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294PyBytes_AsStringAndSize(PyObject *obj,
1295 char **s,
1296 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (s == NULL) {
1299 PyErr_BadInternalCall();
1300 return -1;
1301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (!PyBytes_Check(obj)) {
1304 PyErr_Format(PyExc_TypeError,
1305 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1306 return -1;
1307 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 *s = PyBytes_AS_STRING(obj);
1310 if (len != NULL)
1311 *len = PyBytes_GET_SIZE(obj);
1312 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001313 PyErr_SetString(PyExc_ValueError,
1314 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 return -1;
1316 }
1317 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318}
Neal Norwitz6968b052007-02-27 19:02:19 +00001319
1320/* -------------------------------------------------------------------- */
1321/* Methods */
1322
Eric Smith0923d1d2009-04-16 20:16:10 +00001323#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001324
1325#include "stringlib/fastsearch.h"
1326#include "stringlib/count.h"
1327#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001328#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001329#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001330#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001331#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001332
Eric Smith0f78bff2009-11-30 01:01:42 +00001333#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001334
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335PyObject *
1336PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001337{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001338 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001340 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342 unsigned char quote, *s, *p;
1343
1344 /* Compute size of output string */
1345 squotes = dquotes = 0;
1346 newsize = 3; /* b'' */
1347 s = (unsigned char*)op->ob_sval;
1348 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001349 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001351 case '\'': squotes++; break;
1352 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001354 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 default:
1356 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001358 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001359 if (newsize > PY_SSIZE_T_MAX - incr)
1360 goto overflow;
1361 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001362 }
1363 quote = '\'';
1364 if (smartquotes && squotes && !dquotes)
1365 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001366 if (squotes && quote == '\'') {
1367 if (newsize > PY_SSIZE_T_MAX - squotes)
1368 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371
1372 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (v == NULL) {
1374 return NULL;
1375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 *p++ = 'b', *p++ = quote;
1379 for (i = 0; i < length; i++) {
1380 unsigned char c = op->ob_sval[i];
1381 if (c == quote || c == '\\')
1382 *p++ = '\\', *p++ = c;
1383 else if (c == '\t')
1384 *p++ = '\\', *p++ = 't';
1385 else if (c == '\n')
1386 *p++ = '\\', *p++ = 'n';
1387 else if (c == '\r')
1388 *p++ = '\\', *p++ = 'r';
1389 else if (c < ' ' || c >= 0x7f) {
1390 *p++ = '\\';
1391 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001392 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1393 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001395 else
1396 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001398 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001399 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001401
1402 overflow:
1403 PyErr_SetString(PyExc_OverflowError,
1404 "bytes object is too large to make repr");
1405 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001406}
1407
Neal Norwitz6968b052007-02-27 19:02:19 +00001408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001409bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001412}
1413
Neal Norwitz6968b052007-02-27 19:02:19 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (Py_BytesWarningFlag) {
1418 if (PyErr_WarnEx(PyExc_BytesWarning,
1419 "str() on a bytes instance", 1))
1420 return NULL;
1421 }
1422 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001423}
1424
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001426bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429}
Neal Norwitz6968b052007-02-27 19:02:19 +00001430
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431/* This is also used by PyBytes_Concat() */
1432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001433bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 Py_buffer va, vb;
1436 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 va.len = -1;
1439 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001440 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1441 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001443 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 goto done;
1445 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 /* Optimize end cases */
1448 if (va.len == 0 && PyBytes_CheckExact(b)) {
1449 result = b;
1450 Py_INCREF(result);
1451 goto done;
1452 }
1453 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1454 result = a;
1455 Py_INCREF(result);
1456 goto done;
1457 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001459 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyErr_NoMemory();
1461 goto done;
1462 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001464 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 if (result != NULL) {
1466 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1467 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1468 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469
1470 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (va.len != -1)
1472 PyBuffer_Release(&va);
1473 if (vb.len != -1)
1474 PyBuffer_Release(&vb);
1475 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476}
Neal Norwitz6968b052007-02-27 19:02:19 +00001477
1478static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001480{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001481 Py_ssize_t i;
1482 Py_ssize_t j;
1483 Py_ssize_t size;
1484 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 size_t nbytes;
1486 if (n < 0)
1487 n = 0;
1488 /* watch out for overflows: the size can overflow int,
1489 * and the # of bytes needed can overflow size_t
1490 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001491 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 PyErr_SetString(PyExc_OverflowError,
1493 "repeated bytes are too long");
1494 return NULL;
1495 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001496 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1498 Py_INCREF(a);
1499 return (PyObject *)a;
1500 }
1501 nbytes = (size_t)size;
1502 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1503 PyErr_SetString(PyExc_OverflowError,
1504 "repeated bytes are too long");
1505 return NULL;
1506 }
1507 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1508 if (op == NULL)
1509 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001510 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 op->ob_shash = -1;
1512 op->ob_sval[size] = '\0';
1513 if (Py_SIZE(a) == 1 && n > 0) {
1514 memset(op->ob_sval, a->ob_sval[0] , n);
1515 return (PyObject *) op;
1516 }
1517 i = 0;
1518 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001519 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 i = Py_SIZE(a);
1521 }
1522 while (i < size) {
1523 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001524 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 i += j;
1526 }
1527 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001528}
1529
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001530static int
1531bytes_contains(PyObject *self, PyObject *arg)
1532{
1533 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1534}
1535
Neal Norwitz6968b052007-02-27 19:02:19 +00001536static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001537bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (i < 0 || i >= Py_SIZE(a)) {
1540 PyErr_SetString(PyExc_IndexError, "index out of range");
1541 return NULL;
1542 }
1543 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001544}
1545
Benjamin Peterson621b4302016-09-09 13:54:34 -07001546static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001547bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548{
1549 int cmp;
1550 Py_ssize_t len;
1551
1552 len = Py_SIZE(a);
1553 if (Py_SIZE(b) != len)
1554 return 0;
1555
1556 if (a->ob_sval[0] != b->ob_sval[0])
1557 return 0;
1558
1559 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560 return (cmp == 0);
1561}
1562
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001564bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 int c;
1567 Py_ssize_t len_a, len_b;
1568 Py_ssize_t min_len;
1569 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001570 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 /* Make sure both arguments are strings. */
1573 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001574 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001575 rc = PyObject_IsInstance((PyObject*)a,
1576 (PyObject*)&PyUnicode_Type);
1577 if (!rc)
1578 rc = PyObject_IsInstance((PyObject*)b,
1579 (PyObject*)&PyUnicode_Type);
1580 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001582 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001583 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001584 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001585 return NULL;
1586 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001587 else {
1588 rc = PyObject_IsInstance((PyObject*)a,
1589 (PyObject*)&PyLong_Type);
1590 if (!rc)
1591 rc = PyObject_IsInstance((PyObject*)b,
1592 (PyObject*)&PyLong_Type);
1593 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001594 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001595 if (rc) {
1596 if (PyErr_WarnEx(PyExc_BytesWarning,
1597 "Comparison between bytes and int", 1))
1598 return NULL;
1599 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001600 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 }
1602 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001604 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001606 case Py_EQ:
1607 case Py_LE:
1608 case Py_GE:
1609 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001611 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001612 case Py_NE:
1613 case Py_LT:
1614 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001616 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001617 default:
1618 PyErr_BadArgument();
1619 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 }
1621 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 else if (op == Py_EQ || op == Py_NE) {
1623 int eq = bytes_compare_eq(a, b);
1624 eq ^= (op == Py_NE);
1625 result = eq ? Py_True : Py_False;
1626 }
1627 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001628 len_a = Py_SIZE(a);
1629 len_b = Py_SIZE(b);
1630 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001631 if (min_len > 0) {
1632 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001633 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001634 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001636 else
1637 c = 0;
1638 if (c == 0)
1639 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1640 switch (op) {
1641 case Py_LT: c = c < 0; break;
1642 case Py_LE: c = c <= 0; break;
1643 case Py_GT: c = c > 0; break;
1644 case Py_GE: c = c >= 0; break;
1645 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001646 PyErr_BadArgument();
1647 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001648 }
1649 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 Py_INCREF(result);
1653 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001654}
1655
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001656static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001657bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001658{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001659 if (a->ob_shash == -1) {
1660 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001661 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001662 }
1663 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001664}
1665
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001667bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 if (PyIndex_Check(item)) {
1670 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1671 if (i == -1 && PyErr_Occurred())
1672 return NULL;
1673 if (i < 0)
1674 i += PyBytes_GET_SIZE(self);
1675 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1676 PyErr_SetString(PyExc_IndexError,
1677 "index out of range");
1678 return NULL;
1679 }
1680 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1681 }
1682 else if (PySlice_Check(item)) {
1683 Py_ssize_t start, stop, step, slicelength, cur, i;
1684 char* source_buf;
1685 char* result_buf;
1686 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001688 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return NULL;
1690 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001691 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1692 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 if (slicelength <= 0) {
1695 return PyBytes_FromStringAndSize("", 0);
1696 }
1697 else if (start == 0 && step == 1 &&
1698 slicelength == PyBytes_GET_SIZE(self) &&
1699 PyBytes_CheckExact(self)) {
1700 Py_INCREF(self);
1701 return (PyObject *)self;
1702 }
1703 else if (step == 1) {
1704 return PyBytes_FromStringAndSize(
1705 PyBytes_AS_STRING(self) + start,
1706 slicelength);
1707 }
1708 else {
1709 source_buf = PyBytes_AS_STRING(self);
1710 result = PyBytes_FromStringAndSize(NULL, slicelength);
1711 if (result == NULL)
1712 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 result_buf = PyBytes_AS_STRING(result);
1715 for (cur = start, i = 0; i < slicelength;
1716 cur += step, i++) {
1717 result_buf[i] = source_buf[cur];
1718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 return result;
1721 }
1722 }
1723 else {
1724 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001725 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 Py_TYPE(item)->tp_name);
1727 return NULL;
1728 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729}
1730
1731static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001732bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1735 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736}
1737
/* Sequence-protocol slot table for bytes.  The initializer is positional, so
   the entries must stay in the field order of PySequenceMethods; a 0 leaves
   the corresponding slot unset. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1748
/* Mapping-protocol slot table for bytes: length and subscripting only; the
   third (assignment) slot is left unset. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,          /*mp_length*/
    (binaryfunc)bytes_subscript,    /*mp_subscript*/
    0,                              /*mp_ass_subscript*/
};
1754
/* Buffer-protocol slot table for bytes: only the "get buffer" slot is
   provided; the release slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,  /*bf_getbuffer*/
    NULL,                                   /*bf_releasebuffer*/
};
1759
1760
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1764
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001765/*[clinic input]
1766bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001768 sep: object = None
1769 The delimiter according which to split the bytes.
1770 None (the default value) means split on ASCII whitespace characters
1771 (space, tab, return, newline, formfeed, vertical tab).
1772 maxsplit: Py_ssize_t = -1
1773 Maximum number of splits to do.
1774 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776Return a list of the sections in the bytes, using sep as the delimiter.
1777[clinic start generated code]*/
1778
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001780bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1781/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001782{
1783 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 const char *s = PyBytes_AS_STRING(self), *sub;
1785 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001786 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 if (maxsplit < 0)
1789 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001792 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 return NULL;
1794 sub = vsub.buf;
1795 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1798 PyBuffer_Release(&vsub);
1799 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001800}
1801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001802/*[clinic input]
1803bytes.partition
1804
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001805 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001806 /
1807
1808Partition the bytes into three parts using the given separator.
1809
1810This will search for the separator sep in the bytes. If the separator is found,
1811returns a 3-tuple containing the part before the separator, the separator
1812itself, and the part after it.
1813
1814If the separator is not found, returns a 3-tuple containing the original bytes
1815object and two empty bytes objects.
1816[clinic start generated code]*/
1817
Neal Norwitz6968b052007-02-27 19:02:19 +00001818static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001819bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001820/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001821{
Neal Norwitz6968b052007-02-27 19:02:19 +00001822 return stringlib_partition(
1823 (PyObject*) self,
1824 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001825 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001826 );
1827}
1828
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829/*[clinic input]
1830bytes.rpartition
1831
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001832 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 /
1834
1835Partition the bytes into three parts using the given separator.
1836
1837This will search for the separator sep in the bytes, starting and the end. If
1838the separator is found, returns a 3-tuple containing the part before the
1839separator, the separator itself, and the part after it.
1840
1841If the separator is not found, returns a 3-tuple containing two empty bytes
1842objects and the original bytes object.
1843[clinic start generated code]*/
1844
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001845static PyObject *
1846bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001847/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001848{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 return stringlib_rpartition(
1850 (PyObject*) self,
1851 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001852 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001854}
1855
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001856/*[clinic input]
1857bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001858
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001859Return a list of the sections in the bytes, using sep as the delimiter.
1860
1861Splitting is done starting at the end of the bytes and working to the front.
1862[clinic start generated code]*/
1863
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001865bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001867{
1868 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 const char *s = PyBytes_AS_STRING(self), *sub;
1870 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001871 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 if (maxsplit < 0)
1874 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001875 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001877 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 return NULL;
1879 sub = vsub.buf;
1880 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1883 PyBuffer_Release(&vsub);
1884 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001885}
1886
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001888/*[clinic input]
1889bytes.join
1890
1891 iterable_of_bytes: object
1892 /
1893
1894Concatenate any number of bytes objects.
1895
1896The bytes whose method is called is inserted in between each pair.
1897
1898The result is returned as a new bytes object.
1899
1900Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1901[clinic start generated code]*/
1902
Neal Norwitz6968b052007-02-27 19:02:19 +00001903static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001904bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1905/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001906{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001907 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001908}
1909
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910PyObject *
1911_PyBytes_Join(PyObject *sep, PyObject *x)
1912{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 assert(sep != NULL && PyBytes_Check(sep));
1914 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001915 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916}
1917
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001918static PyObject *
1919bytes_find(PyBytesObject *self, PyObject *args)
1920{
1921 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1922}
1923
1924static PyObject *
1925bytes_index(PyBytesObject *self, PyObject *args)
1926{
1927 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1928}
1929
1930
1931static PyObject *
1932bytes_rfind(PyBytesObject *self, PyObject *args)
1933{
1934 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1935}
1936
1937
1938static PyObject *
1939bytes_rindex(PyBytesObject *self, PyObject *args)
1940{
1941 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1942}
1943
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
1945Py_LOCAL_INLINE(PyObject *)
1946do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001947{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 Py_buffer vsep;
1949 char *s = PyBytes_AS_STRING(self);
1950 Py_ssize_t len = PyBytes_GET_SIZE(self);
1951 char *sep;
1952 Py_ssize_t seplen;
1953 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001955 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 return NULL;
1957 sep = vsep.buf;
1958 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 i = 0;
1961 if (striptype != RIGHTSTRIP) {
1962 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1963 i++;
1964 }
1965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 j = len;
1968 if (striptype != LEFTSTRIP) {
1969 do {
1970 j--;
1971 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1972 j++;
1973 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1978 Py_INCREF(self);
1979 return (PyObject*)self;
1980 }
1981 else
1982 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001983}
1984
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
1986Py_LOCAL_INLINE(PyObject *)
1987do_strip(PyBytesObject *self, int striptype)
1988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 char *s = PyBytes_AS_STRING(self);
1990 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 i = 0;
1993 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001994 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 i++;
1996 }
1997 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 j = len;
2000 if (striptype != LEFTSTRIP) {
2001 do {
2002 j--;
David Malcolm96960882010-11-05 17:23:41 +00002003 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 j++;
2005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2008 Py_INCREF(self);
2009 return (PyObject*)self;
2010 }
2011 else
2012 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013}
2014
2015
2016Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002019 if (bytes != NULL && bytes != Py_None) {
2020 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 }
2022 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023}
2024
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025/*[clinic input]
2026bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002028 bytes: object = None
2029 /
2030
2031Strip leading and trailing bytes contained in the argument.
2032
2033If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2034[clinic start generated code]*/
2035
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002036static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002037bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002038/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002039{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002040 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002041}
2042
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002043/*[clinic input]
2044bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002046 bytes: object = None
2047 /
2048
2049Strip leading bytes contained in the argument.
2050
2051If the argument is omitted or None, strip leading ASCII whitespace.
2052[clinic start generated code]*/
2053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002054static PyObject *
2055bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002056/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057{
2058 return do_argstrip(self, LEFTSTRIP, bytes);
2059}
2060
2061/*[clinic input]
2062bytes.rstrip
2063
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002064 bytes: object = None
2065 /
2066
2067Strip trailing bytes contained in the argument.
2068
2069If the argument is omitted or None, strip trailing ASCII whitespace.
2070[clinic start generated code]*/
2071
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072static PyObject *
2073bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002074/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002075{
2076 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002077}
Neal Norwitz6968b052007-02-27 19:02:19 +00002078
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002080static PyObject *
2081bytes_count(PyBytesObject *self, PyObject *args)
2082{
2083 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2084}
2085
2086
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087/*[clinic input]
2088bytes.translate
2089
Victor Stinner049e5092014-08-17 22:20:00 +02002090 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002091 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002092 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002093 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002094
2095Return a copy with each character mapped by the given translation table.
2096
Martin Panter1b6c6da2016-08-27 08:35:02 +00002097All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098The remaining characters are mapped through the given translation table.
2099[clinic start generated code]*/
2100
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002101static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002102bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002103 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002104/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002106 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 Py_buffer table_view = {NULL, NULL};
2108 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002109 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002110 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002112 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 Py_ssize_t inlen, tablen, dellen = 0;
2114 PyObject *result;
2115 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002117 if (PyBytes_Check(table)) {
2118 table_chars = PyBytes_AS_STRING(table);
2119 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002121 else if (table == Py_None) {
2122 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 tablen = 256;
2124 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002125 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002126 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002127 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002128 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002129 tablen = table_view.len;
2130 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if (tablen != 256) {
2133 PyErr_SetString(PyExc_ValueError,
2134 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 return NULL;
2137 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 if (deletechars != NULL) {
2140 if (PyBytes_Check(deletechars)) {
2141 del_table_chars = PyBytes_AS_STRING(deletechars);
2142 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002144 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002145 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002146 PyBuffer_Release(&table_view);
2147 return NULL;
2148 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002149 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002150 dellen = del_table_view.len;
2151 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 }
2153 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002154 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 dellen = 0;
2156 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 inlen = PyBytes_GET_SIZE(input_obj);
2159 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 if (result == NULL) {
2161 PyBuffer_Release(&del_table_view);
2162 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002165 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002168 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 /* If no deletions are required, use faster code */
2170 for (i = inlen; --i >= 0; ) {
2171 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002172 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 changed = 1;
2174 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002175 if (!changed && PyBytes_CheckExact(input_obj)) {
2176 Py_INCREF(input_obj);
2177 Py_DECREF(result);
2178 result = input_obj;
2179 }
2180 PyBuffer_Release(&del_table_view);
2181 PyBuffer_Release(&table_view);
2182 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002185 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 for (i = 0; i < 256; i++)
2187 trans_table[i] = Py_CHARMASK(i);
2188 } else {
2189 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002190 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002192 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002195 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002196 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 for (i = inlen; --i >= 0; ) {
2199 c = Py_CHARMASK(*input++);
2200 if (trans_table[c] != -1)
2201 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2202 continue;
2203 changed = 1;
2204 }
2205 if (!changed && PyBytes_CheckExact(input_obj)) {
2206 Py_DECREF(result);
2207 Py_INCREF(input_obj);
2208 return input_obj;
2209 }
2210 /* Fix the size of the resulting string */
2211 if (inlen > 0)
2212 _PyBytes_Resize(&result, output - output_start);
2213 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214}
2215
2216
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217/*[clinic input]
2218
2219@staticmethod
2220bytes.maketrans
2221
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002222 frm: Py_buffer
2223 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002224 /
2225
2226Return a translation table useable for the bytes or bytearray translate method.
2227
2228The returned table will be one where each byte in frm is mapped to the byte at
2229the same position in to.
2230
2231The bytes objects frm and to must be of the same length.
2232[clinic start generated code]*/
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002235bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002236/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237{
2238 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002239}
2240
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241
2242/*[clinic input]
2243bytes.replace
2244
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002245 old: Py_buffer
2246 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247 count: Py_ssize_t = -1
2248 Maximum number of occurrences to replace.
2249 -1 (the default value) means replace all occurrences.
2250 /
2251
2252Return a copy with all occurrences of substring old replaced by new.
2253
2254If the optional argument count is given, only the first count occurrences are
2255replaced.
2256[clinic start generated code]*/
2257
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002258static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002259bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002260 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002261/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002263 return stringlib_replace((PyObject *)self,
2264 (const char *)old->buf, old->len,
2265 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266}
2267
2268/** End DALKE **/
2269
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002271static PyObject *
2272bytes_startswith(PyBytesObject *self, PyObject *args)
2273{
2274 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2275}
2276
2277static PyObject *
2278bytes_endswith(PyBytesObject *self, PyObject *args)
2279{
2280 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2281}
2282
2283
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002284/*[clinic input]
2285bytes.decode
2286
2287 encoding: str(c_default="NULL") = 'utf-8'
2288 The encoding with which to decode the bytes.
2289 errors: str(c_default="NULL") = 'strict'
2290 The error handling scheme to use for the handling of decoding errors.
2291 The default is 'strict' meaning that decoding errors raise a
2292 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2293 as well as any other name registered with codecs.register_error that
2294 can handle UnicodeDecodeErrors.
2295
2296Decode the bytes using the codec registered for encoding.
2297[clinic start generated code]*/
2298
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002299static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002300bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002301 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002302/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002303{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002304 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002305}
2306
Guido van Rossum20188312006-05-05 15:15:40 +00002307
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002308/*[clinic input]
2309bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002310
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002311 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002312
2313Return a list of the lines in the bytes, breaking at line boundaries.
2314
2315Line breaks are not included in the resulting list unless keepends is given and
2316true.
2317[clinic start generated code]*/
2318
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002319static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002320bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002321/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002323 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002324 (PyObject*) self, PyBytes_AS_STRING(self),
2325 PyBytes_GET_SIZE(self), keepends
2326 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002327}
2328
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002329/*[clinic input]
2330@classmethod
2331bytes.fromhex
2332
2333 string: unicode
2334 /
2335
2336Create a bytes object from a string of hexadecimal numbers.
2337
2338Spaces between two numbers are accepted.
2339Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2340[clinic start generated code]*/
2341
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002342static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002343bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002344/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002345{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002346 PyObject *result = _PyBytes_FromHex(string, 0);
2347 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002348 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2349 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002350 }
2351 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002352}
2353
2354PyObject*
2355_PyBytes_FromHex(PyObject *string, int use_bytearray)
2356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002358 Py_ssize_t hexlen, invalid_char;
2359 unsigned int top, bot;
2360 Py_UCS1 *str, *end;
2361 _PyBytesWriter writer;
2362
2363 _PyBytesWriter_Init(&writer);
2364 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002365
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002366 assert(PyUnicode_Check(string));
2367 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002369 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002370
Victor Stinner2bf89932015-10-14 11:25:33 +02002371 if (!PyUnicode_IS_ASCII(string)) {
2372 void *data = PyUnicode_DATA(string);
2373 unsigned int kind = PyUnicode_KIND(string);
2374 Py_ssize_t i;
2375
2376 /* search for the first non-ASCII character */
2377 for (i = 0; i < hexlen; i++) {
2378 if (PyUnicode_READ(kind, data, i) >= 128)
2379 break;
2380 }
2381 invalid_char = i;
2382 goto error;
2383 }
2384
2385 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2386 str = PyUnicode_1BYTE_DATA(string);
2387
2388 /* This overestimates if there are spaces */
2389 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2390 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002392
2393 end = str + hexlen;
2394 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002396 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002397 do {
2398 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002399 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002400 if (str >= end)
2401 break;
2402 }
2403
2404 top = _PyLong_DigitValue[*str];
2405 if (top >= 16) {
2406 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 goto error;
2408 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002409 str++;
2410
2411 bot = _PyLong_DigitValue[*str];
2412 if (bot >= 16) {
2413 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2414 goto error;
2415 }
2416 str++;
2417
2418 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002420
2421 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002422
2423 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002424 PyErr_Format(PyExc_ValueError,
2425 "non-hexadecimal number found in "
2426 "fromhex() arg at position %zd", invalid_char);
2427 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002429}
2430
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002431PyDoc_STRVAR(hex__doc__,
2432"B.hex() -> string\n\
2433\n\
2434Create a string of hexadecimal numbers from a bytes object.\n\
2435Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2436
2437static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002438bytes_hex(PyBytesObject *self)
2439{
2440 char* argbuf = PyBytes_AS_STRING(self);
2441 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2442 return _Py_strhex(argbuf, arglen);
2443}
2444
2445static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002446bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002447{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002448 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002449}
2450
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002451
2452static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002453bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2455 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2456 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002457 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2458 _Py_center__doc__},
2459 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002460 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002461 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002462 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002463 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002464 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002465 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002466 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002467 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002468 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002469 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2470 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002471 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2472 _Py_isalnum__doc__},
2473 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2474 _Py_isalpha__doc__},
2475 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2476 _Py_isdigit__doc__},
2477 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2478 _Py_islower__doc__},
2479 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2480 _Py_isspace__doc__},
2481 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2482 _Py_istitle__doc__},
2483 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2484 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002485 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002486 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002488 BYTES_LSTRIP_METHODDEF
2489 BYTES_MAKETRANS_METHODDEF
2490 BYTES_PARTITION_METHODDEF
2491 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002492 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2493 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002494 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495 BYTES_RPARTITION_METHODDEF
2496 BYTES_RSPLIT_METHODDEF
2497 BYTES_RSTRIP_METHODDEF
2498 BYTES_SPLIT_METHODDEF
2499 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002500 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002501 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002502 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2504 _Py_swapcase__doc__},
2505 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002506 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002508 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002510};
2511
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002513bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002514{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002515 if (!PyBytes_Check(self)) {
2516 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002517 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002518 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002519 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002520}
2521
2522static PyNumberMethods bytes_as_number = {
2523 0, /*nb_add*/
2524 0, /*nb_subtract*/
2525 0, /*nb_multiply*/
2526 bytes_mod, /*nb_remainder*/
2527};
2528
2529static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002530bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002531
2532static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002533bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 PyObject *x = NULL;
2536 const char *encoding = NULL;
2537 const char *errors = NULL;
2538 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002539 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 Py_ssize_t size;
2541 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002542 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002545 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2547 &encoding, &errors))
2548 return NULL;
2549 if (x == NULL) {
2550 if (encoding != NULL || errors != NULL) {
2551 PyErr_SetString(PyExc_TypeError,
2552 "encoding or errors without sequence "
2553 "argument");
2554 return NULL;
2555 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002556 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002559 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002561 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002563 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 return NULL;
2565 }
2566 new = PyUnicode_AsEncodedString(x, encoding, errors);
2567 if (new == NULL)
2568 return NULL;
2569 assert(PyBytes_Check(new));
2570 return new;
2571 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002572
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002573 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002574 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002575 PyUnicode_Check(x) ?
2576 "string argument without an encoding" :
2577 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002578 return NULL;
2579 }
2580
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002581 /* We'd like to call PyObject_Bytes here, but we need to check for an
2582 integer argument before deferring to PyBytes_FromObject, something
2583 PyObject_Bytes doesn't do. */
2584 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2585 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002586 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002587 Py_DECREF(func);
2588 if (new == NULL)
2589 return NULL;
2590 if (!PyBytes_Check(new)) {
2591 PyErr_Format(PyExc_TypeError,
2592 "__bytes__ returned non-bytes (type %.200s)",
2593 Py_TYPE(new)->tp_name);
2594 Py_DECREF(new);
2595 return NULL;
2596 }
2597 return new;
2598 }
2599 else if (PyErr_Occurred())
2600 return NULL;
2601
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002602 if (PyUnicode_Check(x)) {
2603 PyErr_SetString(PyExc_TypeError,
2604 "string argument without an encoding");
2605 return NULL;
2606 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002608 if (PyIndex_Check(x)) {
2609 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2610 if (size == -1 && PyErr_Occurred()) {
INADA Naokia634e232017-01-06 17:32:01 +09002611 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2612 return NULL;
2613 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002614 }
INADA Naokia634e232017-01-06 17:32:01 +09002615 else {
2616 if (size < 0) {
2617 PyErr_SetString(PyExc_ValueError, "negative count");
2618 return NULL;
2619 }
2620 new = _PyBytes_FromSize(size, 1);
2621 if (new == NULL)
2622 return NULL;
2623 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002624 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002626
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002627 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002628}
2629
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002630static PyObject*
2631_PyBytes_FromBuffer(PyObject *x)
2632{
2633 PyObject *new;
2634 Py_buffer view;
2635
2636 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2637 return NULL;
2638
2639 new = PyBytes_FromStringAndSize(NULL, view.len);
2640 if (!new)
2641 goto fail;
2642 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2643 &view, view.len, 'C') < 0)
2644 goto fail;
2645 PyBuffer_Release(&view);
2646 return new;
2647
2648fail:
2649 Py_XDECREF(new);
2650 PyBuffer_Release(&view);
2651 return NULL;
2652}
2653
Victor Stinner3c50ce32015-10-14 13:50:40 +02002654#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2655 do { \
2656 PyObject *bytes; \
2657 Py_ssize_t i; \
2658 Py_ssize_t value; \
2659 char *str; \
2660 PyObject *item; \
2661 \
2662 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2663 if (bytes == NULL) \
2664 return NULL; \
2665 str = ((PyBytesObject *)bytes)->ob_sval; \
2666 \
2667 for (i = 0; i < Py_SIZE(x); i++) { \
2668 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002669 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002670 if (value == -1 && PyErr_Occurred()) \
2671 goto error; \
2672 \
2673 if (value < 0 || value >= 256) { \
2674 PyErr_SetString(PyExc_ValueError, \
2675 "bytes must be in range(0, 256)"); \
2676 goto error; \
2677 } \
2678 *str++ = (char) value; \
2679 } \
2680 return bytes; \
2681 \
2682 error: \
2683 Py_DECREF(bytes); \
2684 return NULL; \
2685 } while (0)
2686
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002687static PyObject*
2688_PyBytes_FromList(PyObject *x)
2689{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002690 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002691}
2692
2693static PyObject*
2694_PyBytes_FromTuple(PyObject *x)
2695{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002696 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002697}
2698
2699static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002700_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002701{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002702 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002704 _PyBytesWriter writer;
2705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002707 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 if (size == -1 && PyErr_Occurred())
2709 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002710
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002711 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002712 str = _PyBytesWriter_Alloc(&writer, size);
2713 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002715 writer.overallocate = 1;
2716 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 /* Run the iterator to exhaustion */
2719 for (i = 0; ; i++) {
2720 PyObject *item;
2721 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002723 /* Get the next item */
2724 item = PyIter_Next(it);
2725 if (item == NULL) {
2726 if (PyErr_Occurred())
2727 goto error;
2728 break;
2729 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002732 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 Py_DECREF(item);
2734 if (value == -1 && PyErr_Occurred())
2735 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002737 /* Range check */
2738 if (value < 0 || value >= 256) {
2739 PyErr_SetString(PyExc_ValueError,
2740 "bytes must be in range(0, 256)");
2741 goto error;
2742 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002744 /* Append the byte */
2745 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002746 str = _PyBytesWriter_Resize(&writer, str, size+1);
2747 if (str == NULL)
2748 return NULL;
2749 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002750 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002751 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002753
2754 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755
2756 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002757 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759}
2760
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002761PyObject *
2762PyBytes_FromObject(PyObject *x)
2763{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002764 PyObject *it, *result;
2765
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002766 if (x == NULL) {
2767 PyErr_BadInternalCall();
2768 return NULL;
2769 }
2770
2771 if (PyBytes_CheckExact(x)) {
2772 Py_INCREF(x);
2773 return x;
2774 }
2775
2776 /* Use the modern buffer interface */
2777 if (PyObject_CheckBuffer(x))
2778 return _PyBytes_FromBuffer(x);
2779
2780 if (PyList_CheckExact(x))
2781 return _PyBytes_FromList(x);
2782
2783 if (PyTuple_CheckExact(x))
2784 return _PyBytes_FromTuple(x);
2785
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002786 if (!PyUnicode_Check(x)) {
2787 it = PyObject_GetIter(x);
2788 if (it != NULL) {
2789 result = _PyBytes_FromIterator(it, x);
2790 Py_DECREF(it);
2791 return result;
2792 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002793 }
2794
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002795 PyErr_Format(PyExc_TypeError,
2796 "cannot convert '%.200s' object to bytes",
2797 x->ob_type->tp_name);
2798 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002799}
2800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002802bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 PyObject *tmp, *pnew;
2805 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002807 assert(PyType_IsSubtype(type, &PyBytes_Type));
2808 tmp = bytes_new(&PyBytes_Type, args, kwds);
2809 if (tmp == NULL)
2810 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002811 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002812 n = PyBytes_GET_SIZE(tmp);
2813 pnew = type->tp_alloc(type, n);
2814 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002815 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 PyBytes_AS_STRING(tmp), n+1);
2817 ((PyBytesObject *)pnew)->ob_shash =
2818 ((PyBytesObject *)tmp)->ob_shash;
2819 }
2820 Py_DECREF(tmp);
2821 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822}
2823
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002824PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002825"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002827bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002828bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2829bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002830\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002831Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002832 - an iterable yielding integers in range(256)\n\
2833 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002834 - any object implementing the buffer API.\n\
2835 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002836
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002837static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002838
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2841 "bytes",
2842 PyBytesObject_SIZE,
2843 sizeof(char),
2844 bytes_dealloc, /* tp_dealloc */
2845 0, /* tp_print */
2846 0, /* tp_getattr */
2847 0, /* tp_setattr */
2848 0, /* tp_reserved */
2849 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002850 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002851 &bytes_as_sequence, /* tp_as_sequence */
2852 &bytes_as_mapping, /* tp_as_mapping */
2853 (hashfunc)bytes_hash, /* tp_hash */
2854 0, /* tp_call */
2855 bytes_str, /* tp_str */
2856 PyObject_GenericGetAttr, /* tp_getattro */
2857 0, /* tp_setattro */
2858 &bytes_as_buffer, /* tp_as_buffer */
2859 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2860 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2861 bytes_doc, /* tp_doc */
2862 0, /* tp_traverse */
2863 0, /* tp_clear */
2864 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2865 0, /* tp_weaklistoffset */
2866 bytes_iter, /* tp_iter */
2867 0, /* tp_iternext */
2868 bytes_methods, /* tp_methods */
2869 0, /* tp_members */
2870 0, /* tp_getset */
2871 &PyBaseObject_Type, /* tp_base */
2872 0, /* tp_dict */
2873 0, /* tp_descr_get */
2874 0, /* tp_descr_set */
2875 0, /* tp_dictoffset */
2876 0, /* tp_init */
2877 0, /* tp_alloc */
2878 bytes_new, /* tp_new */
2879 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002880};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002881
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002883PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 assert(pv != NULL);
2886 if (*pv == NULL)
2887 return;
2888 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002889 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 return;
2891 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002892
2893 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2894 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002895 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002896 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002897
Antoine Pitrou161d6952014-05-01 14:36:20 +02002898 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002899 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002900 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2901 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2902 Py_CLEAR(*pv);
2903 return;
2904 }
2905
2906 oldsize = PyBytes_GET_SIZE(*pv);
2907 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2908 PyErr_NoMemory();
2909 goto error;
2910 }
2911 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2912 goto error;
2913
2914 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2915 PyBuffer_Release(&wb);
2916 return;
2917
2918 error:
2919 PyBuffer_Release(&wb);
2920 Py_CLEAR(*pv);
2921 return;
2922 }
2923
2924 else {
2925 /* Multiple references, need to create new object */
2926 PyObject *v;
2927 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002928 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930}
2931
2932void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002933PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002935 PyBytes_Concat(pv, w);
2936 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937}
2938
2939
Ethan Furmanb95b5612015-01-23 20:05:18 -08002940/* The following function breaks the notion that bytes are immutable:
2941 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002942 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002943 as creating a new bytes object and destroying the old one, only
2944 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002946 Note that if there's not enough memory to resize the bytes object, the
2947 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948 memory" exception is set, and -1 is returned. Else (on success) 0 is
2949 returned, and the value in *pv may or may not be the same as on input.
2950 As always, an extra byte is allocated for a trailing \0 byte (newsize
2951 does *not* include that), and a trailing \0 byte is stored.
2952*/
2953
2954int
2955_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2956{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002957 PyObject *v;
2958 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002960 if (!PyBytes_Check(v) || newsize < 0) {
2961 goto error;
2962 }
2963 if (Py_SIZE(v) == newsize) {
2964 /* return early if newsize equals to v->ob_size */
2965 return 0;
2966 }
2967 if (Py_REFCNT(v) != 1) {
2968 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 }
2970 /* XXX UNREF/NEWREF interface should be more symmetrical */
2971 _Py_DEC_REFTOTAL;
2972 _Py_ForgetReference(v);
2973 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002974 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 if (*pv == NULL) {
2976 PyObject_Del(v);
2977 PyErr_NoMemory();
2978 return -1;
2979 }
2980 _Py_NewReference(*pv);
2981 sv = (PyBytesObject *) *pv;
2982 Py_SIZE(sv) = newsize;
2983 sv->ob_sval[newsize] = '\0';
2984 sv->ob_shash = -1; /* invalidate cached hash value */
2985 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002986error:
2987 *pv = 0;
2988 Py_DECREF(v);
2989 PyErr_BadInternalCall();
2990 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991}
2992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993void
2994PyBytes_Fini(void)
2995{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002996 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002997 for (i = 0; i < UCHAR_MAX + 1; i++)
2998 Py_CLEAR(characters[i]);
2999 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000}
3001
Benjamin Peterson4116f362008-05-27 00:36:20 +00003002/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003003
3004typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 PyObject_HEAD
3006 Py_ssize_t it_index;
3007 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003009
3010static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 _PyObject_GC_UNTRACK(it);
3014 Py_XDECREF(it->it_seq);
3015 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016}
3017
3018static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003019striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 Py_VISIT(it->it_seq);
3022 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023}
3024
3025static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003026striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003027{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 PyBytesObject *seq;
3029 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 assert(it != NULL);
3032 seq = it->it_seq;
3033 if (seq == NULL)
3034 return NULL;
3035 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3038 item = PyLong_FromLong(
3039 (unsigned char)seq->ob_sval[it->it_index]);
3040 if (item != NULL)
3041 ++it->it_index;
3042 return item;
3043 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003046 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048}
3049
3050static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003051striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003053 Py_ssize_t len = 0;
3054 if (it->it_seq)
3055 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3056 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057}
3058
3059PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003062static PyObject *
3063striter_reduce(striterobject *it)
3064{
3065 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003066 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003067 it->it_seq, it->it_index);
3068 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003069 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003070 }
3071}
3072
3073PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3074
3075static PyObject *
3076striter_setstate(striterobject *it, PyObject *state)
3077{
3078 Py_ssize_t index = PyLong_AsSsize_t(state);
3079 if (index == -1 && PyErr_Occurred())
3080 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003081 if (it->it_seq != NULL) {
3082 if (index < 0)
3083 index = 0;
3084 else if (index > PyBytes_GET_SIZE(it->it_seq))
3085 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3086 it->it_index = index;
3087 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003088 Py_RETURN_NONE;
3089}
3090
3091PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3092
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3095 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003096 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3097 reduce_doc},
3098 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3099 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101};
3102
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003103PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003104 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3105 "bytes_iterator", /* tp_name */
3106 sizeof(striterobject), /* tp_basicsize */
3107 0, /* tp_itemsize */
3108 /* methods */
3109 (destructor)striter_dealloc, /* tp_dealloc */
3110 0, /* tp_print */
3111 0, /* tp_getattr */
3112 0, /* tp_setattr */
3113 0, /* tp_reserved */
3114 0, /* tp_repr */
3115 0, /* tp_as_number */
3116 0, /* tp_as_sequence */
3117 0, /* tp_as_mapping */
3118 0, /* tp_hash */
3119 0, /* tp_call */
3120 0, /* tp_str */
3121 PyObject_GenericGetAttr, /* tp_getattro */
3122 0, /* tp_setattro */
3123 0, /* tp_as_buffer */
3124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3125 0, /* tp_doc */
3126 (traverseproc)striter_traverse, /* tp_traverse */
3127 0, /* tp_clear */
3128 0, /* tp_richcompare */
3129 0, /* tp_weaklistoffset */
3130 PyObject_SelfIter, /* tp_iter */
3131 (iternextfunc)striter_next, /* tp_iternext */
3132 striter_methods, /* tp_methods */
3133 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003134};
3135
3136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003137bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 if (!PyBytes_Check(seq)) {
3142 PyErr_BadInternalCall();
3143 return NULL;
3144 }
3145 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3146 if (it == NULL)
3147 return NULL;
3148 it->it_index = 0;
3149 Py_INCREF(seq);
3150 it->it_seq = (PyBytesObject *)seq;
3151 _PyObject_GC_TRACK(it);
3152 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003153}
Victor Stinner00165072015-10-09 01:53:21 +02003154
3155
3156/* _PyBytesWriter API */
3157
/* When overallocation is enabled, _PyBytesWriter_Resize() grows the
   requested size by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3165
3166void
3167_PyBytesWriter_Init(_PyBytesWriter *writer)
3168{
Victor Stinner661aacc2015-10-14 09:41:48 +02003169 /* Set all attributes before small_buffer to 0 */
3170 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003171#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003172 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003173#endif
3174}
3175
3176void
3177_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3178{
3179 Py_CLEAR(writer->buffer);
3180}
3181
3182Py_LOCAL_INLINE(char*)
3183_PyBytesWriter_AsString(_PyBytesWriter *writer)
3184{
Victor Stinner661aacc2015-10-14 09:41:48 +02003185 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003186 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003187 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003188 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003189 else if (writer->use_bytearray) {
3190 assert(writer->buffer != NULL);
3191 return PyByteArray_AS_STRING(writer->buffer);
3192 }
3193 else {
3194 assert(writer->buffer != NULL);
3195 return PyBytes_AS_STRING(writer->buffer);
3196 }
Victor Stinner00165072015-10-09 01:53:21 +02003197}
3198
3199Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003200_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003201{
3202 char *start = _PyBytesWriter_AsString(writer);
3203 assert(str != NULL);
3204 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003205 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003206 return str - start;
3207}
3208
3209Py_LOCAL_INLINE(void)
3210_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3211{
3212#ifdef Py_DEBUG
3213 char *start, *end;
3214
Victor Stinner661aacc2015-10-14 09:41:48 +02003215 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003216 assert(writer->buffer == NULL);
3217 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003218 else {
3219 assert(writer->buffer != NULL);
3220 if (writer->use_bytearray)
3221 assert(PyByteArray_CheckExact(writer->buffer));
3222 else
3223 assert(PyBytes_CheckExact(writer->buffer));
3224 assert(Py_REFCNT(writer->buffer) == 1);
3225 }
Victor Stinner00165072015-10-09 01:53:21 +02003226
Victor Stinner661aacc2015-10-14 09:41:48 +02003227 if (writer->use_bytearray) {
3228 /* bytearray has its own overallocation algorithm,
3229 writer overallocation must be disabled */
3230 assert(!writer->overallocate);
3231 }
3232
3233 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003234 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003235 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003236 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003237 assert(start[writer->allocated] == 0);
3238
3239 end = start + writer->allocated;
3240 assert(str != NULL);
3241 assert(start <= str && str <= end);
3242#endif
3243}
3244
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003245void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003246_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003247{
3248 Py_ssize_t allocated, pos;
3249
3250 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003251 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003252
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003253 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003254 if (writer->overallocate
3255 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3256 /* overallocate to limit the number of realloc() */
3257 allocated += allocated / OVERALLOCATE_FACTOR;
3258 }
3259
Victor Stinner2bf89932015-10-14 11:25:33 +02003260 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003261 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003262 if (writer->use_bytearray) {
3263 if (PyByteArray_Resize(writer->buffer, allocated))
3264 goto error;
3265 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3266 but we cannot use ob_alloc because bytes may need to be moved
3267 to use the whole buffer. bytearray uses an internal optimization
3268 to avoid moving or copying bytes when bytes are removed at the
3269 beginning (ex: del bytearray[:1]). */
3270 }
3271 else {
3272 if (_PyBytes_Resize(&writer->buffer, allocated))
3273 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003274 }
3275 }
3276 else {
3277 /* convert from stack buffer to bytes object buffer */
3278 assert(writer->buffer == NULL);
3279
Victor Stinner661aacc2015-10-14 09:41:48 +02003280 if (writer->use_bytearray)
3281 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3282 else
3283 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003284 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003285 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003286
3287 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003288 char *dest;
3289 if (writer->use_bytearray)
3290 dest = PyByteArray_AS_STRING(writer->buffer);
3291 else
3292 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003293 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003294 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003295 pos);
3296 }
3297
Victor Stinnerb3653a32015-10-09 03:38:24 +02003298 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003299#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003300 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003301#endif
Victor Stinner00165072015-10-09 01:53:21 +02003302 }
3303 writer->allocated = allocated;
3304
3305 str = _PyBytesWriter_AsString(writer) + pos;
3306 _PyBytesWriter_CheckConsistency(writer, str);
3307 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003308
3309error:
3310 _PyBytesWriter_Dealloc(writer);
3311 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003312}
3313
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003314void*
3315_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316{
3317 Py_ssize_t new_min_size;
3318
3319 _PyBytesWriter_CheckConsistency(writer, str);
3320 assert(size >= 0);
3321
3322 if (size == 0) {
3323 /* nothing to do */
3324 return str;
3325 }
3326
3327 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328 PyErr_NoMemory();
3329 _PyBytesWriter_Dealloc(writer);
3330 return NULL;
3331 }
3332 new_min_size = writer->min_size + size;
3333
3334 if (new_min_size > writer->allocated)
3335 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336
3337 writer->min_size = new_min_size;
3338 return str;
3339}
3340
Victor Stinner00165072015-10-09 01:53:21 +02003341/* Allocate the buffer to write size bytes.
3342 Return the pointer to the beginning of buffer data.
3343 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003344void*
Victor Stinner00165072015-10-09 01:53:21 +02003345_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346{
3347 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003348 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003349 assert(size >= 0);
3350
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003352#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003353 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003354 /* In debug mode, don't use the full small buffer because it is less
3355 efficient than bytes and bytearray objects to detect buffer underflow
3356 and buffer overflow. Use 10 bytes of the small buffer to test also
3357 code using the smaller buffer in debug mode.
3358
3359 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360 in debug mode to also be able to detect stack overflow when running
3361 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363 stack overflow. */
3364 writer->allocated = Py_MIN(writer->allocated, 10);
3365 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003367 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003368#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003369 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003370#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003371 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003372}
3373
3374PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003375_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003376{
Victor Stinner2bf89932015-10-14 11:25:33 +02003377 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003378 PyObject *result;
3379
3380 _PyBytesWriter_CheckConsistency(writer, str);
3381
Victor Stinner2bf89932015-10-14 11:25:33 +02003382 size = _PyBytesWriter_GetSize(writer, str);
3383 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003384 Py_CLEAR(writer->buffer);
3385 /* Get the empty byte string singleton */
3386 result = PyBytes_FromStringAndSize(NULL, 0);
3387 }
3388 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003389 if (writer->use_bytearray) {
3390 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391 }
3392 else {
3393 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003395 }
3396 else {
3397 result = writer->buffer;
3398 writer->buffer = NULL;
3399
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003402 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 }
3407 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003408 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003409 assert(result == NULL);
3410 return NULL;
3411 }
Victor Stinner00165072015-10-09 01:53:21 +02003412 }
3413 }
Victor Stinner00165072015-10-09 01:53:21 +02003414 }
Victor Stinner00165072015-10-09 01:53:21 +02003415 return result;
3416}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003417
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003418void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003419_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003420 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003421{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003422 char *str = (char *)ptr;
3423
Victor Stinnerce179bf2015-10-09 12:57:22 +02003424 str = _PyBytesWriter_Prepare(writer, str, size);
3425 if (str == NULL)
3426 return NULL;
3427
Christian Heimesf051e432016-09-13 20:22:02 +02003428 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003429 str += size;
3430
3431 return str;
3432}