blob: e95ab9c63b1b8d69a1e0491a077a24c9a3428f01 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030012class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of the
   data array plus one byte for the terminating null.  Any memory allocation
   for a string of length n should therefore request PyBytesObject_SIZE + n
   bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the resulting object holds the strlen(str) bytes that precede the
   terminator.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
Christian Heimesf051e432016-09-13 20:22:02 +0200123 memcpy(op->ob_sval, str, size);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
Christian Heimesf051e432016-09-13 20:22:02 +0200166 memcpy(op->ob_sval, str, size+1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700250 /* subtract bytes preallocated for the format string
Victor Stinner03dab782015-10-14 00:21:35 +0200251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Bit flags recording which format flag characters were seen in a
   conversion specification. */
#define F_LJUST (1<<0)   /* '-' */
#define F_SIGN  (1<<1)   /* '+' */
#define F_BLANK (1<<2)   /* ' ' */
#define F_ALT   (1<<3)   /* '#' */
#define F_ZERO  (1<<4)   /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Christian Heimesf051e432016-09-13 20:22:02 +0200440 memcpy(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200441 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 str += len;
443 return str;
444 }
445
446 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200448 *p_result = result;
449 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450}
451
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455 PyObject *result, *iobj;
456 if (type == 'i')
457 type = 'd';
458 if (PyLong_Check(v))
459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460 if (PyNumber_Check(v)) {
461 /* make sure number is a type of integer for o, x, and X */
462 if (type == 'o' || type == 'x' || type == 'X')
463 iobj = PyNumber_Index(v);
464 else
465 iobj = PyNumber_Long(v);
466 if (iobj == NULL) {
467 if (!PyErr_ExceptionMatches(PyExc_TypeError))
468 return NULL;
469 }
470 else if (!PyLong_Check(iobj))
471 Py_CLEAR(iobj);
472 if (iobj != NULL) {
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 }
478 PyErr_Format(PyExc_TypeError,
479 "%%%c format: %s is required, not %.200s", type,
480 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481 : "a number",
482 Py_TYPE(v)->tp_name);
483 return NULL;
484}
485
486static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyBytes_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 *p = PyByteArray_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
497 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300498 PyObject *iobj;
499 long ival;
500 int overflow;
501 /* make sure number is a type of integer */
502 if (PyLong_Check(arg)) {
503 ival = PyLong_AsLongAndOverflow(arg, &overflow);
504 }
505 else {
506 iobj = PyNumber_Index(arg);
507 if (iobj == NULL) {
508 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509 return 0;
510 goto onError;
511 }
512 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513 Py_DECREF(iobj);
514 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300515 if (!overflow && ival == -1 && PyErr_Occurred())
516 goto onError;
517 if (overflow || !(0 <= ival && ival <= 255)) {
518 PyErr_SetString(PyExc_OverflowError,
519 "%c arg not in range(256)");
520 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300522 *p = (char)ival;
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300525 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 PyErr_SetString(PyExc_TypeError,
527 "%c requires an integer in range(256) or a single byte");
528 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529}
530
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800531static PyObject *_PyBytes_FromBuffer(PyObject *x);
532
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800538 /* is it a bytes object? */
539 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200540 *pbuf = PyBytes_AS_STRING(v);
541 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800542 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200543 return v;
544 }
545 if (PyByteArray_Check(v)) {
546 *pbuf = PyByteArray_AS_STRING(v);
547 *plen = PyByteArray_GET_SIZE(v);
548 Py_INCREF(v);
549 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800550 }
551 /* does it support __bytes__? */
552 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
553 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +0100554 result = _PyObject_CallNoArg(func);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800555 Py_DECREF(func);
556 if (result == NULL)
557 return NULL;
558 if (!PyBytes_Check(result)) {
559 PyErr_Format(PyExc_TypeError,
560 "__bytes__ returned non-bytes (type %.200s)",
561 Py_TYPE(result)->tp_name);
562 Py_DECREF(result);
563 return NULL;
564 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200565 *pbuf = PyBytes_AS_STRING(result);
566 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800567 return result;
568 }
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800569 /* does it support buffer protocol? */
570 if (PyObject_CheckBuffer(v)) {
571 /* maybe we can avoid making a copy of the buffer object here? */
572 result = _PyBytes_FromBuffer(v);
573 if (result == NULL)
574 return NULL;
575 *pbuf = PyBytes_AS_STRING(result);
576 *plen = PyBytes_GET_SIZE(result);
577 return result;
578 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800579 PyErr_Format(PyExc_TypeError,
Xiang Zhang7e2a54c2017-03-14 15:07:15 +0800580 "%%b requires a bytes-like object, "
581 "or an object that implements __bytes__, not '%.100s'",
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 Py_TYPE(v)->tp_name);
583 return NULL;
584}
585
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200586/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587
588PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200589_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
590 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800591{
Victor Stinner772b2b02015-10-14 09:56:53 +0200592 const char *fmt;
593 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800594 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800596 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200598 _PyBytesWriter writer;
599
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800601 PyErr_BadInternalCall();
602 return NULL;
603 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200604 fmt = format;
605 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200606
607 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200608 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200609
610 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
611 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200613 if (!use_bytearray)
614 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
Ethan Furmanb95b5612015-01-23 20:05:18 -0800616 if (PyTuple_Check(args)) {
617 arglen = PyTuple_GET_SIZE(args);
618 argidx = 0;
619 }
620 else {
621 arglen = -1;
622 argidx = -2;
623 }
624 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
625 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
626 !PyByteArray_Check(args)) {
627 dict = args;
628 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 while (--fmtcnt >= 0) {
631 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200632 Py_ssize_t len;
633 char *pos;
634
Xiang Zhangb76ad512017-03-06 17:17:05 +0800635 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200636 if (pos != NULL)
637 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200638 else
Xiang Zhangb76ad512017-03-06 17:17:05 +0800639 len = fmtcnt + 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200640 assert(len != 0);
641
Christian Heimesf051e432016-09-13 20:22:02 +0200642 memcpy(res, fmt, len);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200643 res += len;
644 fmt += len;
645 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800646 }
647 else {
648 /* Got a format specifier */
649 int flags = 0;
650 Py_ssize_t width = -1;
651 int prec = -1;
652 int c = '\0';
653 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 PyObject *v = NULL;
655 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200656 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800657 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200658 Py_ssize_t len = 0;
659 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200660 Py_ssize_t alloc;
661#ifdef Py_DEBUG
662 char *before;
663#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800664
Ethan Furmanb95b5612015-01-23 20:05:18 -0800665 fmt++;
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200666 if (*fmt == '%') {
667 *res++ = '%';
668 fmt++;
669 fmtcnt--;
670 continue;
671 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800672 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200673 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800674 Py_ssize_t keylen;
675 PyObject *key;
676 int pcount = 1;
677
678 if (dict == NULL) {
679 PyErr_SetString(PyExc_TypeError,
680 "format requires a mapping");
681 goto error;
682 }
683 ++fmt;
684 --fmtcnt;
685 keystart = fmt;
686 /* Skip over balanced parentheses */
687 while (pcount > 0 && --fmtcnt >= 0) {
688 if (*fmt == ')')
689 --pcount;
690 else if (*fmt == '(')
691 ++pcount;
692 fmt++;
693 }
694 keylen = fmt - keystart - 1;
695 if (fmtcnt < 0 || pcount > 0) {
696 PyErr_SetString(PyExc_ValueError,
697 "incomplete format key");
698 goto error;
699 }
700 key = PyBytes_FromStringAndSize(keystart,
701 keylen);
702 if (key == NULL)
703 goto error;
704 if (args_owned) {
705 Py_DECREF(args);
706 args_owned = 0;
707 }
708 args = PyObject_GetItem(dict, key);
709 Py_DECREF(key);
710 if (args == NULL) {
711 goto error;
712 }
713 args_owned = 1;
714 arglen = -1;
715 argidx = -2;
716 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200717
718 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800719 while (--fmtcnt >= 0) {
720 switch (c = *fmt++) {
721 case '-': flags |= F_LJUST; continue;
722 case '+': flags |= F_SIGN; continue;
723 case ' ': flags |= F_BLANK; continue;
724 case '#': flags |= F_ALT; continue;
725 case '0': flags |= F_ZERO; continue;
726 }
727 break;
728 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200729
730 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800731 if (c == '*') {
732 v = getnextarg(args, arglen, &argidx);
733 if (v == NULL)
734 goto error;
735 if (!PyLong_Check(v)) {
736 PyErr_SetString(PyExc_TypeError,
737 "* wants int");
738 goto error;
739 }
740 width = PyLong_AsSsize_t(v);
741 if (width == -1 && PyErr_Occurred())
742 goto error;
743 if (width < 0) {
744 flags |= F_LJUST;
745 width = -width;
746 }
747 if (--fmtcnt >= 0)
748 c = *fmt++;
749 }
750 else if (c >= 0 && isdigit(c)) {
751 width = c - '0';
752 while (--fmtcnt >= 0) {
753 c = Py_CHARMASK(*fmt++);
754 if (!isdigit(c))
755 break;
756 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
757 PyErr_SetString(
758 PyExc_ValueError,
759 "width too big");
760 goto error;
761 }
762 width = width*10 + (c - '0');
763 }
764 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200765
766 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800767 if (c == '.') {
768 prec = 0;
769 if (--fmtcnt >= 0)
770 c = *fmt++;
771 if (c == '*') {
772 v = getnextarg(args, arglen, &argidx);
773 if (v == NULL)
774 goto error;
775 if (!PyLong_Check(v)) {
776 PyErr_SetString(
777 PyExc_TypeError,
778 "* wants int");
779 goto error;
780 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200781 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800782 if (prec == -1 && PyErr_Occurred())
783 goto error;
784 if (prec < 0)
785 prec = 0;
786 if (--fmtcnt >= 0)
787 c = *fmt++;
788 }
789 else if (c >= 0 && isdigit(c)) {
790 prec = c - '0';
791 while (--fmtcnt >= 0) {
792 c = Py_CHARMASK(*fmt++);
793 if (!isdigit(c))
794 break;
795 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
796 PyErr_SetString(
797 PyExc_ValueError,
798 "prec too big");
799 goto error;
800 }
801 prec = prec*10 + (c - '0');
802 }
803 }
804 } /* prec */
805 if (fmtcnt >= 0) {
806 if (c == 'h' || c == 'l' || c == 'L') {
807 if (--fmtcnt >= 0)
808 c = *fmt++;
809 }
810 }
811 if (fmtcnt < 0) {
812 PyErr_SetString(PyExc_ValueError,
813 "incomplete format");
814 goto error;
815 }
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +0200816 v = getnextarg(args, arglen, &argidx);
817 if (v == NULL)
818 goto error;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200819
820 if (fmtcnt < 0) {
821 /* last writer: disable writer overallocation */
822 writer.overallocate = 0;
823 }
824
Ethan Furmanb95b5612015-01-23 20:05:18 -0800825 sign = 0;
826 fill = ' ';
827 switch (c) {
Ethan Furman62e977f2015-03-11 08:17:00 -0700828 case 'r':
829 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800830 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 assert(PyUnicode_IS_ASCII(temp));
835 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
836 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 if (prec >= 0 && len > prec)
838 len = prec;
839 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200840
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 case 's':
842 // %s is only for 2/3 code; 3 only code should use %b
843 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200844 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800845 if (temp == NULL)
846 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800847 if (prec >= 0 && len > prec)
848 len = prec;
849 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200850
Ethan Furmanb95b5612015-01-23 20:05:18 -0800851 case 'i':
852 case 'd':
853 case 'u':
854 case 'o':
855 case 'x':
856 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200857 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200858 && width == -1 && prec == -1
859 && !(flags & (F_SIGN | F_BLANK))
860 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200861 {
862 /* Fast path */
863 int alternate = flags & F_ALT;
864 int base;
865
866 switch(c)
867 {
868 default:
869 assert(0 && "'type' not in [diuoxX]");
870 case 'd':
871 case 'i':
872 case 'u':
873 base = 10;
874 break;
875 case 'o':
876 base = 8;
877 break;
878 case 'x':
879 case 'X':
880 base = 16;
881 break;
882 }
883
884 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200885 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200886 res = _PyLong_FormatBytesWriter(&writer, res,
887 v, base, alternate);
888 if (res == NULL)
889 goto error;
890 continue;
891 }
892
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300893 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200894 if (!temp)
895 goto error;
896 assert(PyUnicode_IS_ASCII(temp));
897 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
898 len = PyUnicode_GET_LENGTH(temp);
899 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800900 if (flags & F_ZERO)
901 fill = '0';
902 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903
Ethan Furmanb95b5612015-01-23 20:05:18 -0800904 case 'e':
905 case 'E':
906 case 'f':
907 case 'F':
908 case 'g':
909 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200910 if (width == -1 && prec == -1
911 && !(flags & (F_SIGN | F_BLANK)))
912 {
913 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200914 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200915 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916 if (res == NULL)
917 goto error;
918 continue;
919 }
920
Victor Stinnerad771582015-10-09 12:38:53 +0200921 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 goto error;
923 pbuf = PyBytes_AS_STRING(temp);
924 len = PyBytes_GET_SIZE(temp);
925 sign = 1;
926 if (flags & F_ZERO)
927 fill = '0';
928 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200929
Ethan Furmanb95b5612015-01-23 20:05:18 -0800930 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200931 pbuf = &onechar;
932 len = byte_converter(v, &onechar);
933 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200935 if (width == -1) {
936 /* Fast path */
937 *res++ = onechar;
938 continue;
939 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800940 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200941
Ethan Furmanb95b5612015-01-23 20:05:18 -0800942 default:
943 PyErr_Format(PyExc_ValueError,
944 "unsupported format character '%c' (0x%x) "
945 "at index %zd",
946 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200947 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800948 goto error;
949 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
Ethan Furmanb95b5612015-01-23 20:05:18 -0800951 if (sign) {
952 if (*pbuf == '-' || *pbuf == '+') {
953 sign = *pbuf++;
954 len--;
955 }
956 else if (flags & F_SIGN)
957 sign = '+';
958 else if (flags & F_BLANK)
959 sign = ' ';
960 else
961 sign = 0;
962 }
963 if (width < len)
964 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200965
966 alloc = width;
967 if (sign != 0 && len == width)
968 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200969 /* 2: size preallocated for %s */
970 if (alloc > 2) {
971 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972 if (res == NULL)
973 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800974 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200975#ifdef Py_DEBUG
976 before = res;
977#endif
978
979 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800980 if (sign) {
981 if (fill != ' ')
982 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 if (width > len)
984 width--;
985 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200986
987 /* Write the numeric prefix for "x", "X" and "o" formats
988 if the alternate form is used.
989 For example, write "0x" for the "%#x" format. */
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200990 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 assert(pbuf[0] == '0');
992 assert(pbuf[1] == c);
993 if (fill != ' ') {
994 *res++ = *pbuf++;
995 *res++ = *pbuf++;
996 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800997 width -= 2;
998 if (width < 0)
999 width = 0;
1000 len -= 2;
1001 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001002
1003 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001004 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001005 memset(res, fill, width - len);
1006 res += (width - len);
1007 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -08001008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* If padding with spaces: write sign if needed and/or numeric
1011 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 if (fill == ' ') {
1013 if (sign)
1014 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +02001015 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 assert(pbuf[0] == '0');
1017 assert(pbuf[1] == c);
1018 *res++ = *pbuf++;
1019 *res++ = *pbuf++;
1020 }
1021 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001022
1023 /* Copy bytes */
Christian Heimesf051e432016-09-13 20:22:02 +02001024 memcpy(res, pbuf, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001025 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001026
1027 /* Pad right with the fill character if needed */
1028 if (width > len) {
1029 memset(res, ' ', width - len);
1030 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001031 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001032
Serhiy Storchaka9f8ad3f2017-03-08 05:51:19 +02001033 if (dict && (argidx < arglen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -08001034 PyErr_SetString(PyExc_TypeError,
1035 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001036 Py_XDECREF(temp);
1037 goto error;
1038 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001040
1041#ifdef Py_DEBUG
1042 /* check that we computed the exact size for this write */
1043 assert((res - before) == alloc);
1044#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001046
1047 /* If overallocation was disabled, ensure that it was the last
1048 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001049 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (argidx < arglen && !dict) {
1053 PyErr_SetString(PyExc_TypeError,
1054 "not all arguments converted during bytes formatting");
1055 goto error;
1056 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001057
Ethan Furmanb95b5612015-01-23 20:05:18 -08001058 if (args_owned) {
1059 Py_DECREF(args);
1060 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001061 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001062
1063 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001064 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001065 if (args_owned) {
1066 Py_DECREF(args);
1067 }
1068 return NULL;
1069}
1070
1071/* =-= */
1072
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001073static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001074bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001075{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001077}
1078
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001079/* Unescape a backslash-escaped string. If unicode is non-zero,
1080 the string is a u-literal. If recode_encoding is non-zero,
1081 the string is UTF-8 encoded and should be re-encoded in the
1082 specified encoding. */
1083
Victor Stinner2ec80632015-10-14 13:32:13 +02001084static char *
1085_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1086 const char *errors, const char *recode_encoding,
1087 _PyBytesWriter *writer, char *p)
1088{
1089 PyObject *u, *w;
1090 const char* t;
1091
1092 t = *s;
1093 /* Decode non-ASCII bytes as UTF-8. */
1094 while (t < end && (*t & 0x80))
1095 t++;
1096 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1097 if (u == NULL)
1098 return NULL;
1099
1100 /* Recode them in target encoding. */
1101 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1102 Py_DECREF(u);
1103 if (w == NULL)
1104 return NULL;
1105 assert(PyBytes_Check(w));
1106
1107 /* Append bytes to output buffer. */
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001108 writer->min_size--; /* subtract 1 preallocated byte */
Victor Stinner2ec80632015-10-14 13:32:13 +02001109 p = _PyBytesWriter_WriteBytes(writer, p,
1110 PyBytes_AS_STRING(w),
1111 PyBytes_GET_SIZE(w));
1112 Py_DECREF(w);
1113 if (p == NULL)
1114 return NULL;
1115
1116 *s = t;
1117 return p;
1118}
1119
Eric V. Smith42454af2016-10-31 09:22:08 -04001120PyObject *_PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 Py_ssize_t len,
1122 const char *errors,
1123 Py_ssize_t unicode,
Eric V. Smith42454af2016-10-31 09:22:08 -04001124 const char *recode_encoding,
1125 const char **first_invalid_escape)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001128 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 _PyBytesWriter writer;
1131
1132 _PyBytesWriter_Init(&writer);
1133
1134 p = _PyBytesWriter_Alloc(&writer, len);
1135 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001137 writer.overallocate = 1;
1138
Eric V. Smith42454af2016-10-31 09:22:08 -04001139 *first_invalid_escape = NULL;
1140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 end = s + len;
1142 while (s < end) {
1143 if (*s != '\\') {
1144 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 *p++ = *s++;
1147 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001148 else {
1149 /* non-ASCII character and need to recode */
1150 p = _PyBytes_DecodeEscapeRecode(&s, end,
1151 errors, recode_encoding,
1152 &writer, p);
1153 if (p == NULL)
1154 goto failed;
1155 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 continue;
1157 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001160 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 PyErr_SetString(PyExc_ValueError,
1162 "Trailing \\ in string");
1163 goto failed;
1164 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 switch (*s++) {
1167 /* XXX This assumes ASCII! */
1168 case '\n': break;
1169 case '\\': *p++ = '\\'; break;
1170 case '\'': *p++ = '\''; break;
1171 case '\"': *p++ = '\"'; break;
1172 case 'b': *p++ = '\b'; break;
1173 case 'f': *p++ = '\014'; break; /* FF */
1174 case 't': *p++ = '\t'; break;
1175 case 'n': *p++ = '\n'; break;
1176 case 'r': *p++ = '\r'; break;
1177 case 'v': *p++ = '\013'; break; /* VT */
1178 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1179 case '0': case '1': case '2': case '3':
1180 case '4': case '5': case '6': case '7':
1181 c = s[-1] - '0';
1182 if (s < end && '0' <= *s && *s <= '7') {
1183 c = (c<<3) + *s++ - '0';
1184 if (s < end && '0' <= *s && *s <= '7')
1185 c = (c<<3) + *s++ - '0';
1186 }
1187 *p++ = c;
1188 break;
1189 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001190 if (s+1 < end) {
1191 int digit1, digit2;
1192 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1193 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1194 if (digit1 < 16 && digit2 < 16) {
1195 *p++ = (unsigned char)((digit1 << 4) + digit2);
1196 s += 2;
1197 break;
1198 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001200 /* invalid hexadecimal digits */
1201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001203 PyErr_Format(PyExc_ValueError,
1204 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001205 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 goto failed;
1207 }
1208 if (strcmp(errors, "replace") == 0) {
1209 *p++ = '?';
1210 } else if (strcmp(errors, "ignore") == 0)
1211 /* do nothing */;
1212 else {
1213 PyErr_Format(PyExc_ValueError,
1214 "decoding error; unknown "
1215 "error handling code: %.400s",
1216 errors);
1217 goto failed;
1218 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001219 /* skip \x */
1220 if (s < end && Py_ISXDIGIT(s[0]))
1221 s++; /* and a hexdigit */
1222 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 default:
Eric V. Smith42454af2016-10-31 09:22:08 -04001225 if (*first_invalid_escape == NULL) {
1226 *first_invalid_escape = s-1; /* Back up one char, since we've
1227 already incremented s. */
1228 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 *p++ = '\\';
Eric V. Smith42454af2016-10-31 09:22:08 -04001230 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001231 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 UTF-8 bytes may follow. */
1233 }
1234 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001235
1236 return _PyBytesWriter_Finish(&writer, p);
1237
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001239 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241}
1242
Eric V. Smith42454af2016-10-31 09:22:08 -04001243PyObject *PyBytes_DecodeEscape(const char *s,
1244 Py_ssize_t len,
1245 const char *errors,
1246 Py_ssize_t unicode,
1247 const char *recode_encoding)
1248{
1249 const char* first_invalid_escape;
1250 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1251 recode_encoding,
1252 &first_invalid_escape);
1253 if (result == NULL)
1254 return NULL;
1255 if (first_invalid_escape != NULL) {
1256 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1257 "invalid escape sequence '\\%c'",
1258 *first_invalid_escape) < 0) {
1259 Py_DECREF(result);
1260 return NULL;
1261 }
1262 }
1263 return result;
1264
1265}
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266/* -------------------------------------------------------------------- */
1267/* object api */
1268
1269Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001270PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 if (!PyBytes_Check(op)) {
1273 PyErr_Format(PyExc_TypeError,
1274 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1275 return -1;
1276 }
1277 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278}
1279
1280char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001281PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 if (!PyBytes_Check(op)) {
1284 PyErr_Format(PyExc_TypeError,
1285 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1286 return NULL;
1287 }
1288 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289}
1290
1291int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001292PyBytes_AsStringAndSize(PyObject *obj,
1293 char **s,
1294 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 if (s == NULL) {
1297 PyErr_BadInternalCall();
1298 return -1;
1299 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (!PyBytes_Check(obj)) {
1302 PyErr_Format(PyExc_TypeError,
1303 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1304 return -1;
1305 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 *s = PyBytes_AS_STRING(obj);
1308 if (len != NULL)
1309 *len = PyBytes_GET_SIZE(obj);
1310 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001311 PyErr_SetString(PyExc_ValueError,
1312 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 return -1;
1314 }
1315 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316}
Neal Norwitz6968b052007-02-27 19:02:19 +00001317
1318/* -------------------------------------------------------------------- */
1319/* Methods */
1320
Eric Smith0923d1d2009-04-16 20:16:10 +00001321#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001322
1323#include "stringlib/fastsearch.h"
1324#include "stringlib/count.h"
1325#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001326#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001327#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001328#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001329#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001330
Eric Smith0f78bff2009-11-30 01:01:42 +00001331#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001332
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333PyObject *
1334PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001335{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001336 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001337 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001338 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001340 unsigned char quote, *s, *p;
1341
1342 /* Compute size of output string */
1343 squotes = dquotes = 0;
1344 newsize = 3; /* b'' */
1345 s = (unsigned char*)op->ob_sval;
1346 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001347 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001348 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001349 case '\'': squotes++; break;
1350 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001352 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 default:
1354 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001355 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357 if (newsize > PY_SSIZE_T_MAX - incr)
1358 goto overflow;
1359 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001360 }
1361 quote = '\'';
1362 if (smartquotes && squotes && !dquotes)
1363 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001364 if (squotes && quote == '\'') {
1365 if (newsize > PY_SSIZE_T_MAX - squotes)
1366 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001367 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369
1370 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 if (v == NULL) {
1372 return NULL;
1373 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 *p++ = 'b', *p++ = quote;
1377 for (i = 0; i < length; i++) {
1378 unsigned char c = op->ob_sval[i];
1379 if (c == quote || c == '\\')
1380 *p++ = '\\', *p++ = c;
1381 else if (c == '\t')
1382 *p++ = '\\', *p++ = 't';
1383 else if (c == '\n')
1384 *p++ = '\\', *p++ = 'n';
1385 else if (c == '\r')
1386 *p++ = '\\', *p++ = 'r';
1387 else if (c < ' ' || c >= 0x7f) {
1388 *p++ = '\\';
1389 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001390 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1391 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001393 else
1394 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001396 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001397 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001398 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001399
1400 overflow:
1401 PyErr_SetString(PyExc_OverflowError,
1402 "bytes object is too large to make repr");
1403 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001404}
1405
Neal Norwitz6968b052007-02-27 19:02:19 +00001406static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001407bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001408{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001410}
1411
Neal Norwitz6968b052007-02-27 19:02:19 +00001412static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001413bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (Py_BytesWarningFlag) {
1416 if (PyErr_WarnEx(PyExc_BytesWarning,
1417 "str() on a bytes instance", 1))
1418 return NULL;
1419 }
1420 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001421}
1422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001424bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427}
Neal Norwitz6968b052007-02-27 19:02:19 +00001428
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429/* This is also used by PyBytes_Concat() */
1430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001431bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 Py_buffer va, vb;
1434 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 va.len = -1;
1437 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001438 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1439 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Serhiy Storchaka6b5a9ec2017-03-19 19:47:02 +02001441 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 goto done;
1443 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 /* Optimize end cases */
1446 if (va.len == 0 && PyBytes_CheckExact(b)) {
1447 result = b;
1448 Py_INCREF(result);
1449 goto done;
1450 }
1451 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1452 result = a;
1453 Py_INCREF(result);
1454 goto done;
1455 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001457 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 PyErr_NoMemory();
1459 goto done;
1460 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001462 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 if (result != NULL) {
1464 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1465 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1466 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
1468 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (va.len != -1)
1470 PyBuffer_Release(&va);
1471 if (vb.len != -1)
1472 PyBuffer_Release(&vb);
1473 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474}
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
1476static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001477bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001478{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479 Py_ssize_t i;
1480 Py_ssize_t j;
1481 Py_ssize_t size;
1482 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 size_t nbytes;
1484 if (n < 0)
1485 n = 0;
1486 /* watch out for overflows: the size can overflow int,
1487 * and the # of bytes needed can overflow size_t
1488 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001489 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 PyErr_SetString(PyExc_OverflowError,
1491 "repeated bytes are too long");
1492 return NULL;
1493 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001494 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496 Py_INCREF(a);
1497 return (PyObject *)a;
1498 }
1499 nbytes = (size_t)size;
1500 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501 PyErr_SetString(PyExc_OverflowError,
1502 "repeated bytes are too long");
1503 return NULL;
1504 }
1505 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506 if (op == NULL)
1507 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001508 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 op->ob_shash = -1;
1510 op->ob_sval[size] = '\0';
1511 if (Py_SIZE(a) == 1 && n > 0) {
1512 memset(op->ob_sval, a->ob_sval[0] , n);
1513 return (PyObject *) op;
1514 }
1515 i = 0;
1516 if (i < size) {
Christian Heimesf051e432016-09-13 20:22:02 +02001517 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 i = Py_SIZE(a);
1519 }
1520 while (i < size) {
1521 j = (i <= size-i) ? i : size-i;
Christian Heimesf051e432016-09-13 20:22:02 +02001522 memcpy(op->ob_sval+i, op->ob_sval, j);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 i += j;
1524 }
1525 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001526}
1527
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001528static int
1529bytes_contains(PyObject *self, PyObject *arg)
1530{
1531 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532}
1533
Neal Norwitz6968b052007-02-27 19:02:19 +00001534static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001535bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 if (i < 0 || i >= Py_SIZE(a)) {
1538 PyErr_SetString(PyExc_IndexError, "index out of range");
1539 return NULL;
1540 }
1541 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001542}
1543
Benjamin Peterson621b4302016-09-09 13:54:34 -07001544static int
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001545bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546{
1547 int cmp;
1548 Py_ssize_t len;
1549
1550 len = Py_SIZE(a);
1551 if (Py_SIZE(b) != len)
1552 return 0;
1553
1554 if (a->ob_sval[0] != b->ob_sval[0])
1555 return 0;
1556
1557 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558 return (cmp == 0);
1559}
1560
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001562bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 int c;
1565 Py_ssize_t len_a, len_b;
1566 Py_ssize_t min_len;
1567 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001568 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 /* Make sure both arguments are strings. */
1571 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001572 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001573 rc = PyObject_IsInstance((PyObject*)a,
1574 (PyObject*)&PyUnicode_Type);
1575 if (!rc)
1576 rc = PyObject_IsInstance((PyObject*)b,
1577 (PyObject*)&PyUnicode_Type);
1578 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001580 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001581 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001582 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001583 return NULL;
1584 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001585 else {
1586 rc = PyObject_IsInstance((PyObject*)a,
1587 (PyObject*)&PyLong_Type);
1588 if (!rc)
1589 rc = PyObject_IsInstance((PyObject*)b,
1590 (PyObject*)&PyLong_Type);
1591 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001592 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001593 if (rc) {
1594 if (PyErr_WarnEx(PyExc_BytesWarning,
1595 "Comparison between bytes and int", 1))
1596 return NULL;
1597 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001598 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 }
1600 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001602 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001604 case Py_EQ:
1605 case Py_LE:
1606 case Py_GE:
1607 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001609 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001610 case Py_NE:
1611 case Py_LT:
1612 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001614 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001615 default:
1616 PyErr_BadArgument();
1617 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 }
1619 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001620 else if (op == Py_EQ || op == Py_NE) {
1621 int eq = bytes_compare_eq(a, b);
1622 eq ^= (op == Py_NE);
1623 result = eq ? Py_True : Py_False;
1624 }
1625 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001626 len_a = Py_SIZE(a);
1627 len_b = Py_SIZE(b);
1628 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001629 if (min_len > 0) {
1630 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001631 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001632 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001634 else
1635 c = 0;
1636 if (c == 0)
1637 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1638 switch (op) {
1639 case Py_LT: c = c < 0; break;
1640 case Py_LE: c = c <= 0; break;
1641 case Py_GT: c = c > 0; break;
1642 case Py_GE: c = c >= 0; break;
1643 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001644 PyErr_BadArgument();
1645 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001646 }
1647 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 Py_INCREF(result);
1651 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001652}
1653
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001654static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001655bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001656{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001657 if (a->ob_shash == -1) {
1658 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001659 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001660 }
1661 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001662}
1663
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001665bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 if (PyIndex_Check(item)) {
1668 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1669 if (i == -1 && PyErr_Occurred())
1670 return NULL;
1671 if (i < 0)
1672 i += PyBytes_GET_SIZE(self);
1673 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1674 PyErr_SetString(PyExc_IndexError,
1675 "index out of range");
1676 return NULL;
1677 }
1678 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1679 }
1680 else if (PySlice_Check(item)) {
1681 Py_ssize_t start, stop, step, slicelength, cur, i;
1682 char* source_buf;
1683 char* result_buf;
1684 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001685
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001686 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 return NULL;
1688 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001689 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1690 &stop, step);
Neal Norwitz6968b052007-02-27 19:02:19 +00001691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 if (slicelength <= 0) {
1693 return PyBytes_FromStringAndSize("", 0);
1694 }
1695 else if (start == 0 && step == 1 &&
1696 slicelength == PyBytes_GET_SIZE(self) &&
1697 PyBytes_CheckExact(self)) {
1698 Py_INCREF(self);
1699 return (PyObject *)self;
1700 }
1701 else if (step == 1) {
1702 return PyBytes_FromStringAndSize(
1703 PyBytes_AS_STRING(self) + start,
1704 slicelength);
1705 }
1706 else {
1707 source_buf = PyBytes_AS_STRING(self);
1708 result = PyBytes_FromStringAndSize(NULL, slicelength);
1709 if (result == NULL)
1710 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 result_buf = PyBytes_AS_STRING(result);
1713 for (cur = start, i = 0; i < slicelength;
1714 cur += step, i++) {
1715 result_buf[i] = source_buf[cur];
1716 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 return result;
1719 }
1720 }
1721 else {
1722 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001723 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 Py_TYPE(item)->tp_name);
1725 return NULL;
1726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727}
1728
1729static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001730bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1733 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734}
1735
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001736static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 (lenfunc)bytes_length, /*sq_length*/
1738 (binaryfunc)bytes_concat, /*sq_concat*/
1739 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1740 (ssizeargfunc)bytes_item, /*sq_item*/
1741 0, /*sq_slice*/
1742 0, /*sq_ass_item*/
1743 0, /*sq_ass_slice*/
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001744 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745};
1746
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001747static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 (lenfunc)bytes_length,
1749 (binaryfunc)bytes_subscript,
1750 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751};
1752
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001753static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 (getbufferproc)bytes_buffer_getbuffer,
1755 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756};
1757
1758
/* Strip modes accepted by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1762
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001763/*[clinic input]
1764bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001766 sep: object = None
        The delimiter according to which to split the bytes.
1768 None (the default value) means split on ASCII whitespace characters
1769 (space, tab, return, newline, formfeed, vertical tab).
1770 maxsplit: Py_ssize_t = -1
1771 Maximum number of splits to do.
1772 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001774Return a list of the sections in the bytes, using sep as the delimiter.
1775[clinic start generated code]*/
1776
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001778bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1779/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001780{
1781 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 const char *s = PyBytes_AS_STRING(self), *sub;
1783 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 if (maxsplit < 0)
1787 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 return NULL;
1792 sub = vsub.buf;
1793 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1796 PyBuffer_Release(&vsub);
1797 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001798}
1799
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800/*[clinic input]
1801bytes.partition
1802
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 /
1805
1806Partition the bytes into three parts using the given separator.
1807
1808This will search for the separator sep in the bytes. If the separator is found,
1809returns a 3-tuple containing the part before the separator, the separator
1810itself, and the part after it.
1811
1812If the separator is not found, returns a 3-tuple containing the original bytes
1813object and two empty bytes objects.
1814[clinic start generated code]*/
1815
Neal Norwitz6968b052007-02-27 19:02:19 +00001816static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001817bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001818/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001819{
Neal Norwitz6968b052007-02-27 19:02:19 +00001820 return stringlib_partition(
1821 (PyObject*) self,
1822 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001823 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001824 );
1825}
1826
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001827/*[clinic input]
1828bytes.rpartition
1829
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001830 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001831 /
1832
1833Partition the bytes into three parts using the given separator.
1834
This will search for the separator sep in the bytes, starting at the end. If
1836the separator is found, returns a 3-tuple containing the part before the
1837separator, the separator itself, and the part after it.
1838
1839If the separator is not found, returns a 3-tuple containing two empty bytes
1840objects and the original bytes object.
1841[clinic start generated code]*/
1842
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001843static PyObject *
1844bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001845/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001846{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return stringlib_rpartition(
1848 (PyObject*) self,
1849 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001850 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001852}
1853
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854/*[clinic input]
1855bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001856
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001857Return a list of the sections in the bytes, using sep as the delimiter.
1858
1859Splitting is done starting at the end of the bytes and working to the front.
1860[clinic start generated code]*/
1861
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001863bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1864/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001865{
1866 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 const char *s = PyBytes_AS_STRING(self), *sub;
1868 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001869 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 if (maxsplit < 0)
1872 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001873 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001875 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 return NULL;
1877 sub = vsub.buf;
1878 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1881 PyBuffer_Release(&vsub);
1882 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001883}
1884
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001886/*[clinic input]
1887bytes.join
1888
1889 iterable_of_bytes: object
1890 /
1891
1892Concatenate any number of bytes objects.
1893
1894The bytes whose method is called is inserted in between each pair.
1895
1896The result is returned as a new bytes object.
1897
1898Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1899[clinic start generated code]*/
1900
Neal Norwitz6968b052007-02-27 19:02:19 +00001901static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001902bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1903/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001904{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001905 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001906}
1907
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908PyObject *
1909_PyBytes_Join(PyObject *sep, PyObject *x)
1910{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 assert(sep != NULL && PyBytes_Check(sep));
1912 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001913 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914}
1915
Serhiy Storchakae09132f2016-07-03 13:57:48 +03001916static PyObject *
1917bytes_find(PyBytesObject *self, PyObject *args)
1918{
1919 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922static PyObject *
1923bytes_index(PyBytesObject *self, PyObject *args)
1924{
1925 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1926}
1927
1928
1929static PyObject *
1930bytes_rfind(PyBytesObject *self, PyObject *args)
1931{
1932 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1933}
1934
1935
1936static PyObject *
1937bytes_rindex(PyBytesObject *self, PyObject *args)
1938{
1939 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1940}
1941
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
1943Py_LOCAL_INLINE(PyObject *)
1944do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 Py_buffer vsep;
1947 char *s = PyBytes_AS_STRING(self);
1948 Py_ssize_t len = PyBytes_GET_SIZE(self);
1949 char *sep;
1950 Py_ssize_t seplen;
1951 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001953 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 return NULL;
1955 sep = vsep.buf;
1956 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 i = 0;
1959 if (striptype != RIGHTSTRIP) {
1960 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1961 i++;
1962 }
1963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 j = len;
1966 if (striptype != LEFTSTRIP) {
1967 do {
1968 j--;
1969 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1970 j++;
1971 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1976 Py_INCREF(self);
1977 return (PyObject*)self;
1978 }
1979 else
1980 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001981}
1982
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
1984Py_LOCAL_INLINE(PyObject *)
1985do_strip(PyBytesObject *self, int striptype)
1986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 char *s = PyBytes_AS_STRING(self);
1988 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 i = 0;
1991 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001992 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 i++;
1994 }
1995 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 j = len;
1998 if (striptype != LEFTSTRIP) {
1999 do {
2000 j--;
David Malcolm96960882010-11-05 17:23:41 +00002001 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 j++;
2003 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2006 Py_INCREF(self);
2007 return (PyObject*)self;
2008 }
2009 else
2010 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011}
2012
2013
2014Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017 if (bytes != NULL && bytes != Py_None) {
2018 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 }
2020 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021}
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023/*[clinic input]
2024bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 bytes: object = None
2027 /
2028
2029Strip leading and trailing bytes contained in the argument.
2030
2031If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2032[clinic start generated code]*/
2033
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002034static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002036/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002037{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002039}
2040
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041/*[clinic input]
2042bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002044 bytes: object = None
2045 /
2046
2047Strip leading bytes contained in the argument.
2048
2049If the argument is omitted or None, strip leading ASCII whitespace.
2050[clinic start generated code]*/
2051
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002052static PyObject *
2053bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002054/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002055{
2056 return do_argstrip(self, LEFTSTRIP, bytes);
2057}
2058
2059/*[clinic input]
2060bytes.rstrip
2061
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002062 bytes: object = None
2063 /
2064
2065Strip trailing bytes contained in the argument.
2066
2067If the argument is omitted or None, strip trailing ASCII whitespace.
2068[clinic start generated code]*/
2069
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002070static PyObject *
2071bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002072/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002073{
2074 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002075}
Neal Norwitz6968b052007-02-27 19:02:19 +00002076
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002078static PyObject *
2079bytes_count(PyBytesObject *self, PyObject *args)
2080{
2081 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2082}
2083
2084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002085/*[clinic input]
2086bytes.translate
2087
Victor Stinner049e5092014-08-17 22:20:00 +02002088 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089 Translation table, which must be a bytes object of length 256.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002090 /
Martin Panter1b6c6da2016-08-27 08:35:02 +00002091 delete as deletechars: object(c_default="NULL") = b''
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002092
2093Return a copy with each character mapped by the given translation table.
2094
Martin Panter1b6c6da2016-08-27 08:35:02 +00002095All characters occurring in the optional argument delete are removed.
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096The remaining characters are mapped through the given translation table.
2097[clinic start generated code]*/
2098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099static PyObject *
Martin Panter1b6c6da2016-08-27 08:35:02 +00002100bytes_translate_impl(PyBytesObject *self, PyObject *table,
Larry Hastings89964c42015-04-14 18:07:59 -04002101 PyObject *deletechars)
Martin Panter1b6c6da2016-08-27 08:35:02 +00002102/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002104 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002105 Py_buffer table_view = {NULL, NULL};
2106 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002108 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 Py_ssize_t inlen, tablen, dellen = 0;
2112 PyObject *result;
2113 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 if (PyBytes_Check(table)) {
2116 table_chars = PyBytes_AS_STRING(table);
2117 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002119 else if (table == Py_None) {
2120 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 tablen = 256;
2122 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002123 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002124 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002125 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002126 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002127 tablen = table_view.len;
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 if (tablen != 256) {
2131 PyErr_SetString(PyExc_ValueError,
2132 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 return NULL;
2135 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137 if (deletechars != NULL) {
2138 if (PyBytes_Check(deletechars)) {
2139 del_table_chars = PyBytes_AS_STRING(deletechars);
2140 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002142 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002143 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002144 PyBuffer_Release(&table_view);
2145 return NULL;
2146 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002147 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002148 dellen = del_table_view.len;
2149 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 }
2151 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 dellen = 0;
2154 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 inlen = PyBytes_GET_SIZE(input_obj);
2157 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 if (result == NULL) {
2159 PyBuffer_Release(&del_table_view);
2160 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002162 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002163 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002166 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 /* If no deletions are required, use faster code */
2168 for (i = inlen; --i >= 0; ) {
2169 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 changed = 1;
2172 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002173 if (!changed && PyBytes_CheckExact(input_obj)) {
2174 Py_INCREF(input_obj);
2175 Py_DECREF(result);
2176 result = input_obj;
2177 }
2178 PyBuffer_Release(&del_table_view);
2179 PyBuffer_Release(&table_view);
2180 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 for (i = 0; i < 256; i++)
2185 trans_table[i] = Py_CHARMASK(i);
2186 } else {
2187 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002188 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002190 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002194 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 for (i = inlen; --i >= 0; ) {
2197 c = Py_CHARMASK(*input++);
2198 if (trans_table[c] != -1)
2199 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2200 continue;
2201 changed = 1;
2202 }
2203 if (!changed && PyBytes_CheckExact(input_obj)) {
2204 Py_DECREF(result);
2205 Py_INCREF(input_obj);
2206 return input_obj;
2207 }
2208 /* Fix the size of the resulting string */
2209 if (inlen > 0)
2210 _PyBytes_Resize(&result, output - output_start);
2211 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212}
2213
2214
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215/*[clinic input]
2216
2217@staticmethod
2218bytes.maketrans
2219
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002220 frm: Py_buffer
2221 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222 /
2223
2224Return a translation table useable for the bytes or bytearray translate method.
2225
2226The returned table will be one where each byte in frm is mapped to the byte at
2227the same position in to.
2228
2229The bytes objects frm and to must be of the same length.
2230[clinic start generated code]*/
2231
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002232static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002233bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002234/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235{
2236 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002237}
2238
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239
2240/*[clinic input]
2241bytes.replace
2242
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002243 old: Py_buffer
2244 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002245 count: Py_ssize_t = -1
2246 Maximum number of occurrences to replace.
2247 -1 (the default value) means replace all occurrences.
2248 /
2249
2250Return a copy with all occurrences of substring old replaced by new.
2251
2252If the optional argument count is given, only the first count occurrences are
2253replaced.
2254[clinic start generated code]*/
2255
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002256static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002257bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002258 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002259/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03002261 return stringlib_replace((PyObject *)self,
2262 (const char *)old->buf, old->len,
2263 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264}
2265
2266/** End DALKE **/
2267
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002269static PyObject *
2270bytes_startswith(PyBytesObject *self, PyObject *args)
2271{
2272 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2273}
2274
2275static PyObject *
2276bytes_endswith(PyBytesObject *self, PyObject *args)
2277{
2278 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2279}
2280
2281
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002282/*[clinic input]
2283bytes.decode
2284
2285 encoding: str(c_default="NULL") = 'utf-8'
2286 The encoding with which to decode the bytes.
2287 errors: str(c_default="NULL") = 'strict'
2288 The error handling scheme to use for the handling of decoding errors.
2289 The default is 'strict' meaning that decoding errors raise a
2290 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2291 as well as any other name registered with codecs.register_error that
2292 can handle UnicodeDecodeErrors.
2293
2294Decode the bytes using the codec registered for encoding.
2295[clinic start generated code]*/
2296
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002297static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002298bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002299 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002300/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002301{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002302 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002303}
2304
Guido van Rossum20188312006-05-05 15:15:40 +00002305
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002306/*[clinic input]
2307bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002309 keepends: bool(accept={int}) = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002310
2311Return a list of the lines in the bytes, breaking at line boundaries.
2312
2313Line breaks are not included in the resulting list unless keepends is given and
2314true.
2315[clinic start generated code]*/
2316
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002318bytes_splitlines_impl(PyBytesObject *self, int keepends)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02002319/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002320{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002322 (PyObject*) self, PyBytes_AS_STRING(self),
2323 PyBytes_GET_SIZE(self), keepends
2324 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002325}
2326
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327/*[clinic input]
2328@classmethod
2329bytes.fromhex
2330
2331 string: unicode
2332 /
2333
2334Create a bytes object from a string of hexadecimal numbers.
2335
2336Spaces between two numbers are accepted.
2337Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2338[clinic start generated code]*/
2339
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002341bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002342/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002343{
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002344 PyObject *result = _PyBytes_FromHex(string, 0);
2345 if (type != &PyBytes_Type && result != NULL) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002346 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2347 result, NULL));
Serhiy Storchaka0855e702016-07-01 17:22:31 +03002348 }
2349 return result;
Victor Stinner2bf89932015-10-14 11:25:33 +02002350}
2351
2352PyObject*
2353_PyBytes_FromHex(PyObject *string, int use_bytearray)
2354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002356 Py_ssize_t hexlen, invalid_char;
2357 unsigned int top, bot;
2358 Py_UCS1 *str, *end;
2359 _PyBytesWriter writer;
2360
2361 _PyBytesWriter_Init(&writer);
2362 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002363
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002364 assert(PyUnicode_Check(string));
2365 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002368
Victor Stinner2bf89932015-10-14 11:25:33 +02002369 if (!PyUnicode_IS_ASCII(string)) {
2370 void *data = PyUnicode_DATA(string);
2371 unsigned int kind = PyUnicode_KIND(string);
2372 Py_ssize_t i;
2373
2374 /* search for the first non-ASCII character */
2375 for (i = 0; i < hexlen; i++) {
2376 if (PyUnicode_READ(kind, data, i) >= 128)
2377 break;
2378 }
2379 invalid_char = i;
2380 goto error;
2381 }
2382
2383 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2384 str = PyUnicode_1BYTE_DATA(string);
2385
2386 /* This overestimates if there are spaces */
2387 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2388 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002390
2391 end = str + hexlen;
2392 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 /* skip over spaces in the input */
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002394 if (Py_ISSPACE(*str)) {
Victor Stinner2bf89932015-10-14 11:25:33 +02002395 do {
2396 str++;
Serhiy Storchakadd1da7f2016-12-19 18:51:37 +02002397 } while (Py_ISSPACE(*str));
Victor Stinner2bf89932015-10-14 11:25:33 +02002398 if (str >= end)
2399 break;
2400 }
2401
2402 top = _PyLong_DigitValue[*str];
2403 if (top >= 16) {
2404 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 goto error;
2406 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002407 str++;
2408
2409 bot = _PyLong_DigitValue[*str];
2410 if (bot >= 16) {
2411 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2412 goto error;
2413 }
2414 str++;
2415
2416 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002418
2419 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002420
2421 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002422 PyErr_Format(PyExc_ValueError,
2423 "non-hexadecimal number found in "
2424 "fromhex() arg at position %zd", invalid_char);
2425 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002427}
2428
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002429PyDoc_STRVAR(hex__doc__,
2430"B.hex() -> string\n\
2431\n\
2432Create a string of hexadecimal numbers from a bytes object.\n\
2433Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2434
2435static PyObject *
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002436bytes_hex(PyBytesObject *self)
2437{
2438 char* argbuf = PyBytes_AS_STRING(self);
2439 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2440 return _Py_strhex(argbuf, arglen);
2441}
2442
2443static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002444bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002445{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002447}
2448
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002449
2450static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002451bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002452 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2453 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2454 _Py_capitalize__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002455 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2456 _Py_center__doc__},
2457 {"count", (PyCFunction)bytes_count, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002458 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002459 BYTES_DECODE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002460 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002461 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002462 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002463 _Py_expandtabs__doc__},
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002464 {"find", (PyCFunction)bytes_find, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002465 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002466 BYTES_FROMHEX_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002467 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2468 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2470 _Py_isalnum__doc__},
2471 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2472 _Py_isalpha__doc__},
2473 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2474 _Py_isdigit__doc__},
2475 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2476 _Py_islower__doc__},
2477 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2478 _Py_isspace__doc__},
2479 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2480 _Py_istitle__doc__},
2481 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2482 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002483 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002484 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002486 BYTES_LSTRIP_METHODDEF
2487 BYTES_MAKETRANS_METHODDEF
2488 BYTES_PARTITION_METHODDEF
2489 BYTES_REPLACE_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002490 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2491 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002492 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002493 BYTES_RPARTITION_METHODDEF
2494 BYTES_RSPLIT_METHODDEF
2495 BYTES_RSTRIP_METHODDEF
2496 BYTES_SPLIT_METHODDEF
2497 BYTES_SPLITLINES_METHODDEF
Serhiy Storchakae09132f2016-07-03 13:57:48 +03002498 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002499 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002500 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2502 _Py_swapcase__doc__},
2503 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002504 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002506 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002508};
2509
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002511bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002512{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002513 if (!PyBytes_Check(self)) {
2514 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002515 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002516 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002517 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002518}
2519
2520static PyNumberMethods bytes_as_number = {
2521 0, /*nb_add*/
2522 0, /*nb_subtract*/
2523 0, /*nb_multiply*/
2524 bytes_mod, /*nb_remainder*/
2525};
2526
2527static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002528bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
2530static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002531bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 PyObject *x = NULL;
2534 const char *encoding = NULL;
2535 const char *errors = NULL;
2536 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002537 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 Py_ssize_t size;
2539 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002540 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002543 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2545 &encoding, &errors))
2546 return NULL;
2547 if (x == NULL) {
2548 if (encoding != NULL || errors != NULL) {
2549 PyErr_SetString(PyExc_TypeError,
2550 "encoding or errors without sequence "
2551 "argument");
2552 return NULL;
2553 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002554 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002557 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002559 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002561 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 return NULL;
2563 }
2564 new = PyUnicode_AsEncodedString(x, encoding, errors);
2565 if (new == NULL)
2566 return NULL;
2567 assert(PyBytes_Check(new));
2568 return new;
2569 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002570
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002571 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002572 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002573 PyUnicode_Check(x) ?
2574 "string argument without an encoding" :
2575 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002576 return NULL;
2577 }
2578
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002579 /* We'd like to call PyObject_Bytes here, but we need to check for an
2580 integer argument before deferring to PyBytes_FromObject, something
2581 PyObject_Bytes doesn't do. */
2582 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2583 if (func != NULL) {
Victor Stinnerf17c3de2016-12-06 18:46:19 +01002584 new = _PyObject_CallNoArg(func);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002585 Py_DECREF(func);
2586 if (new == NULL)
2587 return NULL;
2588 if (!PyBytes_Check(new)) {
2589 PyErr_Format(PyExc_TypeError,
2590 "__bytes__ returned non-bytes (type %.200s)",
2591 Py_TYPE(new)->tp_name);
2592 Py_DECREF(new);
2593 return NULL;
2594 }
2595 return new;
2596 }
2597 else if (PyErr_Occurred())
2598 return NULL;
2599
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02002600 if (PyUnicode_Check(x)) {
2601 PyErr_SetString(PyExc_TypeError,
2602 "string argument without an encoding");
2603 return NULL;
2604 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 /* Is it an integer? */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002606 if (PyIndex_Check(x)) {
2607 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2608 if (size == -1 && PyErr_Occurred()) {
INADA Naokia634e232017-01-06 17:32:01 +09002609 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2610 return NULL;
2611 PyErr_Clear(); /* fall through */
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002612 }
INADA Naokia634e232017-01-06 17:32:01 +09002613 else {
2614 if (size < 0) {
2615 PyErr_SetString(PyExc_ValueError, "negative count");
2616 return NULL;
2617 }
2618 new = _PyBytes_FromSize(size, 1);
2619 if (new == NULL)
2620 return NULL;
2621 return new;
Serhiy Storchakaeb249882016-08-15 09:46:07 +03002622 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002625 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002626}
2627
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002628static PyObject*
2629_PyBytes_FromBuffer(PyObject *x)
2630{
2631 PyObject *new;
2632 Py_buffer view;
2633
2634 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2635 return NULL;
2636
2637 new = PyBytes_FromStringAndSize(NULL, view.len);
2638 if (!new)
2639 goto fail;
2640 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2641 &view, view.len, 'C') < 0)
2642 goto fail;
2643 PyBuffer_Release(&view);
2644 return new;
2645
2646fail:
2647 Py_XDECREF(new);
2648 PyBuffer_Release(&view);
2649 return NULL;
2650}
2651
Victor Stinner3c50ce32015-10-14 13:50:40 +02002652#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2653 do { \
2654 PyObject *bytes; \
2655 Py_ssize_t i; \
2656 Py_ssize_t value; \
2657 char *str; \
2658 PyObject *item; \
2659 \
2660 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2661 if (bytes == NULL) \
2662 return NULL; \
2663 str = ((PyBytesObject *)bytes)->ob_sval; \
2664 \
2665 for (i = 0; i < Py_SIZE(x); i++) { \
2666 item = GET_ITEM((x), i); \
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002667 value = PyNumber_AsSsize_t(item, NULL); \
Victor Stinner3c50ce32015-10-14 13:50:40 +02002668 if (value == -1 && PyErr_Occurred()) \
2669 goto error; \
2670 \
2671 if (value < 0 || value >= 256) { \
2672 PyErr_SetString(PyExc_ValueError, \
2673 "bytes must be in range(0, 256)"); \
2674 goto error; \
2675 } \
2676 *str++ = (char) value; \
2677 } \
2678 return bytes; \
2679 \
2680 error: \
2681 Py_DECREF(bytes); \
2682 return NULL; \
2683 } while (0)
2684
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002685static PyObject*
2686_PyBytes_FromList(PyObject *x)
2687{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002688 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002689}
2690
2691static PyObject*
2692_PyBytes_FromTuple(PyObject *x)
2693{
Victor Stinner3c50ce32015-10-14 13:50:40 +02002694 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002695}
2696
2697static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002698_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002699{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002700 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002702 _PyBytesWriter writer;
2703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002705 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 if (size == -1 && PyErr_Occurred())
2707 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002708
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002709 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002710 str = _PyBytesWriter_Alloc(&writer, size);
2711 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002713 writer.overallocate = 1;
2714 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 /* Run the iterator to exhaustion */
2717 for (i = 0; ; i++) {
2718 PyObject *item;
2719 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 /* Get the next item */
2722 item = PyIter_Next(it);
2723 if (item == NULL) {
2724 if (PyErr_Occurred())
2725 goto error;
2726 break;
2727 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002729 /* Interpret it as an int (__index__) */
Serhiy Storchakaf54d7812016-07-06 21:39:44 +03002730 value = PyNumber_AsSsize_t(item, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 Py_DECREF(item);
2732 if (value == -1 && PyErr_Occurred())
2733 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 /* Range check */
2736 if (value < 0 || value >= 256) {
2737 PyErr_SetString(PyExc_ValueError,
2738 "bytes must be in range(0, 256)");
2739 goto error;
2740 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 /* Append the byte */
2743 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002744 str = _PyBytesWriter_Resize(&writer, str, size+1);
2745 if (str == NULL)
2746 return NULL;
2747 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002749 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002750 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002751
2752 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002753
2754 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02002755 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757}
2758
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002759PyObject *
2760PyBytes_FromObject(PyObject *x)
2761{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002762 PyObject *it, *result;
2763
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002764 if (x == NULL) {
2765 PyErr_BadInternalCall();
2766 return NULL;
2767 }
2768
2769 if (PyBytes_CheckExact(x)) {
2770 Py_INCREF(x);
2771 return x;
2772 }
2773
2774 /* Use the modern buffer interface */
2775 if (PyObject_CheckBuffer(x))
2776 return _PyBytes_FromBuffer(x);
2777
2778 if (PyList_CheckExact(x))
2779 return _PyBytes_FromList(x);
2780
2781 if (PyTuple_CheckExact(x))
2782 return _PyBytes_FromTuple(x);
2783
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002784 if (!PyUnicode_Check(x)) {
2785 it = PyObject_GetIter(x);
2786 if (it != NULL) {
2787 result = _PyBytes_FromIterator(it, x);
2788 Py_DECREF(it);
2789 return result;
2790 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002791 }
2792
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03002793 PyErr_Format(PyExc_TypeError,
2794 "cannot convert '%.200s' object to bytes",
2795 x->ob_type->tp_name);
2796 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02002797}
2798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002800bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 PyObject *tmp, *pnew;
2803 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 assert(PyType_IsSubtype(type, &PyBytes_Type));
2806 tmp = bytes_new(&PyBytes_Type, args, kwds);
2807 if (tmp == NULL)
2808 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002809 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002810 n = PyBytes_GET_SIZE(tmp);
2811 pnew = type->tp_alloc(type, n);
2812 if (pnew != NULL) {
Christian Heimesf051e432016-09-13 20:22:02 +02002813 memcpy(PyBytes_AS_STRING(pnew),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 PyBytes_AS_STRING(tmp), n+1);
2815 ((PyBytesObject *)pnew)->ob_shash =
2816 ((PyBytesObject *)tmp)->ob_shash;
2817 }
2818 Py_DECREF(tmp);
2819 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002820}
2821
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002822PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002823"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002825bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002826bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2827bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002828\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002829Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002830 - an iterable yielding integers in range(256)\n\
2831 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002832 - any object implementing the buffer API.\n\
2833 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002834
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002835static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002836
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2839 "bytes",
2840 PyBytesObject_SIZE,
2841 sizeof(char),
2842 bytes_dealloc, /* tp_dealloc */
2843 0, /* tp_print */
2844 0, /* tp_getattr */
2845 0, /* tp_setattr */
2846 0, /* tp_reserved */
2847 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08002848 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 &bytes_as_sequence, /* tp_as_sequence */
2850 &bytes_as_mapping, /* tp_as_mapping */
2851 (hashfunc)bytes_hash, /* tp_hash */
2852 0, /* tp_call */
2853 bytes_str, /* tp_str */
2854 PyObject_GenericGetAttr, /* tp_getattro */
2855 0, /* tp_setattro */
2856 &bytes_as_buffer, /* tp_as_buffer */
2857 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2858 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2859 bytes_doc, /* tp_doc */
2860 0, /* tp_traverse */
2861 0, /* tp_clear */
2862 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2863 0, /* tp_weaklistoffset */
2864 bytes_iter, /* tp_iter */
2865 0, /* tp_iternext */
2866 bytes_methods, /* tp_methods */
2867 0, /* tp_members */
2868 0, /* tp_getset */
2869 &PyBaseObject_Type, /* tp_base */
2870 0, /* tp_dict */
2871 0, /* tp_descr_get */
2872 0, /* tp_descr_set */
2873 0, /* tp_dictoffset */
2874 0, /* tp_init */
2875 0, /* tp_alloc */
2876 bytes_new, /* tp_new */
2877 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002878};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002879
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002880void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002881PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 assert(pv != NULL);
2884 if (*pv == NULL)
2885 return;
2886 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002887 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 return;
2889 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002890
2891 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2892 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002893 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002894 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02002895
Antoine Pitrou161d6952014-05-01 14:36:20 +02002896 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002897 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02002898 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2899 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2900 Py_CLEAR(*pv);
2901 return;
2902 }
2903
2904 oldsize = PyBytes_GET_SIZE(*pv);
2905 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2906 PyErr_NoMemory();
2907 goto error;
2908 }
2909 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2910 goto error;
2911
2912 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2913 PyBuffer_Release(&wb);
2914 return;
2915
2916 error:
2917 PyBuffer_Release(&wb);
2918 Py_CLEAR(*pv);
2919 return;
2920 }
2921
2922 else {
2923 /* Multiple references, need to create new object */
2924 PyObject *v;
2925 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002926 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02002927 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928}
2929
2930void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002931PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 PyBytes_Concat(pv, w);
2934 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935}
2936
2937
Ethan Furmanb95b5612015-01-23 20:05:18 -08002938/* The following function breaks the notion that bytes are immutable:
2939 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002940 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08002941 as creating a new bytes object and destroying the old one, only
2942 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002943 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08002944 Note that if there's not enough memory to resize the bytes object, the
2945 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002946 memory" exception is set, and -1 is returned. Else (on success) 0 is
2947 returned, and the value in *pv may or may not be the same as on input.
2948 As always, an extra byte is allocated for a trailing \0 byte (newsize
2949 does *not* include that), and a trailing \0 byte is stored.
2950*/
2951
2952int
2953_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2954{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002955 PyObject *v;
2956 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002958 if (!PyBytes_Check(v) || newsize < 0) {
2959 goto error;
2960 }
2961 if (Py_SIZE(v) == newsize) {
2962 /* return early if newsize equals to v->ob_size */
2963 return 0;
2964 }
2965 if (Py_REFCNT(v) != 1) {
2966 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 }
2968 /* XXX UNREF/NEWREF interface should be more symmetrical */
2969 _Py_DEC_REFTOTAL;
2970 _Py_ForgetReference(v);
2971 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03002972 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 if (*pv == NULL) {
2974 PyObject_Del(v);
2975 PyErr_NoMemory();
2976 return -1;
2977 }
2978 _Py_NewReference(*pv);
2979 sv = (PyBytesObject *) *pv;
2980 Py_SIZE(sv) = newsize;
2981 sv->ob_sval[newsize] = '\0';
2982 sv->ob_shash = -1; /* invalidate cached hash value */
2983 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03002984error:
2985 *pv = 0;
2986 Py_DECREF(v);
2987 PyErr_BadInternalCall();
2988 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989}
2990
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991void
2992PyBytes_Fini(void)
2993{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002995 for (i = 0; i < UCHAR_MAX + 1; i++)
2996 Py_CLEAR(characters[i]);
2997 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998}
2999
Benjamin Peterson4116f362008-05-27 00:36:20 +00003000/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001
3002typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 PyObject_HEAD
3004 Py_ssize_t it_index;
3005 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003006} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003007
3008static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003009striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003011 _PyObject_GC_UNTRACK(it);
3012 Py_XDECREF(it->it_seq);
3013 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003014}
3015
3016static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003018{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 Py_VISIT(it->it_seq);
3020 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003021}
3022
3023static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 PyBytesObject *seq;
3027 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 assert(it != NULL);
3030 seq = it->it_seq;
3031 if (seq == NULL)
3032 return NULL;
3033 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3036 item = PyLong_FromLong(
3037 (unsigned char)seq->ob_sval[it->it_index]);
3038 if (item != NULL)
3039 ++it->it_index;
3040 return item;
3041 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003042
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003044 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046}
3047
3048static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003051 Py_ssize_t len = 0;
3052 if (it->it_seq)
3053 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3054 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003055}
3056
3057PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003059
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003060static PyObject *
3061striter_reduce(striterobject *it)
3062{
3063 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003064 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003065 it->it_seq, it->it_index);
3066 } else {
Serhiy Storchaka460bd0d2016-11-20 12:16:46 +02003067 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003068 }
3069}
3070
3071PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3072
3073static PyObject *
3074striter_setstate(striterobject *it, PyObject *state)
3075{
3076 Py_ssize_t index = PyLong_AsSsize_t(state);
3077 if (index == -1 && PyErr_Occurred())
3078 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003079 if (it->it_seq != NULL) {
3080 if (index < 0)
3081 index = 0;
3082 else if (index > PyBytes_GET_SIZE(it->it_seq))
3083 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3084 it->it_index = index;
3085 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003086 Py_RETURN_NONE;
3087}
3088
3089PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3090
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3093 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003094 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3095 reduce_doc},
3096 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3097 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003099};
3100
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003101PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3103 "bytes_iterator", /* tp_name */
3104 sizeof(striterobject), /* tp_basicsize */
3105 0, /* tp_itemsize */
3106 /* methods */
3107 (destructor)striter_dealloc, /* tp_dealloc */
3108 0, /* tp_print */
3109 0, /* tp_getattr */
3110 0, /* tp_setattr */
3111 0, /* tp_reserved */
3112 0, /* tp_repr */
3113 0, /* tp_as_number */
3114 0, /* tp_as_sequence */
3115 0, /* tp_as_mapping */
3116 0, /* tp_hash */
3117 0, /* tp_call */
3118 0, /* tp_str */
3119 PyObject_GenericGetAttr, /* tp_getattro */
3120 0, /* tp_setattro */
3121 0, /* tp_as_buffer */
3122 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3123 0, /* tp_doc */
3124 (traverseproc)striter_traverse, /* tp_traverse */
3125 0, /* tp_clear */
3126 0, /* tp_richcompare */
3127 0, /* tp_weaklistoffset */
3128 PyObject_SelfIter, /* tp_iter */
3129 (iternextfunc)striter_next, /* tp_iternext */
3130 striter_methods, /* tp_methods */
3131 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003132};
3133
3134static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003135bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003137 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 if (!PyBytes_Check(seq)) {
3140 PyErr_BadInternalCall();
3141 return NULL;
3142 }
3143 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3144 if (it == NULL)
3145 return NULL;
3146 it->it_index = 0;
3147 Py_INCREF(seq);
3148 it->it_seq = (PyBytesObject *)seq;
3149 _PyObject_GC_TRACK(it);
3150 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003151}
Victor Stinner00165072015-10-09 01:53:21 +02003152
3153
3154/* _PyBytesWriter API */
3155
/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the requested size is increased by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3163
3164void
3165_PyBytesWriter_Init(_PyBytesWriter *writer)
3166{
Victor Stinner661aacc2015-10-14 09:41:48 +02003167 /* Set all attributes before small_buffer to 0 */
3168 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003169#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003170 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003171#endif
3172}
3173
3174void
3175_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3176{
3177 Py_CLEAR(writer->buffer);
3178}
3179
3180Py_LOCAL_INLINE(char*)
3181_PyBytesWriter_AsString(_PyBytesWriter *writer)
3182{
Victor Stinner661aacc2015-10-14 09:41:48 +02003183 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003184 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003185 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003186 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003187 else if (writer->use_bytearray) {
3188 assert(writer->buffer != NULL);
3189 return PyByteArray_AS_STRING(writer->buffer);
3190 }
3191 else {
3192 assert(writer->buffer != NULL);
3193 return PyBytes_AS_STRING(writer->buffer);
3194 }
Victor Stinner00165072015-10-09 01:53:21 +02003195}
3196
3197Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003198_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003199{
3200 char *start = _PyBytesWriter_AsString(writer);
3201 assert(str != NULL);
3202 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003203 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003204 return str - start;
3205}
3206
3207Py_LOCAL_INLINE(void)
3208_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3209{
3210#ifdef Py_DEBUG
3211 char *start, *end;
3212
Victor Stinner661aacc2015-10-14 09:41:48 +02003213 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003214 assert(writer->buffer == NULL);
3215 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003216 else {
3217 assert(writer->buffer != NULL);
3218 if (writer->use_bytearray)
3219 assert(PyByteArray_CheckExact(writer->buffer));
3220 else
3221 assert(PyBytes_CheckExact(writer->buffer));
3222 assert(Py_REFCNT(writer->buffer) == 1);
3223 }
Victor Stinner00165072015-10-09 01:53:21 +02003224
Victor Stinner661aacc2015-10-14 09:41:48 +02003225 if (writer->use_bytearray) {
3226 /* bytearray has its own overallocation algorithm,
3227 writer overallocation must be disabled */
3228 assert(!writer->overallocate);
3229 }
3230
3231 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003232 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003233 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003234 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003235 assert(start[writer->allocated] == 0);
3236
3237 end = start + writer->allocated;
3238 assert(str != NULL);
3239 assert(start <= str && str <= end);
3240#endif
3241}
3242
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003243void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003244_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003245{
3246 Py_ssize_t allocated, pos;
3247
3248 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003249 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003250
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003251 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003252 if (writer->overallocate
3253 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3254 /* overallocate to limit the number of realloc() */
3255 allocated += allocated / OVERALLOCATE_FACTOR;
3256 }
3257
Victor Stinner2bf89932015-10-14 11:25:33 +02003258 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003259 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003260 if (writer->use_bytearray) {
3261 if (PyByteArray_Resize(writer->buffer, allocated))
3262 goto error;
3263 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3264 but we cannot use ob_alloc because bytes may need to be moved
3265 to use the whole buffer. bytearray uses an internal optimization
3266 to avoid moving or copying bytes when bytes are removed at the
3267 beginning (ex: del bytearray[:1]). */
3268 }
3269 else {
3270 if (_PyBytes_Resize(&writer->buffer, allocated))
3271 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003272 }
3273 }
3274 else {
3275 /* convert from stack buffer to bytes object buffer */
3276 assert(writer->buffer == NULL);
3277
Victor Stinner661aacc2015-10-14 09:41:48 +02003278 if (writer->use_bytearray)
3279 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3280 else
3281 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003282 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003283 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003284
3285 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003286 char *dest;
3287 if (writer->use_bytearray)
3288 dest = PyByteArray_AS_STRING(writer->buffer);
3289 else
3290 dest = PyBytes_AS_STRING(writer->buffer);
Christian Heimesf051e432016-09-13 20:22:02 +02003291 memcpy(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003292 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003293 pos);
3294 }
3295
Victor Stinnerb3653a32015-10-09 03:38:24 +02003296 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003297#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003298 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003299#endif
Victor Stinner00165072015-10-09 01:53:21 +02003300 }
3301 writer->allocated = allocated;
3302
3303 str = _PyBytesWriter_AsString(writer) + pos;
3304 _PyBytesWriter_CheckConsistency(writer, str);
3305 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003306
3307error:
3308 _PyBytesWriter_Dealloc(writer);
3309 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003310}
3311
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003312void*
3313_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3314{
3315 Py_ssize_t new_min_size;
3316
3317 _PyBytesWriter_CheckConsistency(writer, str);
3318 assert(size >= 0);
3319
3320 if (size == 0) {
3321 /* nothing to do */
3322 return str;
3323 }
3324
3325 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3326 PyErr_NoMemory();
3327 _PyBytesWriter_Dealloc(writer);
3328 return NULL;
3329 }
3330 new_min_size = writer->min_size + size;
3331
3332 if (new_min_size > writer->allocated)
3333 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3334
3335 writer->min_size = new_min_size;
3336 return str;
3337}
3338
Victor Stinner00165072015-10-09 01:53:21 +02003339/* Allocate the buffer to write size bytes.
3340 Return the pointer to the beginning of buffer data.
3341 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003342void*
Victor Stinner00165072015-10-09 01:53:21 +02003343_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3344{
3345 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003346 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003347 assert(size >= 0);
3348
Victor Stinnerb3653a32015-10-09 03:38:24 +02003349 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003350#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003351 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003352 /* In debug mode, don't use the full small buffer because it is less
3353 efficient than bytes and bytearray objects to detect buffer underflow
3354 and buffer overflow. Use 10 bytes of the small buffer to test also
3355 code using the smaller buffer in debug mode.
3356
3357 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3358 in debug mode to also be able to detect stack overflow when running
3359 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3360 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3361 stack overflow. */
3362 writer->allocated = Py_MIN(writer->allocated, 10);
3363 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3364 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003365 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003366#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003367 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003368#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003369 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003370}
3371
3372PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003373_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003374{
Victor Stinner2bf89932015-10-14 11:25:33 +02003375 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003376 PyObject *result;
3377
3378 _PyBytesWriter_CheckConsistency(writer, str);
3379
Victor Stinner2bf89932015-10-14 11:25:33 +02003380 size = _PyBytesWriter_GetSize(writer, str);
3381 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003382 Py_CLEAR(writer->buffer);
3383 /* Get the empty byte string singleton */
3384 result = PyBytes_FromStringAndSize(NULL, 0);
3385 }
3386 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003387 if (writer->use_bytearray) {
3388 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3389 }
3390 else {
3391 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3392 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003393 }
3394 else {
3395 result = writer->buffer;
3396 writer->buffer = NULL;
3397
Victor Stinner2bf89932015-10-14 11:25:33 +02003398 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003399 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003400 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003401 Py_DECREF(result);
3402 return NULL;
3403 }
3404 }
3405 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003406 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003407 assert(result == NULL);
3408 return NULL;
3409 }
Victor Stinner00165072015-10-09 01:53:21 +02003410 }
3411 }
Victor Stinner00165072015-10-09 01:53:21 +02003412 }
Victor Stinner00165072015-10-09 01:53:21 +02003413 return result;
3414}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003415
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003416void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003417_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003418 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003419{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003420 char *str = (char *)ptr;
3421
Victor Stinnerce179bf2015-10-09 12:57:22 +02003422 str = _PyBytesWriter_Prepare(writer, str, size);
3423 if (str == NULL)
3424 return NULL;
3425
Christian Heimesf051e432016-09-13 20:22:02 +02003426 memcpy(str, bytes, size);
Victor Stinnerce179bf2015-10-09 12:57:22 +02003427 str += size;
3428
3429 return str;
3430}