blob: 7ba90aa307b890d5b8eae015667aaab792e09cb5 [file] [log] [blame]
Serhiy Storchaka107f3cc2017-10-29 12:25:38 +02001/* bytes object implementation */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "internal/mem.h"
7#include "internal/pystate.h"
8
9#include "bytes_methods.h"
10#include "pystrhex.h"
11#include <stddef.h>
12
13/*[clinic input]
14class bytes "PyBytesObject *" "&PyBytes_Type"
15[clinic start generated code]*/
16/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
17
18#include "clinic/bytesobject.c.h"
19
20#ifdef COUNT_ALLOCS
21Py_ssize_t null_strings, one_strings;
22#endif
23
24static PyBytesObject *characters[UCHAR_MAX + 1];
25static PyBytesObject *nullstring;
26
27/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28 for a string of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 bytes per string allocation on a typical system.
32*/
33#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
35/* Forward declaration */
36Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
39/*
40 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
43 For PyBytes_FromStringAndSize(), the parameter `str' is
44 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
51 PyBytes object must be treated as immutable and you must not fill in nor
52 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
56 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
58 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60*/
61static PyObject *
62_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
63{
64 PyBytesObject *op;
65 assert(size >= 0);
66
67 if (size == 0 && (op = nullstring) != NULL) {
68#ifdef COUNT_ALLOCS
69 null_strings++;
70#endif
71 Py_INCREF(op);
72 return (PyObject *)op;
73 }
74
75 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
76 PyErr_SetString(PyExc_OverflowError,
77 "byte string is too large");
78 return NULL;
79 }
80
81 /* Inline PyObject_NewVar */
82 if (use_calloc)
83 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84 else
85 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
86 if (op == NULL)
87 return PyErr_NoMemory();
88 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
89 op->ob_shash = -1;
90 if (!use_calloc)
91 op->ob_sval[size] = '\0';
92 /* empty byte string singleton */
93 if (size == 0) {
94 nullstring = op;
95 Py_INCREF(op);
96 }
97 return (PyObject *) op;
98}
99
100PyObject *
101PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102{
103 PyBytesObject *op;
104 if (size < 0) {
105 PyErr_SetString(PyExc_SystemError,
106 "Negative size passed to PyBytes_FromStringAndSize");
107 return NULL;
108 }
109 if (size == 1 && str != NULL &&
110 (op = characters[*str & UCHAR_MAX]) != NULL)
111 {
112#ifdef COUNT_ALLOCS
113 one_strings++;
114#endif
115 Py_INCREF(op);
116 return (PyObject *)op;
117 }
118
119 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120 if (op == NULL)
121 return NULL;
122 if (str == NULL)
123 return (PyObject *) op;
124
125 memcpy(op->ob_sval, str, size);
126 /* share short strings */
127 if (size == 1) {
128 characters[*str & UCHAR_MAX] = op;
129 Py_INCREF(op);
130 }
131 return (PyObject *) op;
132}
133
134PyObject *
135PyBytes_FromString(const char *str)
136{
137 size_t size;
138 PyBytesObject *op;
139
140 assert(str != NULL);
141 size = strlen(str);
142 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143 PyErr_SetString(PyExc_OverflowError,
144 "byte string is too long");
145 return NULL;
146 }
147 if (size == 0 && (op = nullstring) != NULL) {
148#ifdef COUNT_ALLOCS
149 null_strings++;
150#endif
151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
155#ifdef COUNT_ALLOCS
156 one_strings++;
157#endif
158 Py_INCREF(op);
159 return (PyObject *)op;
160 }
161
162 /* Inline PyObject_NewVar */
163 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164 if (op == NULL)
165 return PyErr_NoMemory();
166 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
167 op->ob_shash = -1;
168 memcpy(op->ob_sval, str, size+1);
169 /* share short strings */
170 if (size == 0) {
171 nullstring = op;
172 Py_INCREF(op);
173 } else if (size == 1) {
174 characters[*str & UCHAR_MAX] = op;
175 Py_INCREF(op);
176 }
177 return (PyObject *) op;
178}
179
180PyObject *
181PyBytes_FromFormatV(const char *format, va_list vargs)
182{
183 char *s;
184 const char *f;
185 const char *p;
186 Py_ssize_t prec;
187 int longflag;
188 int size_tflag;
189 /* Longest 64-bit formatted numbers:
190 - "18446744073709551615\0" (21 bytes)
191 - "-9223372036854775808\0" (21 bytes)
192 Decimal takes the most space (it isn't enough for octal.)
193
194 Longest 64-bit pointer representation:
195 "0xffffffffffffffff\0" (19 bytes). */
196 char buffer[21];
197 _PyBytesWriter writer;
198
199 _PyBytesWriter_Init(&writer);
200
201 s = _PyBytesWriter_Alloc(&writer, strlen(format));
202 if (s == NULL)
203 return NULL;
204 writer.overallocate = 1;
205
206#define WRITE_BYTES(str) \
207 do { \
208 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209 if (s == NULL) \
210 goto error; \
211 } while (0)
212
213 for (f = format; *f; f++) {
214 if (*f != '%') {
215 *s++ = *f;
216 continue;
217 }
218
219 p = f++;
220
221 /* ignore the width (ex: 10 in "%10s") */
222 while (Py_ISDIGIT(*f))
223 f++;
224
225 /* parse the precision (ex: 10 in "%.10s") */
226 prec = 0;
227 if (*f == '.') {
228 f++;
229 for (; Py_ISDIGIT(*f); f++) {
230 prec = (prec * 10) + (*f - '0');
231 }
232 }
233
234 while (*f && *f != '%' && !Py_ISALPHA(*f))
235 f++;
236
237 /* handle the long flag ('l'), but only for %ld and %lu.
238 others can be added when necessary. */
239 longflag = 0;
240 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241 longflag = 1;
242 ++f;
243 }
244
245 /* handle the size_t flag ('z'). */
246 size_tflag = 0;
247 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248 size_tflag = 1;
249 ++f;
250 }
251
252 /* subtract bytes preallocated for the format string
253 (ex: 2 for "%s") */
254 writer.min_size -= (f - p + 1);
255
256 switch (*f) {
257 case 'c':
258 {
259 int c = va_arg(vargs, int);
260 if (c < 0 || c > 255) {
261 PyErr_SetString(PyExc_OverflowError,
262 "PyBytes_FromFormatV(): %c format "
263 "expects an integer in range [0; 255]");
264 goto error;
265 }
266 writer.min_size++;
267 *s++ = (unsigned char)c;
268 break;
269 }
270
271 case 'd':
272 if (longflag)
273 sprintf(buffer, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(buffer, "%d", va_arg(vargs, int));
279 assert(strlen(buffer) < sizeof(buffer));
280 WRITE_BYTES(buffer);
281 break;
282
283 case 'u':
284 if (longflag)
285 sprintf(buffer, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(buffer, "%u",
292 va_arg(vargs, unsigned int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'i':
298 sprintf(buffer, "%i", va_arg(vargs, int));
299 assert(strlen(buffer) < sizeof(buffer));
300 WRITE_BYTES(buffer);
301 break;
302
303 case 'x':
304 sprintf(buffer, "%x", va_arg(vargs, int));
305 assert(strlen(buffer) < sizeof(buffer));
306 WRITE_BYTES(buffer);
307 break;
308
309 case 's':
310 {
311 Py_ssize_t i;
312
313 p = va_arg(vargs, const char*);
314 i = strlen(p);
315 if (prec > 0 && i > prec)
316 i = prec;
317 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
318 if (s == NULL)
319 goto error;
320 break;
321 }
322
323 case 'p':
324 sprintf(buffer, "%p", va_arg(vargs, void*));
325 assert(strlen(buffer) < sizeof(buffer));
326 /* %p is ill-defined: ensure leading 0x. */
327 if (buffer[1] == 'X')
328 buffer[1] = 'x';
329 else if (buffer[1] != 'x') {
330 memmove(buffer+2, buffer, strlen(buffer)+1);
331 buffer[0] = '0';
332 buffer[1] = 'x';
333 }
334 WRITE_BYTES(buffer);
335 break;
336
337 case '%':
338 writer.min_size++;
339 *s++ = '%';
340 break;
341
342 default:
343 if (*f == 0) {
344 /* fix min_size if we reached the end of the format string */
345 writer.min_size++;
346 }
347
348 /* invalid format string: copy unformatted string and exit */
349 WRITE_BYTES(p);
350 return _PyBytesWriter_Finish(&writer, s);
351 }
352 }
353
354#undef WRITE_BYTES
355
356 return _PyBytesWriter_Finish(&writer, s);
357
358 error:
359 _PyBytesWriter_Dealloc(&writer);
360 return NULL;
361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
366 PyObject* ret;
367 va_list vargs;
368
369#ifdef HAVE_STDARG_PROTOTYPES
370 va_start(vargs, format);
371#else
372 va_start(vargs);
373#endif
374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
377}
378
379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
404#define F_LJUST (1<<0)
405#define F_SIGN (1<<1)
406#define F_BLANK (1<<2)
407#define F_ALT (1<<3)
408#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
414 PyObject **p_result, _PyBytesWriter *writer, char *str)
415{
416 char *p;
417 PyObject *result;
418 double x;
419 size_t len;
420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
436
437 len = strlen(p);
438 if (writer != NULL) {
439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
442 memcpy(str, p, len);
443 PyMem_Free(p);
444 str += len;
445 return str;
446 }
447
448 result = PyBytes_FromStringAndSize(p, len);
449 PyMem_Free(p);
450 *p_result = result;
451 return str;
452}
453
454static PyObject *
455formatlong(PyObject *v, int flags, int prec, int type)
456{
457 PyObject *result, *iobj;
458 if (type == 'i')
459 type = 'd';
460 if (PyLong_Check(v))
461 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462 if (PyNumber_Check(v)) {
463 /* make sure number is a type of integer for o, x, and X */
464 if (type == 'o' || type == 'x' || type == 'X')
465 iobj = PyNumber_Index(v);
466 else
467 iobj = PyNumber_Long(v);
468 if (iobj == NULL) {
469 if (!PyErr_ExceptionMatches(PyExc_TypeError))
470 return NULL;
471 }
472 else if (!PyLong_Check(iobj))
473 Py_CLEAR(iobj);
474 if (iobj != NULL) {
475 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
476 Py_DECREF(iobj);
477 return result;
478 }
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486}
487
488static int
489byte_converter(PyObject *arg, char *p)
490{
491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
494 }
495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
498 }
499 else {
500 PyObject *iobj;
501 long ival;
502 int overflow;
503 /* make sure number is a type of integer */
504 if (PyLong_Check(arg)) {
505 ival = PyLong_AsLongAndOverflow(arg, &overflow);
506 }
507 else {
508 iobj = PyNumber_Index(arg);
509 if (iobj == NULL) {
510 if (!PyErr_ExceptionMatches(PyExc_TypeError))
511 return 0;
512 goto onError;
513 }
514 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
515 Py_DECREF(iobj);
516 }
517 if (!overflow && ival == -1 && PyErr_Occurred())
518 goto onError;
519 if (overflow || !(0 <= ival && ival <= 255)) {
520 PyErr_SetString(PyExc_OverflowError,
521 "%c arg not in range(256)");
522 return 0;
523 }
524 *p = (char)ival;
525 return 1;
526 }
527 onError:
528 PyErr_SetString(PyExc_TypeError,
529 "%c requires an integer in range(256) or a single byte");
530 return 0;
531}
532
533static PyObject *_PyBytes_FromBuffer(PyObject *x);
534
535static PyObject *
536format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
537{
538 PyObject *func, *result;
539 _Py_IDENTIFIER(__bytes__);
540 /* is it a bytes object? */
541 if (PyBytes_Check(v)) {
542 *pbuf = PyBytes_AS_STRING(v);
543 *plen = PyBytes_GET_SIZE(v);
544 Py_INCREF(v);
545 return v;
546 }
547 if (PyByteArray_Check(v)) {
548 *pbuf = PyByteArray_AS_STRING(v);
549 *plen = PyByteArray_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
552 }
553 /* does it support __bytes__? */
554 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555 if (func != NULL) {
556 result = _PyObject_CallNoArg(func);
557 Py_DECREF(func);
558 if (result == NULL)
559 return NULL;
560 if (!PyBytes_Check(result)) {
561 PyErr_Format(PyExc_TypeError,
562 "__bytes__ returned non-bytes (type %.200s)",
563 Py_TYPE(result)->tp_name);
564 Py_DECREF(result);
565 return NULL;
566 }
567 *pbuf = PyBytes_AS_STRING(result);
568 *plen = PyBytes_GET_SIZE(result);
569 return result;
570 }
571 /* does it support buffer protocol? */
572 if (PyObject_CheckBuffer(v)) {
573 /* maybe we can avoid making a copy of the buffer object here? */
574 result = _PyBytes_FromBuffer(v);
575 if (result == NULL)
576 return NULL;
577 *pbuf = PyBytes_AS_STRING(result);
578 *plen = PyBytes_GET_SIZE(result);
579 return result;
580 }
581 PyErr_Format(PyExc_TypeError,
582 "%%b requires a bytes-like object, "
583 "or an object that implements __bytes__, not '%.100s'",
584 Py_TYPE(v)->tp_name);
585 return NULL;
586}
587
588/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
589
590PyObject *
591_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
592 PyObject *args, int use_bytearray)
593{
594 const char *fmt;
595 char *res;
596 Py_ssize_t arglen, argidx;
597 Py_ssize_t fmtcnt;
598 int args_owned = 0;
599 PyObject *dict = NULL;
600 _PyBytesWriter writer;
601
602 if (args == NULL) {
603 PyErr_BadInternalCall();
604 return NULL;
605 }
606 fmt = format;
607 fmtcnt = format_len;
608
609 _PyBytesWriter_Init(&writer);
610 writer.use_bytearray = use_bytearray;
611
612 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
613 if (res == NULL)
614 return NULL;
615 if (!use_bytearray)
616 writer.overallocate = 1;
617
618 if (PyTuple_Check(args)) {
619 arglen = PyTuple_GET_SIZE(args);
620 argidx = 0;
621 }
622 else {
623 arglen = -1;
624 argidx = -2;
625 }
626 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
627 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
628 !PyByteArray_Check(args)) {
629 dict = args;
630 }
631
632 while (--fmtcnt >= 0) {
633 if (*fmt != '%') {
634 Py_ssize_t len;
635 char *pos;
636
637 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
638 if (pos != NULL)
639 len = pos - fmt;
640 else
641 len = fmtcnt + 1;
642 assert(len != 0);
643
644 memcpy(res, fmt, len);
645 res += len;
646 fmt += len;
647 fmtcnt -= (len - 1);
648 }
649 else {
650 /* Got a format specifier */
651 int flags = 0;
652 Py_ssize_t width = -1;
653 int prec = -1;
654 int c = '\0';
655 int fill;
656 PyObject *v = NULL;
657 PyObject *temp = NULL;
658 const char *pbuf = NULL;
659 int sign;
660 Py_ssize_t len = 0;
661 char onechar; /* For byte_converter() */
662 Py_ssize_t alloc;
663#ifdef Py_DEBUG
664 char *before;
665#endif
666
667 fmt++;
668 if (*fmt == '%') {
669 *res++ = '%';
670 fmt++;
671 fmtcnt--;
672 continue;
673 }
674 if (*fmt == '(') {
675 const char *keystart;
676 Py_ssize_t keylen;
677 PyObject *key;
678 int pcount = 1;
679
680 if (dict == NULL) {
681 PyErr_SetString(PyExc_TypeError,
682 "format requires a mapping");
683 goto error;
684 }
685 ++fmt;
686 --fmtcnt;
687 keystart = fmt;
688 /* Skip over balanced parentheses */
689 while (pcount > 0 && --fmtcnt >= 0) {
690 if (*fmt == ')')
691 --pcount;
692 else if (*fmt == '(')
693 ++pcount;
694 fmt++;
695 }
696 keylen = fmt - keystart - 1;
697 if (fmtcnt < 0 || pcount > 0) {
698 PyErr_SetString(PyExc_ValueError,
699 "incomplete format key");
700 goto error;
701 }
702 key = PyBytes_FromStringAndSize(keystart,
703 keylen);
704 if (key == NULL)
705 goto error;
706 if (args_owned) {
707 Py_DECREF(args);
708 args_owned = 0;
709 }
710 args = PyObject_GetItem(dict, key);
711 Py_DECREF(key);
712 if (args == NULL) {
713 goto error;
714 }
715 args_owned = 1;
716 arglen = -1;
717 argidx = -2;
718 }
719
720 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
721 while (--fmtcnt >= 0) {
722 switch (c = *fmt++) {
723 case '-': flags |= F_LJUST; continue;
724 case '+': flags |= F_SIGN; continue;
725 case ' ': flags |= F_BLANK; continue;
726 case '#': flags |= F_ALT; continue;
727 case '0': flags |= F_ZERO; continue;
728 }
729 break;
730 }
731
732 /* Parse width. Example: "%10s" => width=10 */
733 if (c == '*') {
734 v = getnextarg(args, arglen, &argidx);
735 if (v == NULL)
736 goto error;
737 if (!PyLong_Check(v)) {
738 PyErr_SetString(PyExc_TypeError,
739 "* wants int");
740 goto error;
741 }
742 width = PyLong_AsSsize_t(v);
743 if (width == -1 && PyErr_Occurred())
744 goto error;
745 if (width < 0) {
746 flags |= F_LJUST;
747 width = -width;
748 }
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 }
752 else if (c >= 0 && isdigit(c)) {
753 width = c - '0';
754 while (--fmtcnt >= 0) {
755 c = Py_CHARMASK(*fmt++);
756 if (!isdigit(c))
757 break;
758 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
759 PyErr_SetString(
760 PyExc_ValueError,
761 "width too big");
762 goto error;
763 }
764 width = width*10 + (c - '0');
765 }
766 }
767
768 /* Parse precision. Example: "%.3f" => prec=3 */
769 if (c == '.') {
770 prec = 0;
771 if (--fmtcnt >= 0)
772 c = *fmt++;
773 if (c == '*') {
774 v = getnextarg(args, arglen, &argidx);
775 if (v == NULL)
776 goto error;
777 if (!PyLong_Check(v)) {
778 PyErr_SetString(
779 PyExc_TypeError,
780 "* wants int");
781 goto error;
782 }
783 prec = _PyLong_AsInt(v);
784 if (prec == -1 && PyErr_Occurred())
785 goto error;
786 if (prec < 0)
787 prec = 0;
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 else if (c >= 0 && isdigit(c)) {
792 prec = c - '0';
793 while (--fmtcnt >= 0) {
794 c = Py_CHARMASK(*fmt++);
795 if (!isdigit(c))
796 break;
797 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
798 PyErr_SetString(
799 PyExc_ValueError,
800 "prec too big");
801 goto error;
802 }
803 prec = prec*10 + (c - '0');
804 }
805 }
806 } /* prec */
807 if (fmtcnt >= 0) {
808 if (c == 'h' || c == 'l' || c == 'L') {
809 if (--fmtcnt >= 0)
810 c = *fmt++;
811 }
812 }
813 if (fmtcnt < 0) {
814 PyErr_SetString(PyExc_ValueError,
815 "incomplete format");
816 goto error;
817 }
818 v = getnextarg(args, arglen, &argidx);
819 if (v == NULL)
820 goto error;
821
822 if (fmtcnt < 0) {
823 /* last writer: disable writer overallocation */
824 writer.overallocate = 0;
825 }
826
827 sign = 0;
828 fill = ' ';
829 switch (c) {
830 case 'r':
831 // %r is only for 2/3 code; 3 only code should use %a
832 case 'a':
833 temp = PyObject_ASCII(v);
834 if (temp == NULL)
835 goto error;
836 assert(PyUnicode_IS_ASCII(temp));
837 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
838 len = PyUnicode_GET_LENGTH(temp);
839 if (prec >= 0 && len > prec)
840 len = prec;
841 break;
842
843 case 's':
844 // %s is only for 2/3 code; 3 only code should use %b
845 case 'b':
846 temp = format_obj(v, &pbuf, &len);
847 if (temp == NULL)
848 goto error;
849 if (prec >= 0 && len > prec)
850 len = prec;
851 break;
852
853 case 'i':
854 case 'd':
855 case 'u':
856 case 'o':
857 case 'x':
858 case 'X':
859 if (PyLong_CheckExact(v)
860 && width == -1 && prec == -1
861 && !(flags & (F_SIGN | F_BLANK))
862 && c != 'X')
863 {
864 /* Fast path */
865 int alternate = flags & F_ALT;
866 int base;
867
868 switch(c)
869 {
870 default:
871 Py_UNREACHABLE();
872 case 'd':
873 case 'i':
874 case 'u':
875 base = 10;
876 break;
877 case 'o':
878 base = 8;
879 break;
880 case 'x':
881 case 'X':
882 base = 16;
883 break;
884 }
885
886 /* Fast path */
887 writer.min_size -= 2; /* size preallocated for "%d" */
888 res = _PyLong_FormatBytesWriter(&writer, res,
889 v, base, alternate);
890 if (res == NULL)
891 goto error;
892 continue;
893 }
894
895 temp = formatlong(v, flags, prec, c);
896 if (!temp)
897 goto error;
898 assert(PyUnicode_IS_ASCII(temp));
899 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
900 len = PyUnicode_GET_LENGTH(temp);
901 sign = 1;
902 if (flags & F_ZERO)
903 fill = '0';
904 break;
905
906 case 'e':
907 case 'E':
908 case 'f':
909 case 'F':
910 case 'g':
911 case 'G':
912 if (width == -1 && prec == -1
913 && !(flags & (F_SIGN | F_BLANK)))
914 {
915 /* Fast path */
916 writer.min_size -= 2; /* size preallocated for "%f" */
917 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
918 if (res == NULL)
919 goto error;
920 continue;
921 }
922
923 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
924 goto error;
925 pbuf = PyBytes_AS_STRING(temp);
926 len = PyBytes_GET_SIZE(temp);
927 sign = 1;
928 if (flags & F_ZERO)
929 fill = '0';
930 break;
931
932 case 'c':
933 pbuf = &onechar;
934 len = byte_converter(v, &onechar);
935 if (!len)
936 goto error;
937 if (width == -1) {
938 /* Fast path */
939 *res++ = onechar;
940 continue;
941 }
942 break;
943
944 default:
945 PyErr_Format(PyExc_ValueError,
946 "unsupported format character '%c' (0x%x) "
947 "at index %zd",
948 c, c,
949 (Py_ssize_t)(fmt - 1 - format));
950 goto error;
951 }
952
953 if (sign) {
954 if (*pbuf == '-' || *pbuf == '+') {
955 sign = *pbuf++;
956 len--;
957 }
958 else if (flags & F_SIGN)
959 sign = '+';
960 else if (flags & F_BLANK)
961 sign = ' ';
962 else
963 sign = 0;
964 }
965 if (width < len)
966 width = len;
967
968 alloc = width;
969 if (sign != 0 && len == width)
970 alloc++;
971 /* 2: size preallocated for %s */
972 if (alloc > 2) {
973 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
974 if (res == NULL)
975 goto error;
976 }
977#ifdef Py_DEBUG
978 before = res;
979#endif
980
981 /* Write the sign if needed */
982 if (sign) {
983 if (fill != ' ')
984 *res++ = sign;
985 if (width > len)
986 width--;
987 }
988
989 /* Write the numeric prefix for "x", "X" and "o" formats
990 if the alternate form is used.
991 For example, write "0x" for the "%#x" format. */
992 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
993 assert(pbuf[0] == '0');
994 assert(pbuf[1] == c);
995 if (fill != ' ') {
996 *res++ = *pbuf++;
997 *res++ = *pbuf++;
998 }
999 width -= 2;
1000 if (width < 0)
1001 width = 0;
1002 len -= 2;
1003 }
1004
1005 /* Pad left with the fill character if needed */
1006 if (width > len && !(flags & F_LJUST)) {
1007 memset(res, fill, width - len);
1008 res += (width - len);
1009 width = len;
1010 }
1011
1012 /* If padding with spaces: write sign if needed and/or numeric
1013 prefix if the alternate form is used */
1014 if (fill == ' ') {
1015 if (sign)
1016 *res++ = sign;
1017 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1018 assert(pbuf[0] == '0');
1019 assert(pbuf[1] == c);
1020 *res++ = *pbuf++;
1021 *res++ = *pbuf++;
1022 }
1023 }
1024
1025 /* Copy bytes */
1026 memcpy(res, pbuf, len);
1027 res += len;
1028
1029 /* Pad right with the fill character if needed */
1030 if (width > len) {
1031 memset(res, ' ', width - len);
1032 res += (width - len);
1033 }
1034
1035 if (dict && (argidx < arglen)) {
1036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
1038 Py_XDECREF(temp);
1039 goto error;
1040 }
1041 Py_XDECREF(temp);
1042
1043#ifdef Py_DEBUG
1044 /* check that we computed the exact size for this write */
1045 assert((res - before) == alloc);
1046#endif
1047 } /* '%' */
1048
1049 /* If overallocation was disabled, ensure that it was the last
1050 write. Otherwise, we missed an optimization */
1051 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
1052 } /* until end */
1053
1054 if (argidx < arglen && !dict) {
1055 PyErr_SetString(PyExc_TypeError,
1056 "not all arguments converted during bytes formatting");
1057 goto error;
1058 }
1059
1060 if (args_owned) {
1061 Py_DECREF(args);
1062 }
1063 return _PyBytesWriter_Finish(&writer, res);
1064
1065 error:
1066 _PyBytesWriter_Dealloc(&writer);
1067 if (args_owned) {
1068 Py_DECREF(args);
1069 }
1070 return NULL;
1071}
1072
1073/* =-= */
1074
1075static void
1076bytes_dealloc(PyObject *op)
1077{
1078 Py_TYPE(op)->tp_free(op);
1079}
1080
1081/* Unescape a backslash-escaped string. If unicode is non-zero,
1082 the string is a u-literal. If recode_encoding is non-zero,
1083 the string is UTF-8 encoded and should be re-encoded in the
1084 specified encoding. */
1085
1086static char *
1087_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1088 const char *errors, const char *recode_encoding,
1089 _PyBytesWriter *writer, char *p)
1090{
1091 PyObject *u, *w;
1092 const char* t;
1093
1094 t = *s;
1095 /* Decode non-ASCII bytes as UTF-8. */
1096 while (t < end && (*t & 0x80))
1097 t++;
1098 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1099 if (u == NULL)
1100 return NULL;
1101
1102 /* Recode them in target encoding. */
1103 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1104 Py_DECREF(u);
1105 if (w == NULL)
1106 return NULL;
1107 assert(PyBytes_Check(w));
1108
1109 /* Append bytes to output buffer. */
1110 writer->min_size--; /* subtract 1 preallocated byte */
1111 p = _PyBytesWriter_WriteBytes(writer, p,
1112 PyBytes_AS_STRING(w),
1113 PyBytes_GET_SIZE(w));
1114 Py_DECREF(w);
1115 if (p == NULL)
1116 return NULL;
1117
1118 *s = t;
1119 return p;
1120}
1121
1122PyObject *_PyBytes_DecodeEscape(const char *s,
1123 Py_ssize_t len,
1124 const char *errors,
1125 Py_ssize_t unicode,
1126 const char *recode_encoding,
1127 const char **first_invalid_escape)
1128{
1129 int c;
1130 char *p;
1131 const char *end;
1132 _PyBytesWriter writer;
1133
1134 _PyBytesWriter_Init(&writer);
1135
1136 p = _PyBytesWriter_Alloc(&writer, len);
1137 if (p == NULL)
1138 return NULL;
1139 writer.overallocate = 1;
1140
1141 *first_invalid_escape = NULL;
1142
1143 end = s + len;
1144 while (s < end) {
1145 if (*s != '\\') {
1146 non_esc:
1147 if (!(recode_encoding && (*s & 0x80))) {
1148 *p++ = *s++;
1149 }
1150 else {
1151 /* non-ASCII character and need to recode */
1152 p = _PyBytes_DecodeEscapeRecode(&s, end,
1153 errors, recode_encoding,
1154 &writer, p);
1155 if (p == NULL)
1156 goto failed;
1157 }
1158 continue;
1159 }
1160
1161 s++;
1162 if (s == end) {
1163 PyErr_SetString(PyExc_ValueError,
1164 "Trailing \\ in string");
1165 goto failed;
1166 }
1167
1168 switch (*s++) {
1169 /* XXX This assumes ASCII! */
1170 case '\n': break;
1171 case '\\': *p++ = '\\'; break;
1172 case '\'': *p++ = '\''; break;
1173 case '\"': *p++ = '\"'; break;
1174 case 'b': *p++ = '\b'; break;
1175 case 'f': *p++ = '\014'; break; /* FF */
1176 case 't': *p++ = '\t'; break;
1177 case 'n': *p++ = '\n'; break;
1178 case 'r': *p++ = '\r'; break;
1179 case 'v': *p++ = '\013'; break; /* VT */
1180 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1181 case '0': case '1': case '2': case '3':
1182 case '4': case '5': case '6': case '7':
1183 c = s[-1] - '0';
1184 if (s < end && '0' <= *s && *s <= '7') {
1185 c = (c<<3) + *s++ - '0';
1186 if (s < end && '0' <= *s && *s <= '7')
1187 c = (c<<3) + *s++ - '0';
1188 }
1189 *p++ = c;
1190 break;
1191 case 'x':
1192 if (s+1 < end) {
1193 int digit1, digit2;
1194 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1195 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1196 if (digit1 < 16 && digit2 < 16) {
1197 *p++ = (unsigned char)((digit1 << 4) + digit2);
1198 s += 2;
1199 break;
1200 }
1201 }
1202 /* invalid hexadecimal digits */
1203
1204 if (!errors || strcmp(errors, "strict") == 0) {
1205 PyErr_Format(PyExc_ValueError,
1206 "invalid \\x escape at position %d",
1207 s - 2 - (end - len));
1208 goto failed;
1209 }
1210 if (strcmp(errors, "replace") == 0) {
1211 *p++ = '?';
1212 } else if (strcmp(errors, "ignore") == 0)
1213 /* do nothing */;
1214 else {
1215 PyErr_Format(PyExc_ValueError,
1216 "decoding error; unknown "
1217 "error handling code: %.400s",
1218 errors);
1219 goto failed;
1220 }
1221 /* skip \x */
1222 if (s < end && Py_ISXDIGIT(s[0]))
1223 s++; /* and a hexdigit */
1224 break;
1225
1226 default:
1227 if (*first_invalid_escape == NULL) {
1228 *first_invalid_escape = s-1; /* Back up one char, since we've
1229 already incremented s. */
1230 }
1231 *p++ = '\\';
1232 s--;
1233 goto non_esc; /* an arbitrary number of unescaped
1234 UTF-8 bytes may follow. */
1235 }
1236 }
1237
1238 return _PyBytesWriter_Finish(&writer, p);
1239
1240 failed:
1241 _PyBytesWriter_Dealloc(&writer);
1242 return NULL;
1243}
1244
1245PyObject *PyBytes_DecodeEscape(const char *s,
1246 Py_ssize_t len,
1247 const char *errors,
1248 Py_ssize_t unicode,
1249 const char *recode_encoding)
1250{
1251 const char* first_invalid_escape;
1252 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1253 recode_encoding,
1254 &first_invalid_escape);
1255 if (result == NULL)
1256 return NULL;
1257 if (first_invalid_escape != NULL) {
1258 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1259 "invalid escape sequence '\\%c'",
1260 (unsigned char)*first_invalid_escape) < 0) {
1261 Py_DECREF(result);
1262 return NULL;
1263 }
1264 }
1265 return result;
1266
1267}
1268/* -------------------------------------------------------------------- */
1269/* object api */
1270
1271Py_ssize_t
1272PyBytes_Size(PyObject *op)
1273{
1274 if (!PyBytes_Check(op)) {
1275 PyErr_Format(PyExc_TypeError,
1276 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1277 return -1;
1278 }
1279 return Py_SIZE(op);
1280}
1281
1282char *
1283PyBytes_AsString(PyObject *op)
1284{
1285 if (!PyBytes_Check(op)) {
1286 PyErr_Format(PyExc_TypeError,
1287 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1288 return NULL;
1289 }
1290 return ((PyBytesObject *)op)->ob_sval;
1291}
1292
1293int
1294PyBytes_AsStringAndSize(PyObject *obj,
1295 char **s,
1296 Py_ssize_t *len)
1297{
1298 if (s == NULL) {
1299 PyErr_BadInternalCall();
1300 return -1;
1301 }
1302
1303 if (!PyBytes_Check(obj)) {
1304 PyErr_Format(PyExc_TypeError,
1305 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1306 return -1;
1307 }
1308
1309 *s = PyBytes_AS_STRING(obj);
1310 if (len != NULL)
1311 *len = PyBytes_GET_SIZE(obj);
1312 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1313 PyErr_SetString(PyExc_ValueError,
1314 "embedded null byte");
1315 return -1;
1316 }
1317 return 0;
1318}
1319
1320/* -------------------------------------------------------------------- */
1321/* Methods */
1322
1323#include "stringlib/stringdefs.h"
1324
1325#include "stringlib/fastsearch.h"
1326#include "stringlib/count.h"
1327#include "stringlib/find.h"
1328#include "stringlib/join.h"
1329#include "stringlib/partition.h"
1330#include "stringlib/split.h"
1331#include "stringlib/ctype.h"
1332
1333#include "stringlib/transmogrify.h"
1334
1335PyObject *
1336PyBytes_Repr(PyObject *obj, int smartquotes)
1337{
1338 PyBytesObject* op = (PyBytesObject*) obj;
1339 Py_ssize_t i, length = Py_SIZE(op);
1340 Py_ssize_t newsize, squotes, dquotes;
1341 PyObject *v;
1342 unsigned char quote, *s, *p;
1343
1344 /* Compute size of output string */
1345 squotes = dquotes = 0;
1346 newsize = 3; /* b'' */
1347 s = (unsigned char*)op->ob_sval;
1348 for (i = 0; i < length; i++) {
1349 Py_ssize_t incr = 1;
1350 switch(s[i]) {
1351 case '\'': squotes++; break;
1352 case '"': dquotes++; break;
1353 case '\\': case '\t': case '\n': case '\r':
1354 incr = 2; break; /* \C */
1355 default:
1356 if (s[i] < ' ' || s[i] >= 0x7f)
1357 incr = 4; /* \xHH */
1358 }
1359 if (newsize > PY_SSIZE_T_MAX - incr)
1360 goto overflow;
1361 newsize += incr;
1362 }
1363 quote = '\'';
1364 if (smartquotes && squotes && !dquotes)
1365 quote = '"';
1366 if (squotes && quote == '\'') {
1367 if (newsize > PY_SSIZE_T_MAX - squotes)
1368 goto overflow;
1369 newsize += squotes;
1370 }
1371
1372 v = PyUnicode_New(newsize, 127);
1373 if (v == NULL) {
1374 return NULL;
1375 }
1376 p = PyUnicode_1BYTE_DATA(v);
1377
1378 *p++ = 'b', *p++ = quote;
1379 for (i = 0; i < length; i++) {
1380 unsigned char c = op->ob_sval[i];
1381 if (c == quote || c == '\\')
1382 *p++ = '\\', *p++ = c;
1383 else if (c == '\t')
1384 *p++ = '\\', *p++ = 't';
1385 else if (c == '\n')
1386 *p++ = '\\', *p++ = 'n';
1387 else if (c == '\r')
1388 *p++ = '\\', *p++ = 'r';
1389 else if (c < ' ' || c >= 0x7f) {
1390 *p++ = '\\';
1391 *p++ = 'x';
1392 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1393 *p++ = Py_hexdigits[c & 0xf];
1394 }
1395 else
1396 *p++ = c;
1397 }
1398 *p++ = quote;
1399 assert(_PyUnicode_CheckConsistency(v, 1));
1400 return v;
1401
1402 overflow:
1403 PyErr_SetString(PyExc_OverflowError,
1404 "bytes object is too large to make repr");
1405 return NULL;
1406}
1407
1408static PyObject *
1409bytes_repr(PyObject *op)
1410{
1411 return PyBytes_Repr(op, 1);
1412}
1413
1414static PyObject *
1415bytes_str(PyObject *op)
1416{
1417 if (Py_BytesWarningFlag) {
1418 if (PyErr_WarnEx(PyExc_BytesWarning,
1419 "str() on a bytes instance", 1))
1420 return NULL;
1421 }
1422 return bytes_repr(op);
1423}
1424
1425static Py_ssize_t
1426bytes_length(PyBytesObject *a)
1427{
1428 return Py_SIZE(a);
1429}
1430
1431/* This is also used by PyBytes_Concat() */
1432static PyObject *
1433bytes_concat(PyObject *a, PyObject *b)
1434{
1435 Py_buffer va, vb;
1436 PyObject *result = NULL;
1437
1438 va.len = -1;
1439 vb.len = -1;
1440 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1441 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1442 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1443 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1444 goto done;
1445 }
1446
1447 /* Optimize end cases */
1448 if (va.len == 0 && PyBytes_CheckExact(b)) {
1449 result = b;
1450 Py_INCREF(result);
1451 goto done;
1452 }
1453 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1454 result = a;
1455 Py_INCREF(result);
1456 goto done;
1457 }
1458
1459 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1460 PyErr_NoMemory();
1461 goto done;
1462 }
1463
1464 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1465 if (result != NULL) {
1466 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1467 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1468 }
1469
1470 done:
1471 if (va.len != -1)
1472 PyBuffer_Release(&va);
1473 if (vb.len != -1)
1474 PyBuffer_Release(&vb);
1475 return result;
1476}
1477
1478static PyObject *
1479bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1480{
1481 Py_ssize_t i;
1482 Py_ssize_t j;
1483 Py_ssize_t size;
1484 PyBytesObject *op;
1485 size_t nbytes;
1486 if (n < 0)
1487 n = 0;
1488 /* watch out for overflows: the size can overflow int,
1489 * and the # of bytes needed can overflow size_t
1490 */
1491 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1492 PyErr_SetString(PyExc_OverflowError,
1493 "repeated bytes are too long");
1494 return NULL;
1495 }
1496 size = Py_SIZE(a) * n;
1497 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1498 Py_INCREF(a);
1499 return (PyObject *)a;
1500 }
1501 nbytes = (size_t)size;
1502 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1503 PyErr_SetString(PyExc_OverflowError,
1504 "repeated bytes are too long");
1505 return NULL;
1506 }
1507 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1508 if (op == NULL)
1509 return PyErr_NoMemory();
1510 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1511 op->ob_shash = -1;
1512 op->ob_sval[size] = '\0';
1513 if (Py_SIZE(a) == 1 && n > 0) {
1514 memset(op->ob_sval, a->ob_sval[0] , n);
1515 return (PyObject *) op;
1516 }
1517 i = 0;
1518 if (i < size) {
1519 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1520 i = Py_SIZE(a);
1521 }
1522 while (i < size) {
1523 j = (i <= size-i) ? i : size-i;
1524 memcpy(op->ob_sval+i, op->ob_sval, j);
1525 i += j;
1526 }
1527 return (PyObject *) op;
1528}
1529
1530static int
1531bytes_contains(PyObject *self, PyObject *arg)
1532{
1533 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1534}
1535
1536static PyObject *
1537bytes_item(PyBytesObject *a, Py_ssize_t i)
1538{
1539 if (i < 0 || i >= Py_SIZE(a)) {
1540 PyErr_SetString(PyExc_IndexError, "index out of range");
1541 return NULL;
1542 }
1543 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1544}
1545
1546static int
1547bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548{
1549 int cmp;
1550 Py_ssize_t len;
1551
1552 len = Py_SIZE(a);
1553 if (Py_SIZE(b) != len)
1554 return 0;
1555
1556 if (a->ob_sval[0] != b->ob_sval[0])
1557 return 0;
1558
1559 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560 return (cmp == 0);
1561}
1562
1563static PyObject*
1564bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1565{
1566 int c;
1567 Py_ssize_t len_a, len_b;
1568 Py_ssize_t min_len;
1569 PyObject *result;
1570 int rc;
1571
1572 /* Make sure both arguments are strings. */
1573 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1574 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1575 rc = PyObject_IsInstance((PyObject*)a,
1576 (PyObject*)&PyUnicode_Type);
1577 if (!rc)
1578 rc = PyObject_IsInstance((PyObject*)b,
1579 (PyObject*)&PyUnicode_Type);
1580 if (rc < 0)
1581 return NULL;
1582 if (rc) {
1583 if (PyErr_WarnEx(PyExc_BytesWarning,
1584 "Comparison between bytes and string", 1))
1585 return NULL;
1586 }
1587 else {
1588 rc = PyObject_IsInstance((PyObject*)a,
1589 (PyObject*)&PyLong_Type);
1590 if (!rc)
1591 rc = PyObject_IsInstance((PyObject*)b,
1592 (PyObject*)&PyLong_Type);
1593 if (rc < 0)
1594 return NULL;
1595 if (rc) {
1596 if (PyErr_WarnEx(PyExc_BytesWarning,
1597 "Comparison between bytes and int", 1))
1598 return NULL;
1599 }
1600 }
1601 }
1602 result = Py_NotImplemented;
1603 }
1604 else if (a == b) {
1605 switch (op) {
1606 case Py_EQ:
1607 case Py_LE:
1608 case Py_GE:
1609 /* a string is equal to itself */
1610 result = Py_True;
1611 break;
1612 case Py_NE:
1613 case Py_LT:
1614 case Py_GT:
1615 result = Py_False;
1616 break;
1617 default:
1618 PyErr_BadArgument();
1619 return NULL;
1620 }
1621 }
1622 else if (op == Py_EQ || op == Py_NE) {
1623 int eq = bytes_compare_eq(a, b);
1624 eq ^= (op == Py_NE);
1625 result = eq ? Py_True : Py_False;
1626 }
1627 else {
1628 len_a = Py_SIZE(a);
1629 len_b = Py_SIZE(b);
1630 min_len = Py_MIN(len_a, len_b);
1631 if (min_len > 0) {
1632 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1633 if (c == 0)
1634 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1635 }
1636 else
1637 c = 0;
1638 if (c == 0)
1639 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1640 switch (op) {
1641 case Py_LT: c = c < 0; break;
1642 case Py_LE: c = c <= 0; break;
1643 case Py_GT: c = c > 0; break;
1644 case Py_GE: c = c >= 0; break;
1645 default:
1646 PyErr_BadArgument();
1647 return NULL;
1648 }
1649 result = c ? Py_True : Py_False;
1650 }
1651
1652 Py_INCREF(result);
1653 return result;
1654}
1655
1656static Py_hash_t
1657bytes_hash(PyBytesObject *a)
1658{
1659 if (a->ob_shash == -1) {
1660 /* Can't fail */
1661 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1662 }
1663 return a->ob_shash;
1664}
1665
1666static PyObject*
1667bytes_subscript(PyBytesObject* self, PyObject* item)
1668{
1669 if (PyIndex_Check(item)) {
1670 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1671 if (i == -1 && PyErr_Occurred())
1672 return NULL;
1673 if (i < 0)
1674 i += PyBytes_GET_SIZE(self);
1675 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1676 PyErr_SetString(PyExc_IndexError,
1677 "index out of range");
1678 return NULL;
1679 }
1680 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1681 }
1682 else if (PySlice_Check(item)) {
1683 Py_ssize_t start, stop, step, slicelength, cur, i;
1684 char* source_buf;
1685 char* result_buf;
1686 PyObject* result;
1687
1688 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1689 return NULL;
1690 }
1691 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1692 &stop, step);
1693
1694 if (slicelength <= 0) {
1695 return PyBytes_FromStringAndSize("", 0);
1696 }
1697 else if (start == 0 && step == 1 &&
1698 slicelength == PyBytes_GET_SIZE(self) &&
1699 PyBytes_CheckExact(self)) {
1700 Py_INCREF(self);
1701 return (PyObject *)self;
1702 }
1703 else if (step == 1) {
1704 return PyBytes_FromStringAndSize(
1705 PyBytes_AS_STRING(self) + start,
1706 slicelength);
1707 }
1708 else {
1709 source_buf = PyBytes_AS_STRING(self);
1710 result = PyBytes_FromStringAndSize(NULL, slicelength);
1711 if (result == NULL)
1712 return NULL;
1713
1714 result_buf = PyBytes_AS_STRING(result);
1715 for (cur = start, i = 0; i < slicelength;
1716 cur += step, i++) {
1717 result_buf[i] = source_buf[cur];
1718 }
1719
1720 return result;
1721 }
1722 }
1723 else {
1724 PyErr_Format(PyExc_TypeError,
1725 "byte indices must be integers or slices, not %.200s",
1726 Py_TYPE(item)->tp_name);
1727 return NULL;
1728 }
1729}
1730
1731static int
1732bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1733{
1734 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1735 1, flags);
1736}
1737
1738static PySequenceMethods bytes_as_sequence = {
1739 (lenfunc)bytes_length, /*sq_length*/
1740 (binaryfunc)bytes_concat, /*sq_concat*/
1741 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1742 (ssizeargfunc)bytes_item, /*sq_item*/
1743 0, /*sq_slice*/
1744 0, /*sq_ass_item*/
1745 0, /*sq_ass_slice*/
1746 (objobjproc)bytes_contains /*sq_contains*/
1747};
1748
1749static PyMappingMethods bytes_as_mapping = {
1750 (lenfunc)bytes_length,
1751 (binaryfunc)bytes_subscript,
1752 0,
1753};
1754
1755static PyBufferProcs bytes_as_buffer = {
1756 (getbufferproc)bytes_buffer_getbuffer,
1757 NULL,
1758};
1759
1760
/* Values for the `striptype' argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1764
1765/*[clinic input]
1766bytes.split
1767
1768 sep: object = None
1769 The delimiter according which to split the bytes.
1770 None (the default value) means split on ASCII whitespace characters
1771 (space, tab, return, newline, formfeed, vertical tab).
1772 maxsplit: Py_ssize_t = -1
1773 Maximum number of splits to do.
1774 -1 (the default value) means no limit.
1775
1776Return a list of the sections in the bytes, using sep as the delimiter.
1777[clinic start generated code]*/
1778
1779static PyObject *
1780bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1781/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1782{
1783 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1784 const char *s = PyBytes_AS_STRING(self), *sub;
1785 Py_buffer vsub;
1786 PyObject *list;
1787
1788 if (maxsplit < 0)
1789 maxsplit = PY_SSIZE_T_MAX;
1790 if (sep == Py_None)
1791 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1792 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1793 return NULL;
1794 sub = vsub.buf;
1795 n = vsub.len;
1796
1797 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1798 PyBuffer_Release(&vsub);
1799 return list;
1800}
1801
1802/*[clinic input]
1803bytes.partition
1804
1805 sep: Py_buffer
1806 /
1807
1808Partition the bytes into three parts using the given separator.
1809
1810This will search for the separator sep in the bytes. If the separator is found,
1811returns a 3-tuple containing the part before the separator, the separator
1812itself, and the part after it.
1813
1814If the separator is not found, returns a 3-tuple containing the original bytes
1815object and two empty bytes objects.
1816[clinic start generated code]*/
1817
1818static PyObject *
1819bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1820/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1821{
1822 return stringlib_partition(
1823 (PyObject*) self,
1824 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1825 sep->obj, (const char *)sep->buf, sep->len
1826 );
1827}
1828
1829/*[clinic input]
1830bytes.rpartition
1831
1832 sep: Py_buffer
1833 /
1834
1835Partition the bytes into three parts using the given separator.
1836
1837This will search for the separator sep in the bytes, starting at the end. If
1838the separator is found, returns a 3-tuple containing the part before the
1839separator, the separator itself, and the part after it.
1840
1841If the separator is not found, returns a 3-tuple containing two empty bytes
1842objects and the original bytes object.
1843[clinic start generated code]*/
1844
1845static PyObject *
1846bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1847/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1848{
1849 return stringlib_rpartition(
1850 (PyObject*) self,
1851 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1852 sep->obj, (const char *)sep->buf, sep->len
1853 );
1854}
1855
1856/*[clinic input]
1857bytes.rsplit = bytes.split
1858
1859Return a list of the sections in the bytes, using sep as the delimiter.
1860
1861Splitting is done starting at the end of the bytes and working to the front.
1862[clinic start generated code]*/
1863
1864static PyObject *
1865bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1867{
1868 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869 const char *s = PyBytes_AS_STRING(self), *sub;
1870 Py_buffer vsub;
1871 PyObject *list;
1872
1873 if (maxsplit < 0)
1874 maxsplit = PY_SSIZE_T_MAX;
1875 if (sep == Py_None)
1876 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1877 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878 return NULL;
1879 sub = vsub.buf;
1880 n = vsub.len;
1881
1882 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1883 PyBuffer_Release(&vsub);
1884 return list;
1885}
1886
1887
1888/*[clinic input]
1889bytes.join
1890
1891 iterable_of_bytes: object
1892 /
1893
1894Concatenate any number of bytes objects.
1895
1896The bytes whose method is called is inserted in between each pair.
1897
1898The result is returned as a new bytes object.
1899
1900Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1901[clinic start generated code]*/
1902
1903static PyObject *
1904bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1905/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1906{
1907 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1908}
1909
1910PyObject *
1911_PyBytes_Join(PyObject *sep, PyObject *x)
1912{
1913 assert(sep != NULL && PyBytes_Check(sep));
1914 assert(x != NULL);
1915 return bytes_join((PyBytesObject*)sep, x);
1916}
1917
1918static PyObject *
1919bytes_find(PyBytesObject *self, PyObject *args)
1920{
1921 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1922}
1923
1924static PyObject *
1925bytes_index(PyBytesObject *self, PyObject *args)
1926{
1927 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1928}
1929
1930
1931static PyObject *
1932bytes_rfind(PyBytesObject *self, PyObject *args)
1933{
1934 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1935}
1936
1937
1938static PyObject *
1939bytes_rindex(PyBytesObject *self, PyObject *args)
1940{
1941 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1942}
1943
1944
1945Py_LOCAL_INLINE(PyObject *)
1946do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1947{
1948 Py_buffer vsep;
1949 char *s = PyBytes_AS_STRING(self);
1950 Py_ssize_t len = PyBytes_GET_SIZE(self);
1951 char *sep;
1952 Py_ssize_t seplen;
1953 Py_ssize_t i, j;
1954
1955 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1956 return NULL;
1957 sep = vsep.buf;
1958 seplen = vsep.len;
1959
1960 i = 0;
1961 if (striptype != RIGHTSTRIP) {
1962 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1963 i++;
1964 }
1965 }
1966
1967 j = len;
1968 if (striptype != LEFTSTRIP) {
1969 do {
1970 j--;
1971 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1972 j++;
1973 }
1974
1975 PyBuffer_Release(&vsep);
1976
1977 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1978 Py_INCREF(self);
1979 return (PyObject*)self;
1980 }
1981 else
1982 return PyBytes_FromStringAndSize(s+i, j-i);
1983}
1984
1985
1986Py_LOCAL_INLINE(PyObject *)
1987do_strip(PyBytesObject *self, int striptype)
1988{
1989 char *s = PyBytes_AS_STRING(self);
1990 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1991
1992 i = 0;
1993 if (striptype != RIGHTSTRIP) {
1994 while (i < len && Py_ISSPACE(s[i])) {
1995 i++;
1996 }
1997 }
1998
1999 j = len;
2000 if (striptype != LEFTSTRIP) {
2001 do {
2002 j--;
2003 } while (j >= i && Py_ISSPACE(s[j]));
2004 j++;
2005 }
2006
2007 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2008 Py_INCREF(self);
2009 return (PyObject*)self;
2010 }
2011 else
2012 return PyBytes_FromStringAndSize(s+i, j-i);
2013}
2014
2015
2016Py_LOCAL_INLINE(PyObject *)
2017do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2018{
2019 if (bytes != NULL && bytes != Py_None) {
2020 return do_xstrip(self, striptype, bytes);
2021 }
2022 return do_strip(self, striptype);
2023}
2024
2025/*[clinic input]
2026bytes.strip
2027
2028 bytes: object = None
2029 /
2030
2031Strip leading and trailing bytes contained in the argument.
2032
2033If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2034[clinic start generated code]*/
2035
2036static PyObject *
2037bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2038/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2039{
2040 return do_argstrip(self, BOTHSTRIP, bytes);
2041}
2042
2043/*[clinic input]
2044bytes.lstrip
2045
2046 bytes: object = None
2047 /
2048
2049Strip leading bytes contained in the argument.
2050
2051If the argument is omitted or None, strip leading ASCII whitespace.
2052[clinic start generated code]*/
2053
2054static PyObject *
2055bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2056/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2057{
2058 return do_argstrip(self, LEFTSTRIP, bytes);
2059}
2060
2061/*[clinic input]
2062bytes.rstrip
2063
2064 bytes: object = None
2065 /
2066
2067Strip trailing bytes contained in the argument.
2068
2069If the argument is omitted or None, strip trailing ASCII whitespace.
2070[clinic start generated code]*/
2071
2072static PyObject *
2073bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2074/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2075{
2076 return do_argstrip(self, RIGHTSTRIP, bytes);
2077}
2078
2079
2080static PyObject *
2081bytes_count(PyBytesObject *self, PyObject *args)
2082{
2083 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2084}
2085
2086
2087/*[clinic input]
2088bytes.translate
2089
2090 table: object
2091 Translation table, which must be a bytes object of length 256.
2092 /
2093 delete as deletechars: object(c_default="NULL") = b''
2094
2095Return a copy with each character mapped by the given translation table.
2096
2097All characters occurring in the optional argument delete are removed.
2098The remaining characters are mapped through the given translation table.
2099[clinic start generated code]*/
2100
2101static PyObject *
2102bytes_translate_impl(PyBytesObject *self, PyObject *table,
2103 PyObject *deletechars)
2104/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2105{
2106 char *input, *output;
2107 Py_buffer table_view = {NULL, NULL};
2108 Py_buffer del_table_view = {NULL, NULL};
2109 const char *table_chars;
2110 Py_ssize_t i, c, changed = 0;
2111 PyObject *input_obj = (PyObject*)self;
2112 const char *output_start, *del_table_chars=NULL;
2113 Py_ssize_t inlen, tablen, dellen = 0;
2114 PyObject *result;
2115 int trans_table[256];
2116
2117 if (PyBytes_Check(table)) {
2118 table_chars = PyBytes_AS_STRING(table);
2119 tablen = PyBytes_GET_SIZE(table);
2120 }
2121 else if (table == Py_None) {
2122 table_chars = NULL;
2123 tablen = 256;
2124 }
2125 else {
2126 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2127 return NULL;
2128 table_chars = table_view.buf;
2129 tablen = table_view.len;
2130 }
2131
2132 if (tablen != 256) {
2133 PyErr_SetString(PyExc_ValueError,
2134 "translation table must be 256 characters long");
2135 PyBuffer_Release(&table_view);
2136 return NULL;
2137 }
2138
2139 if (deletechars != NULL) {
2140 if (PyBytes_Check(deletechars)) {
2141 del_table_chars = PyBytes_AS_STRING(deletechars);
2142 dellen = PyBytes_GET_SIZE(deletechars);
2143 }
2144 else {
2145 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2146 PyBuffer_Release(&table_view);
2147 return NULL;
2148 }
2149 del_table_chars = del_table_view.buf;
2150 dellen = del_table_view.len;
2151 }
2152 }
2153 else {
2154 del_table_chars = NULL;
2155 dellen = 0;
2156 }
2157
2158 inlen = PyBytes_GET_SIZE(input_obj);
2159 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2160 if (result == NULL) {
2161 PyBuffer_Release(&del_table_view);
2162 PyBuffer_Release(&table_view);
2163 return NULL;
2164 }
2165 output_start = output = PyBytes_AS_STRING(result);
2166 input = PyBytes_AS_STRING(input_obj);
2167
2168 if (dellen == 0 && table_chars != NULL) {
2169 /* If no deletions are required, use faster code */
2170 for (i = inlen; --i >= 0; ) {
2171 c = Py_CHARMASK(*input++);
2172 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2173 changed = 1;
2174 }
2175 if (!changed && PyBytes_CheckExact(input_obj)) {
2176 Py_INCREF(input_obj);
2177 Py_DECREF(result);
2178 result = input_obj;
2179 }
2180 PyBuffer_Release(&del_table_view);
2181 PyBuffer_Release(&table_view);
2182 return result;
2183 }
2184
2185 if (table_chars == NULL) {
2186 for (i = 0; i < 256; i++)
2187 trans_table[i] = Py_CHARMASK(i);
2188 } else {
2189 for (i = 0; i < 256; i++)
2190 trans_table[i] = Py_CHARMASK(table_chars[i]);
2191 }
2192 PyBuffer_Release(&table_view);
2193
2194 for (i = 0; i < dellen; i++)
2195 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2196 PyBuffer_Release(&del_table_view);
2197
2198 for (i = inlen; --i >= 0; ) {
2199 c = Py_CHARMASK(*input++);
2200 if (trans_table[c] != -1)
2201 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2202 continue;
2203 changed = 1;
2204 }
2205 if (!changed && PyBytes_CheckExact(input_obj)) {
2206 Py_DECREF(result);
2207 Py_INCREF(input_obj);
2208 return input_obj;
2209 }
2210 /* Fix the size of the resulting string */
2211 if (inlen > 0)
2212 _PyBytes_Resize(&result, output - output_start);
2213 return result;
2214}
2215
2216
2217/*[clinic input]
2218
2219@staticmethod
2220bytes.maketrans
2221
2222 frm: Py_buffer
2223 to: Py_buffer
2224 /
2225
2226Return a translation table useable for the bytes or bytearray translate method.
2227
2228The returned table will be one where each byte in frm is mapped to the byte at
2229the same position in to.
2230
2231The bytes objects frm and to must be of the same length.
2232[clinic start generated code]*/
2233
2234static PyObject *
2235bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2236/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2237{
2238 return _Py_bytes_maketrans(frm, to);
2239}
2240
2241
2242/*[clinic input]
2243bytes.replace
2244
2245 old: Py_buffer
2246 new: Py_buffer
2247 count: Py_ssize_t = -1
2248 Maximum number of occurrences to replace.
2249 -1 (the default value) means replace all occurrences.
2250 /
2251
2252Return a copy with all occurrences of substring old replaced by new.
2253
2254If the optional argument count is given, only the first count occurrences are
2255replaced.
2256[clinic start generated code]*/
2257
2258static PyObject *
2259bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2260 Py_ssize_t count)
2261/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2262{
2263 return stringlib_replace((PyObject *)self,
2264 (const char *)old->buf, old->len,
2265 (const char *)new->buf, new->len, count);
2266}
2267
2268/** End DALKE **/
2269
2270
2271static PyObject *
2272bytes_startswith(PyBytesObject *self, PyObject *args)
2273{
2274 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2275}
2276
2277static PyObject *
2278bytes_endswith(PyBytesObject *self, PyObject *args)
2279{
2280 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2281}
2282
2283
2284/*[clinic input]
2285bytes.decode
2286
2287 encoding: str(c_default="NULL") = 'utf-8'
2288 The encoding with which to decode the bytes.
2289 errors: str(c_default="NULL") = 'strict'
2290 The error handling scheme to use for the handling of decoding errors.
2291 The default is 'strict' meaning that decoding errors raise a
2292 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2293 as well as any other name registered with codecs.register_error that
2294 can handle UnicodeDecodeErrors.
2295
2296Decode the bytes using the codec registered for encoding.
2297[clinic start generated code]*/
2298
2299static PyObject *
2300bytes_decode_impl(PyBytesObject *self, const char *encoding,
2301 const char *errors)
2302/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2303{
2304 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2305}
2306
2307
2308/*[clinic input]
2309bytes.splitlines
2310
2311 keepends: bool(accept={int}) = False
2312
2313Return a list of the lines in the bytes, breaking at line boundaries.
2314
2315Line breaks are not included in the resulting list unless keepends is given and
2316true.
2317[clinic start generated code]*/
2318
2319static PyObject *
2320bytes_splitlines_impl(PyBytesObject *self, int keepends)
2321/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2322{
2323 return stringlib_splitlines(
2324 (PyObject*) self, PyBytes_AS_STRING(self),
2325 PyBytes_GET_SIZE(self), keepends
2326 );
2327}
2328
2329/*[clinic input]
2330@classmethod
2331bytes.fromhex
2332
2333 string: unicode
2334 /
2335
2336Create a bytes object from a string of hexadecimal numbers.
2337
2338Spaces between two numbers are accepted.
2339Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2340[clinic start generated code]*/
2341
2342static PyObject *
2343bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2344/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2345{
2346 PyObject *result = _PyBytes_FromHex(string, 0);
2347 if (type != &PyBytes_Type && result != NULL) {
2348 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2349 result, NULL));
2350 }
2351 return result;
2352}
2353
2354PyObject*
2355_PyBytes_FromHex(PyObject *string, int use_bytearray)
2356{
2357 char *buf;
2358 Py_ssize_t hexlen, invalid_char;
2359 unsigned int top, bot;
2360 Py_UCS1 *str, *end;
2361 _PyBytesWriter writer;
2362
2363 _PyBytesWriter_Init(&writer);
2364 writer.use_bytearray = use_bytearray;
2365
2366 assert(PyUnicode_Check(string));
2367 if (PyUnicode_READY(string))
2368 return NULL;
2369 hexlen = PyUnicode_GET_LENGTH(string);
2370
2371 if (!PyUnicode_IS_ASCII(string)) {
2372 void *data = PyUnicode_DATA(string);
2373 unsigned int kind = PyUnicode_KIND(string);
2374 Py_ssize_t i;
2375
2376 /* search for the first non-ASCII character */
2377 for (i = 0; i < hexlen; i++) {
2378 if (PyUnicode_READ(kind, data, i) >= 128)
2379 break;
2380 }
2381 invalid_char = i;
2382 goto error;
2383 }
2384
2385 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2386 str = PyUnicode_1BYTE_DATA(string);
2387
2388 /* This overestimates if there are spaces */
2389 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2390 if (buf == NULL)
2391 return NULL;
2392
2393 end = str + hexlen;
2394 while (str < end) {
2395 /* skip over spaces in the input */
2396 if (Py_ISSPACE(*str)) {
2397 do {
2398 str++;
2399 } while (Py_ISSPACE(*str));
2400 if (str >= end)
2401 break;
2402 }
2403
2404 top = _PyLong_DigitValue[*str];
2405 if (top >= 16) {
2406 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2407 goto error;
2408 }
2409 str++;
2410
2411 bot = _PyLong_DigitValue[*str];
2412 if (bot >= 16) {
2413 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2414 goto error;
2415 }
2416 str++;
2417
2418 *buf++ = (unsigned char)((top << 4) + bot);
2419 }
2420
2421 return _PyBytesWriter_Finish(&writer, buf);
2422
2423 error:
2424 PyErr_Format(PyExc_ValueError,
2425 "non-hexadecimal number found in "
2426 "fromhex() arg at position %zd", invalid_char);
2427 _PyBytesWriter_Dealloc(&writer);
2428 return NULL;
2429}
2430
/* Docstring for bytes.hex(); referenced from the "hex" entry of
   bytes_methods. */
PyDoc_STRVAR(hex__doc__,
"B.hex() -> string\n\
\n\
Create a string of hexadecimal numbers from a bytes object.\n\
Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2436
2437static PyObject *
2438bytes_hex(PyBytesObject *self)
2439{
2440 char* argbuf = PyBytes_AS_STRING(self);
2441 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2442 return _Py_strhex(argbuf, arglen);
2443}
2444
2445static PyObject *
2446bytes_getnewargs(PyBytesObject *v)
2447{
2448 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2449}
2450
2451
/* Method table for the bytes type.  Entries written as BYTES_*_METHODDEF
   are macros; they are not defined in this part of the file (presumably
   they come from the Argument Clinic header included near the top of the
   file -- confirm there).  The table ends with a {NULL, NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS,
     _Py_center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     _Py_expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2511
2512static PyObject *
2513bytes_mod(PyObject *self, PyObject *arg)
2514{
2515 if (!PyBytes_Check(self)) {
2516 Py_RETURN_NOTIMPLEMENTED;
2517 }
2518 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2519 arg, 0);
2520}
2521
2522static PyNumberMethods bytes_as_number = {
2523 0, /*nb_add*/
2524 0, /*nb_subtract*/
2525 0, /*nb_multiply*/
2526 bytes_mod, /*nb_remainder*/
2527};
2528
2529static PyObject *
2530bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2531
2532static PyObject *
2533bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2534{
2535 PyObject *x = NULL;
2536 const char *encoding = NULL;
2537 const char *errors = NULL;
2538 PyObject *new = NULL;
2539 PyObject *func;
2540 Py_ssize_t size;
2541 static char *kwlist[] = {"source", "encoding", "errors", 0};
2542 _Py_IDENTIFIER(__bytes__);
2543
2544 if (type != &PyBytes_Type)
2545 return bytes_subtype_new(type, args, kwds);
2546 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2547 &encoding, &errors))
2548 return NULL;
2549 if (x == NULL) {
2550 if (encoding != NULL || errors != NULL) {
2551 PyErr_SetString(PyExc_TypeError,
2552 "encoding or errors without sequence "
2553 "argument");
2554 return NULL;
2555 }
2556 return PyBytes_FromStringAndSize(NULL, 0);
2557 }
2558
2559 if (encoding != NULL) {
2560 /* Encode via the codec registry */
2561 if (!PyUnicode_Check(x)) {
2562 PyErr_SetString(PyExc_TypeError,
2563 "encoding without a string argument");
2564 return NULL;
2565 }
2566 new = PyUnicode_AsEncodedString(x, encoding, errors);
2567 if (new == NULL)
2568 return NULL;
2569 assert(PyBytes_Check(new));
2570 return new;
2571 }
2572
2573 if (errors != NULL) {
2574 PyErr_SetString(PyExc_TypeError,
2575 PyUnicode_Check(x) ?
2576 "string argument without an encoding" :
2577 "errors without a string argument");
2578 return NULL;
2579 }
2580
2581 /* We'd like to call PyObject_Bytes here, but we need to check for an
2582 integer argument before deferring to PyBytes_FromObject, something
2583 PyObject_Bytes doesn't do. */
2584 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2585 if (func != NULL) {
2586 new = _PyObject_CallNoArg(func);
2587 Py_DECREF(func);
2588 if (new == NULL)
2589 return NULL;
2590 if (!PyBytes_Check(new)) {
2591 PyErr_Format(PyExc_TypeError,
2592 "__bytes__ returned non-bytes (type %.200s)",
2593 Py_TYPE(new)->tp_name);
2594 Py_DECREF(new);
2595 return NULL;
2596 }
2597 return new;
2598 }
2599 else if (PyErr_Occurred())
2600 return NULL;
2601
2602 if (PyUnicode_Check(x)) {
2603 PyErr_SetString(PyExc_TypeError,
2604 "string argument without an encoding");
2605 return NULL;
2606 }
2607 /* Is it an integer? */
2608 if (PyIndex_Check(x)) {
2609 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2610 if (size == -1 && PyErr_Occurred()) {
2611 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2612 return NULL;
2613 PyErr_Clear(); /* fall through */
2614 }
2615 else {
2616 if (size < 0) {
2617 PyErr_SetString(PyExc_ValueError, "negative count");
2618 return NULL;
2619 }
2620 new = _PyBytes_FromSize(size, 1);
2621 if (new == NULL)
2622 return NULL;
2623 return new;
2624 }
2625 }
2626
2627 return PyBytes_FromObject(x);
2628}
2629
2630static PyObject*
2631_PyBytes_FromBuffer(PyObject *x)
2632{
2633 PyObject *new;
2634 Py_buffer view;
2635
2636 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2637 return NULL;
2638
2639 new = PyBytes_FromStringAndSize(NULL, view.len);
2640 if (!new)
2641 goto fail;
2642 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2643 &view, view.len, 'C') < 0)
2644 goto fail;
2645 PyBuffer_Release(&view);
2646 return new;
2647
2648fail:
2649 Py_XDECREF(new);
2650 PyBuffer_Release(&view);
2651 return NULL;
2652}
2653
2654#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
2655 do { \
2656 PyObject *bytes; \
2657 Py_ssize_t i; \
2658 Py_ssize_t value; \
2659 char *str; \
2660 PyObject *item; \
2661 \
2662 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
2663 if (bytes == NULL) \
2664 return NULL; \
2665 str = ((PyBytesObject *)bytes)->ob_sval; \
2666 \
2667 for (i = 0; i < Py_SIZE(x); i++) { \
2668 item = GET_ITEM((x), i); \
2669 value = PyNumber_AsSsize_t(item, NULL); \
2670 if (value == -1 && PyErr_Occurred()) \
2671 goto error; \
2672 \
2673 if (value < 0 || value >= 256) { \
2674 PyErr_SetString(PyExc_ValueError, \
2675 "bytes must be in range(0, 256)"); \
2676 goto error; \
2677 } \
2678 *str++ = (char) value; \
2679 } \
2680 return bytes; \
2681 \
2682 error: \
2683 Py_DECREF(bytes); \
2684 return NULL; \
2685 } while (0)
2686
2687static PyObject*
2688_PyBytes_FromList(PyObject *x)
2689{
2690 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
2691}
2692
2693static PyObject*
2694_PyBytes_FromTuple(PyObject *x)
2695{
2696 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
2697}
2698
2699static PyObject *
2700_PyBytes_FromIterator(PyObject *it, PyObject *x)
2701{
2702 char *str;
2703 Py_ssize_t i, size;
2704 _PyBytesWriter writer;
2705
2706 /* For iterator version, create a string object and resize as needed */
2707 size = PyObject_LengthHint(x, 64);
2708 if (size == -1 && PyErr_Occurred())
2709 return NULL;
2710
2711 _PyBytesWriter_Init(&writer);
2712 str = _PyBytesWriter_Alloc(&writer, size);
2713 if (str == NULL)
2714 return NULL;
2715 writer.overallocate = 1;
2716 size = writer.allocated;
2717
2718 /* Run the iterator to exhaustion */
2719 for (i = 0; ; i++) {
2720 PyObject *item;
2721 Py_ssize_t value;
2722
2723 /* Get the next item */
2724 item = PyIter_Next(it);
2725 if (item == NULL) {
2726 if (PyErr_Occurred())
2727 goto error;
2728 break;
2729 }
2730
2731 /* Interpret it as an int (__index__) */
2732 value = PyNumber_AsSsize_t(item, NULL);
2733 Py_DECREF(item);
2734 if (value == -1 && PyErr_Occurred())
2735 goto error;
2736
2737 /* Range check */
2738 if (value < 0 || value >= 256) {
2739 PyErr_SetString(PyExc_ValueError,
2740 "bytes must be in range(0, 256)");
2741 goto error;
2742 }
2743
2744 /* Append the byte */
2745 if (i >= size) {
2746 str = _PyBytesWriter_Resize(&writer, str, size+1);
2747 if (str == NULL)
2748 return NULL;
2749 size = writer.allocated;
2750 }
2751 *str++ = (char) value;
2752 }
2753
2754 return _PyBytesWriter_Finish(&writer, str);
2755
2756 error:
2757 _PyBytesWriter_Dealloc(&writer);
2758 return NULL;
2759}
2760
2761PyObject *
2762PyBytes_FromObject(PyObject *x)
2763{
2764 PyObject *it, *result;
2765
2766 if (x == NULL) {
2767 PyErr_BadInternalCall();
2768 return NULL;
2769 }
2770
2771 if (PyBytes_CheckExact(x)) {
2772 Py_INCREF(x);
2773 return x;
2774 }
2775
2776 /* Use the modern buffer interface */
2777 if (PyObject_CheckBuffer(x))
2778 return _PyBytes_FromBuffer(x);
2779
2780 if (PyList_CheckExact(x))
2781 return _PyBytes_FromList(x);
2782
2783 if (PyTuple_CheckExact(x))
2784 return _PyBytes_FromTuple(x);
2785
2786 if (!PyUnicode_Check(x)) {
2787 it = PyObject_GetIter(x);
2788 if (it != NULL) {
2789 result = _PyBytes_FromIterator(it, x);
2790 Py_DECREF(it);
2791 return result;
2792 }
2793 }
2794
2795 PyErr_Format(PyExc_TypeError,
2796 "cannot convert '%.200s' object to bytes",
2797 x->ob_type->tp_name);
2798 return NULL;
2799}
2800
2801static PyObject *
2802bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2803{
2804 PyObject *tmp, *pnew;
2805 Py_ssize_t n;
2806
2807 assert(PyType_IsSubtype(type, &PyBytes_Type));
2808 tmp = bytes_new(&PyBytes_Type, args, kwds);
2809 if (tmp == NULL)
2810 return NULL;
2811 assert(PyBytes_Check(tmp));
2812 n = PyBytes_GET_SIZE(tmp);
2813 pnew = type->tp_alloc(type, n);
2814 if (pnew != NULL) {
2815 memcpy(PyBytes_AS_STRING(pnew),
2816 PyBytes_AS_STRING(tmp), n+1);
2817 ((PyBytesObject *)pnew)->ob_shash =
2818 ((PyBytesObject *)tmp)->ob_shash;
2819 }
2820 Py_DECREF(tmp);
2821 return pnew;
2822}
2823
2824PyDoc_STRVAR(bytes_doc,
2825"bytes(iterable_of_ints) -> bytes\n\
2826bytes(string, encoding[, errors]) -> bytes\n\
2827bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2828bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2829bytes() -> empty bytes object\n\
2830\n\
2831Construct an immutable array of bytes from:\n\
2832 - an iterable yielding integers in range(256)\n\
2833 - a text string encoded using the specified encoding\n\
2834 - any object implementing the buffer API.\n\
2835 - an integer");
2836
2837static PyObject *bytes_iter(PyObject *seq);
2838
2839PyTypeObject PyBytes_Type = {
2840 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2841 "bytes",
2842 PyBytesObject_SIZE,
2843 sizeof(char),
2844 bytes_dealloc, /* tp_dealloc */
2845 0, /* tp_print */
2846 0, /* tp_getattr */
2847 0, /* tp_setattr */
2848 0, /* tp_reserved */
2849 (reprfunc)bytes_repr, /* tp_repr */
2850 &bytes_as_number, /* tp_as_number */
2851 &bytes_as_sequence, /* tp_as_sequence */
2852 &bytes_as_mapping, /* tp_as_mapping */
2853 (hashfunc)bytes_hash, /* tp_hash */
2854 0, /* tp_call */
2855 bytes_str, /* tp_str */
2856 PyObject_GenericGetAttr, /* tp_getattro */
2857 0, /* tp_setattro */
2858 &bytes_as_buffer, /* tp_as_buffer */
2859 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2860 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2861 bytes_doc, /* tp_doc */
2862 0, /* tp_traverse */
2863 0, /* tp_clear */
2864 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2865 0, /* tp_weaklistoffset */
2866 bytes_iter, /* tp_iter */
2867 0, /* tp_iternext */
2868 bytes_methods, /* tp_methods */
2869 0, /* tp_members */
2870 0, /* tp_getset */
2871 &PyBaseObject_Type, /* tp_base */
2872 0, /* tp_dict */
2873 0, /* tp_descr_get */
2874 0, /* tp_descr_set */
2875 0, /* tp_dictoffset */
2876 0, /* tp_init */
2877 0, /* tp_alloc */
2878 bytes_new, /* tp_new */
2879 PyObject_Del, /* tp_free */
2880};
2881
2882void
2883PyBytes_Concat(PyObject **pv, PyObject *w)
2884{
2885 assert(pv != NULL);
2886 if (*pv == NULL)
2887 return;
2888 if (w == NULL) {
2889 Py_CLEAR(*pv);
2890 return;
2891 }
2892
2893 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2894 /* Only one reference, so we can resize in place */
2895 Py_ssize_t oldsize;
2896 Py_buffer wb;
2897
2898 wb.len = -1;
2899 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2900 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2901 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2902 Py_CLEAR(*pv);
2903 return;
2904 }
2905
2906 oldsize = PyBytes_GET_SIZE(*pv);
2907 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2908 PyErr_NoMemory();
2909 goto error;
2910 }
2911 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2912 goto error;
2913
2914 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2915 PyBuffer_Release(&wb);
2916 return;
2917
2918 error:
2919 PyBuffer_Release(&wb);
2920 Py_CLEAR(*pv);
2921 return;
2922 }
2923
2924 else {
2925 /* Multiple references, need to create new object */
2926 PyObject *v;
2927 v = bytes_concat(*pv, w);
2928 Py_SETREF(*pv, v);
2929 }
2930}
2931
2932void
2933PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2934{
2935 PyBytes_Concat(pv, w);
2936 Py_XDECREF(w);
2937}
2938
2939
2940/* The following function breaks the notion that bytes are immutable:
2941 it changes the size of a bytes object. We get away with this only if there
2942 is only one module referencing the object. You can also think of it
2943 as creating a new bytes object and destroying the old one, only
2944 more efficiently. In any case, don't use this if the bytes object may
2945 already be known to some other part of the code...
2946 Note that if there's not enough memory to resize the bytes object, the
2947 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2948 memory" exception is set, and -1 is returned. Else (on success) 0 is
2949 returned, and the value in *pv may or may not be the same as on input.
2950 As always, an extra byte is allocated for a trailing \0 byte (newsize
2951 does *not* include that), and a trailing \0 byte is stored.
2952*/
2953
2954int
2955_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2956{
2957 PyObject *v;
2958 PyBytesObject *sv;
2959 v = *pv;
2960 if (!PyBytes_Check(v) || newsize < 0) {
2961 goto error;
2962 }
2963 if (Py_SIZE(v) == newsize) {
2964 /* return early if newsize equals to v->ob_size */
2965 return 0;
2966 }
2967 if (Py_REFCNT(v) != 1) {
2968 goto error;
2969 }
2970 /* XXX UNREF/NEWREF interface should be more symmetrical */
2971 _Py_DEC_REFTOTAL;
2972 _Py_ForgetReference(v);
2973 *pv = (PyObject *)
2974 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
2975 if (*pv == NULL) {
2976 PyObject_Del(v);
2977 PyErr_NoMemory();
2978 return -1;
2979 }
2980 _Py_NewReference(*pv);
2981 sv = (PyBytesObject *) *pv;
2982 Py_SIZE(sv) = newsize;
2983 sv->ob_sval[newsize] = '\0';
2984 sv->ob_shash = -1; /* invalidate cached hash value */
2985 return 0;
2986error:
2987 *pv = 0;
2988 Py_DECREF(v);
2989 PyErr_BadInternalCall();
2990 return -1;
2991}
2992
2993void
2994PyBytes_Fini(void)
2995{
2996 int i;
2997 for (i = 0; i < UCHAR_MAX + 1; i++)
2998 Py_CLEAR(characters[i]);
2999 Py_CLEAR(nullstring);
3000}
3001
3002/*********************** Bytes Iterator ****************************/
3003
3004typedef struct {
3005 PyObject_HEAD
3006 Py_ssize_t it_index;
3007 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3008} striterobject;
3009
3010static void
3011striter_dealloc(striterobject *it)
3012{
3013 _PyObject_GC_UNTRACK(it);
3014 Py_XDECREF(it->it_seq);
3015 PyObject_GC_Del(it);
3016}
3017
3018static int
3019striter_traverse(striterobject *it, visitproc visit, void *arg)
3020{
3021 Py_VISIT(it->it_seq);
3022 return 0;
3023}
3024
3025static PyObject *
3026striter_next(striterobject *it)
3027{
3028 PyBytesObject *seq;
3029 PyObject *item;
3030
3031 assert(it != NULL);
3032 seq = it->it_seq;
3033 if (seq == NULL)
3034 return NULL;
3035 assert(PyBytes_Check(seq));
3036
3037 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3038 item = PyLong_FromLong(
3039 (unsigned char)seq->ob_sval[it->it_index]);
3040 if (item != NULL)
3041 ++it->it_index;
3042 return item;
3043 }
3044
3045 it->it_seq = NULL;
3046 Py_DECREF(seq);
3047 return NULL;
3048}
3049
3050static PyObject *
3051striter_len(striterobject *it)
3052{
3053 Py_ssize_t len = 0;
3054 if (it->it_seq)
3055 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3056 return PyLong_FromSsize_t(len);
3057}
3058
3059PyDoc_STRVAR(length_hint_doc,
3060 "Private method returning an estimate of len(list(it)).");
3061
3062static PyObject *
3063striter_reduce(striterobject *it)
3064{
3065 if (it->it_seq != NULL) {
3066 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3067 it->it_seq, it->it_index);
3068 } else {
3069 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
3070 }
3071}
3072
3073PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3074
3075static PyObject *
3076striter_setstate(striterobject *it, PyObject *state)
3077{
3078 Py_ssize_t index = PyLong_AsSsize_t(state);
3079 if (index == -1 && PyErr_Occurred())
3080 return NULL;
3081 if (it->it_seq != NULL) {
3082 if (index < 0)
3083 index = 0;
3084 else if (index > PyBytes_GET_SIZE(it->it_seq))
3085 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3086 it->it_index = index;
3087 }
3088 Py_RETURN_NONE;
3089}
3090
3091PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3092
3093static PyMethodDef striter_methods[] = {
3094 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3095 length_hint_doc},
3096 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3097 reduce_doc},
3098 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3099 setstate_doc},
3100 {NULL, NULL} /* sentinel */
3101};
3102
3103PyTypeObject PyBytesIter_Type = {
3104 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3105 "bytes_iterator", /* tp_name */
3106 sizeof(striterobject), /* tp_basicsize */
3107 0, /* tp_itemsize */
3108 /* methods */
3109 (destructor)striter_dealloc, /* tp_dealloc */
3110 0, /* tp_print */
3111 0, /* tp_getattr */
3112 0, /* tp_setattr */
3113 0, /* tp_reserved */
3114 0, /* tp_repr */
3115 0, /* tp_as_number */
3116 0, /* tp_as_sequence */
3117 0, /* tp_as_mapping */
3118 0, /* tp_hash */
3119 0, /* tp_call */
3120 0, /* tp_str */
3121 PyObject_GenericGetAttr, /* tp_getattro */
3122 0, /* tp_setattro */
3123 0, /* tp_as_buffer */
3124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3125 0, /* tp_doc */
3126 (traverseproc)striter_traverse, /* tp_traverse */
3127 0, /* tp_clear */
3128 0, /* tp_richcompare */
3129 0, /* tp_weaklistoffset */
3130 PyObject_SelfIter, /* tp_iter */
3131 (iternextfunc)striter_next, /* tp_iternext */
3132 striter_methods, /* tp_methods */
3133 0,
3134};
3135
3136static PyObject *
3137bytes_iter(PyObject *seq)
3138{
3139 striterobject *it;
3140
3141 if (!PyBytes_Check(seq)) {
3142 PyErr_BadInternalCall();
3143 return NULL;
3144 }
3145 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3146 if (it == NULL)
3147 return NULL;
3148 it->it_index = 0;
3149 Py_INCREF(seq);
3150 it->it_seq = (PyBytesObject *)seq;
3151 _PyObject_GC_TRACK(it);
3152 return (PyObject *)it;
3153}
3154
3155
3156/* _PyBytesWriter API */
3157
/* Divisor used by the writer when overallocating: a request of `size`
   bytes is enlarged by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% works best. */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% works best. */
#  define OVERALLOCATE_FACTOR 4
#endif
3165
3166void
3167_PyBytesWriter_Init(_PyBytesWriter *writer)
3168{
3169 /* Set all attributes before small_buffer to 0 */
3170 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3171#ifdef Py_DEBUG
3172 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3173#endif
3174}
3175
3176void
3177_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3178{
3179 Py_CLEAR(writer->buffer);
3180}
3181
3182Py_LOCAL_INLINE(char*)
3183_PyBytesWriter_AsString(_PyBytesWriter *writer)
3184{
3185 if (writer->use_small_buffer) {
3186 assert(writer->buffer == NULL);
3187 return writer->small_buffer;
3188 }
3189 else if (writer->use_bytearray) {
3190 assert(writer->buffer != NULL);
3191 return PyByteArray_AS_STRING(writer->buffer);
3192 }
3193 else {
3194 assert(writer->buffer != NULL);
3195 return PyBytes_AS_STRING(writer->buffer);
3196 }
3197}
3198
3199Py_LOCAL_INLINE(Py_ssize_t)
3200_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3201{
3202 char *start = _PyBytesWriter_AsString(writer);
3203 assert(str != NULL);
3204 assert(str >= start);
3205 assert(str - start <= writer->allocated);
3206 return str - start;
3207}
3208
3209Py_LOCAL_INLINE(void)
3210_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3211{
3212#ifdef Py_DEBUG
3213 char *start, *end;
3214
3215 if (writer->use_small_buffer) {
3216 assert(writer->buffer == NULL);
3217 }
3218 else {
3219 assert(writer->buffer != NULL);
3220 if (writer->use_bytearray)
3221 assert(PyByteArray_CheckExact(writer->buffer));
3222 else
3223 assert(PyBytes_CheckExact(writer->buffer));
3224 assert(Py_REFCNT(writer->buffer) == 1);
3225 }
3226
3227 if (writer->use_bytearray) {
3228 /* bytearray has its own overallocation algorithm,
3229 writer overallocation must be disabled */
3230 assert(!writer->overallocate);
3231 }
3232
3233 assert(0 <= writer->allocated);
3234 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3235 /* the last byte must always be null */
3236 start = _PyBytesWriter_AsString(writer);
3237 assert(start[writer->allocated] == 0);
3238
3239 end = start + writer->allocated;
3240 assert(str != NULL);
3241 assert(start <= str && str <= end);
3242#endif
3243}
3244
3245void*
3246_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3247{
3248 Py_ssize_t allocated, pos;
3249
3250 _PyBytesWriter_CheckConsistency(writer, str);
3251 assert(writer->allocated < size);
3252
3253 allocated = size;
3254 if (writer->overallocate
3255 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3256 /* overallocate to limit the number of realloc() */
3257 allocated += allocated / OVERALLOCATE_FACTOR;
3258 }
3259
3260 pos = _PyBytesWriter_GetSize(writer, str);
3261 if (!writer->use_small_buffer) {
3262 if (writer->use_bytearray) {
3263 if (PyByteArray_Resize(writer->buffer, allocated))
3264 goto error;
3265 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3266 but we cannot use ob_alloc because bytes may need to be moved
3267 to use the whole buffer. bytearray uses an internal optimization
3268 to avoid moving or copying bytes when bytes are removed at the
3269 beginning (ex: del bytearray[:1]). */
3270 }
3271 else {
3272 if (_PyBytes_Resize(&writer->buffer, allocated))
3273 goto error;
3274 }
3275 }
3276 else {
3277 /* convert from stack buffer to bytes object buffer */
3278 assert(writer->buffer == NULL);
3279
3280 if (writer->use_bytearray)
3281 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3282 else
3283 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3284 if (writer->buffer == NULL)
3285 goto error;
3286
3287 if (pos != 0) {
3288 char *dest;
3289 if (writer->use_bytearray)
3290 dest = PyByteArray_AS_STRING(writer->buffer);
3291 else
3292 dest = PyBytes_AS_STRING(writer->buffer);
3293 memcpy(dest,
3294 writer->small_buffer,
3295 pos);
3296 }
3297
3298 writer->use_small_buffer = 0;
3299#ifdef Py_DEBUG
3300 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3301#endif
3302 }
3303 writer->allocated = allocated;
3304
3305 str = _PyBytesWriter_AsString(writer) + pos;
3306 _PyBytesWriter_CheckConsistency(writer, str);
3307 return str;
3308
3309error:
3310 _PyBytesWriter_Dealloc(writer);
3311 return NULL;
3312}
3313
3314void*
3315_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316{
3317 Py_ssize_t new_min_size;
3318
3319 _PyBytesWriter_CheckConsistency(writer, str);
3320 assert(size >= 0);
3321
3322 if (size == 0) {
3323 /* nothing to do */
3324 return str;
3325 }
3326
3327 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328 PyErr_NoMemory();
3329 _PyBytesWriter_Dealloc(writer);
3330 return NULL;
3331 }
3332 new_min_size = writer->min_size + size;
3333
3334 if (new_min_size > writer->allocated)
3335 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336
3337 writer->min_size = new_min_size;
3338 return str;
3339}
3340
3341/* Allocate the buffer to write size bytes.
3342 Return the pointer to the beginning of buffer data.
3343 Raise an exception and return NULL on error. */
3344void*
3345_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346{
3347 /* ensure that _PyBytesWriter_Alloc() is only called once */
3348 assert(writer->min_size == 0 && writer->buffer == NULL);
3349 assert(size >= 0);
3350
3351 writer->use_small_buffer = 1;
3352#ifdef Py_DEBUG
3353 writer->allocated = sizeof(writer->small_buffer) - 1;
3354 /* In debug mode, don't use the full small buffer because it is less
3355 efficient than bytes and bytearray objects to detect buffer underflow
3356 and buffer overflow. Use 10 bytes of the small buffer to test also
3357 code using the smaller buffer in debug mode.
3358
3359 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360 in debug mode to also be able to detect stack overflow when running
3361 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363 stack overflow. */
3364 writer->allocated = Py_MIN(writer->allocated, 10);
3365 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366 to detect buffer overflow */
3367 writer->small_buffer[writer->allocated] = 0;
3368#else
3369 writer->allocated = sizeof(writer->small_buffer);
3370#endif
3371 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3372}
3373
3374PyObject *
3375_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3376{
3377 Py_ssize_t size;
3378 PyObject *result;
3379
3380 _PyBytesWriter_CheckConsistency(writer, str);
3381
3382 size = _PyBytesWriter_GetSize(writer, str);
3383 if (size == 0 && !writer->use_bytearray) {
3384 Py_CLEAR(writer->buffer);
3385 /* Get the empty byte string singleton */
3386 result = PyBytes_FromStringAndSize(NULL, 0);
3387 }
3388 else if (writer->use_small_buffer) {
3389 if (writer->use_bytearray) {
3390 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391 }
3392 else {
3393 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394 }
3395 }
3396 else {
3397 result = writer->buffer;
3398 writer->buffer = NULL;
3399
3400 if (size != writer->allocated) {
3401 if (writer->use_bytearray) {
3402 if (PyByteArray_Resize(result, size)) {
3403 Py_DECREF(result);
3404 return NULL;
3405 }
3406 }
3407 else {
3408 if (_PyBytes_Resize(&result, size)) {
3409 assert(result == NULL);
3410 return NULL;
3411 }
3412 }
3413 }
3414 }
3415 return result;
3416}
3417
3418void*
3419_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3420 const void *bytes, Py_ssize_t size)
3421{
3422 char *str = (char *)ptr;
3423
3424 str = _PyBytesWriter_Prepare(writer, str, size);
3425 if (str == NULL)
3426 return NULL;
3427
3428 memcpy(str, bytes, size);
3429 str += size;
3430
3431 return str;
3432}