blob: 2bf39e3529378f42963306e3d1035d34a5c4bb37 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030012class bytes "PyBytesObject *" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +030014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 For PyBytes_FromString(), the parameter `str' points to a null-terminated
39 string containing exactly `size' bytes.
40
41 For PyBytes_FromStringAndSize(), the parameter `str' is
42 either NULL or else points to a string containing at least `size' bytes.
43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44 not have to be null-terminated. (Therefore it is safe to construct a
45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47 bytes (setting the last byte to the null terminating character) and you can
48 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000049 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000050 alter the data yourself, since the strings may be shared.
51
52 The PyObject member `op->ob_size', which denotes the number of "extra
53 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020054 allocated for string data, not counting the null terminating character.
55 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 PyBytes_FromStringAndSize()) or the length of the string in the `str'
57 parameter (for PyBytes_FromString()).
58*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Flag bits collected while parsing a format specifier, one per flag
 * character:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
Victor Stinner71dc3d82016-04-26 12:35:13 +0200441 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 str += len;
443 return str;
444 }
445
446 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800447 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200448 *p_result = result;
449 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800450}
451
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455 PyObject *result, *iobj;
456 if (type == 'i')
457 type = 'd';
458 if (PyLong_Check(v))
459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460 if (PyNumber_Check(v)) {
461 /* make sure number is a type of integer for o, x, and X */
462 if (type == 'o' || type == 'x' || type == 'X')
463 iobj = PyNumber_Index(v);
464 else
465 iobj = PyNumber_Long(v);
466 if (iobj == NULL) {
467 if (!PyErr_ExceptionMatches(PyExc_TypeError))
468 return NULL;
469 }
470 else if (!PyLong_Check(iobj))
471 Py_CLEAR(iobj);
472 if (iobj != NULL) {
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 }
478 PyErr_Format(PyExc_TypeError,
479 "%%%c format: %s is required, not %.200s", type,
480 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481 : "a number",
482 Py_TYPE(v)->tp_name);
483 return NULL;
484}
485
486static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200487byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800488{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 *p = PyBytes_AS_STRING(arg)[0];
491 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 *p = PyByteArray_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
497 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300498 PyObject *iobj;
499 long ival;
500 int overflow;
501 /* make sure number is a type of integer */
502 if (PyLong_Check(arg)) {
503 ival = PyLong_AsLongAndOverflow(arg, &overflow);
504 }
505 else {
506 iobj = PyNumber_Index(arg);
507 if (iobj == NULL) {
508 if (!PyErr_ExceptionMatches(PyExc_TypeError))
509 return 0;
510 goto onError;
511 }
512 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513 Py_DECREF(iobj);
514 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300515 if (!overflow && ival == -1 && PyErr_Occurred())
516 goto onError;
517 if (overflow || !(0 <= ival && ival <= 255)) {
518 PyErr_SetString(PyExc_OverflowError,
519 "%c arg not in range(256)");
520 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300522 *p = (char)ival;
523 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300525 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200526 PyErr_SetString(PyExc_TypeError,
527 "%c requires an integer in range(256) or a single byte");
528 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800529}
530
531static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200532format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200534 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 /* is it a bytes object? */
537 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 *pbuf = PyBytes_AS_STRING(v);
539 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200541 return v;
542 }
543 if (PyByteArray_Check(v)) {
544 *pbuf = PyByteArray_AS_STRING(v);
545 *plen = PyByteArray_GET_SIZE(v);
546 Py_INCREF(v);
547 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800548 }
549 /* does it support __bytes__? */
550 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551 if (func != NULL) {
552 result = PyObject_CallFunctionObjArgs(func, NULL);
553 Py_DECREF(func);
554 if (result == NULL)
555 return NULL;
556 if (!PyBytes_Check(result)) {
557 PyErr_Format(PyExc_TypeError,
558 "__bytes__ returned non-bytes (type %.200s)",
559 Py_TYPE(result)->tp_name);
560 Py_DECREF(result);
561 return NULL;
562 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200563 *pbuf = PyBytes_AS_STRING(result);
564 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 return result;
566 }
567 PyErr_Format(PyExc_TypeError,
568 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569 Py_TYPE(v)->tp_name);
570 return NULL;
571}
572
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200573/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800574
575PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200576_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578{
Victor Stinner772b2b02015-10-14 09:56:53 +0200579 const char *fmt;
580 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800581 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200582 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800584 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200585 _PyBytesWriter writer;
586
Victor Stinner772b2b02015-10-14 09:56:53 +0200587 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800588 PyErr_BadInternalCall();
589 return NULL;
590 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200591 fmt = format;
592 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200593
594 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200596
597 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800599 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200600 if (!use_bytearray)
601 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200602
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 if (PyTuple_Check(args)) {
604 arglen = PyTuple_GET_SIZE(args);
605 argidx = 0;
606 }
607 else {
608 arglen = -1;
609 argidx = -2;
610 }
611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613 !PyByteArray_Check(args)) {
614 dict = args;
615 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 while (--fmtcnt >= 0) {
618 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 Py_ssize_t len;
620 char *pos;
621
622 pos = strchr(fmt + 1, '%');
623 if (pos != NULL)
624 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200625 else
626 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200627 assert(len != 0);
628
629 Py_MEMCPY(res, fmt, len);
630 res += len;
631 fmt += len;
632 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800633 }
634 else {
635 /* Got a format specifier */
636 int flags = 0;
637 Py_ssize_t width = -1;
638 int prec = -1;
639 int c = '\0';
640 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800641 PyObject *v = NULL;
642 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200643 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800644 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200645 Py_ssize_t len = 0;
646 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200647 Py_ssize_t alloc;
648#ifdef Py_DEBUG
649 char *before;
650#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651
Ethan Furmanb95b5612015-01-23 20:05:18 -0800652 fmt++;
653 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200654 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800655 Py_ssize_t keylen;
656 PyObject *key;
657 int pcount = 1;
658
659 if (dict == NULL) {
660 PyErr_SetString(PyExc_TypeError,
661 "format requires a mapping");
662 goto error;
663 }
664 ++fmt;
665 --fmtcnt;
666 keystart = fmt;
667 /* Skip over balanced parentheses */
668 while (pcount > 0 && --fmtcnt >= 0) {
669 if (*fmt == ')')
670 --pcount;
671 else if (*fmt == '(')
672 ++pcount;
673 fmt++;
674 }
675 keylen = fmt - keystart - 1;
676 if (fmtcnt < 0 || pcount > 0) {
677 PyErr_SetString(PyExc_ValueError,
678 "incomplete format key");
679 goto error;
680 }
681 key = PyBytes_FromStringAndSize(keystart,
682 keylen);
683 if (key == NULL)
684 goto error;
685 if (args_owned) {
686 Py_DECREF(args);
687 args_owned = 0;
688 }
689 args = PyObject_GetItem(dict, key);
690 Py_DECREF(key);
691 if (args == NULL) {
692 goto error;
693 }
694 args_owned = 1;
695 arglen = -1;
696 argidx = -2;
697 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200698
699 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800700 while (--fmtcnt >= 0) {
701 switch (c = *fmt++) {
702 case '-': flags |= F_LJUST; continue;
703 case '+': flags |= F_SIGN; continue;
704 case ' ': flags |= F_BLANK; continue;
705 case '#': flags |= F_ALT; continue;
706 case '0': flags |= F_ZERO; continue;
707 }
708 break;
709 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200710
711 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800712 if (c == '*') {
713 v = getnextarg(args, arglen, &argidx);
714 if (v == NULL)
715 goto error;
716 if (!PyLong_Check(v)) {
717 PyErr_SetString(PyExc_TypeError,
718 "* wants int");
719 goto error;
720 }
721 width = PyLong_AsSsize_t(v);
722 if (width == -1 && PyErr_Occurred())
723 goto error;
724 if (width < 0) {
725 flags |= F_LJUST;
726 width = -width;
727 }
728 if (--fmtcnt >= 0)
729 c = *fmt++;
730 }
731 else if (c >= 0 && isdigit(c)) {
732 width = c - '0';
733 while (--fmtcnt >= 0) {
734 c = Py_CHARMASK(*fmt++);
735 if (!isdigit(c))
736 break;
737 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738 PyErr_SetString(
739 PyExc_ValueError,
740 "width too big");
741 goto error;
742 }
743 width = width*10 + (c - '0');
744 }
745 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200746
747 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800748 if (c == '.') {
749 prec = 0;
750 if (--fmtcnt >= 0)
751 c = *fmt++;
752 if (c == '*') {
753 v = getnextarg(args, arglen, &argidx);
754 if (v == NULL)
755 goto error;
756 if (!PyLong_Check(v)) {
757 PyErr_SetString(
758 PyExc_TypeError,
759 "* wants int");
760 goto error;
761 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200762 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800763 if (prec == -1 && PyErr_Occurred())
764 goto error;
765 if (prec < 0)
766 prec = 0;
767 if (--fmtcnt >= 0)
768 c = *fmt++;
769 }
770 else if (c >= 0 && isdigit(c)) {
771 prec = c - '0';
772 while (--fmtcnt >= 0) {
773 c = Py_CHARMASK(*fmt++);
774 if (!isdigit(c))
775 break;
776 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777 PyErr_SetString(
778 PyExc_ValueError,
779 "prec too big");
780 goto error;
781 }
782 prec = prec*10 + (c - '0');
783 }
784 }
785 } /* prec */
786 if (fmtcnt >= 0) {
787 if (c == 'h' || c == 'l' || c == 'L') {
788 if (--fmtcnt >= 0)
789 c = *fmt++;
790 }
791 }
792 if (fmtcnt < 0) {
793 PyErr_SetString(PyExc_ValueError,
794 "incomplete format");
795 goto error;
796 }
797 if (c != '%') {
798 v = getnextarg(args, arglen, &argidx);
799 if (v == NULL)
800 goto error;
801 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200802
803 if (fmtcnt < 0) {
804 /* last writer: disable writer overallocation */
805 writer.overallocate = 0;
806 }
807
Ethan Furmanb95b5612015-01-23 20:05:18 -0800808 sign = 0;
809 fill = ' ';
810 switch (c) {
811 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200812 *res++ = '%';
813 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200844 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200845 && width == -1 && prec == -1
846 && !(flags & (F_SIGN | F_BLANK))
847 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200848 {
849 /* Fast path */
850 int alternate = flags & F_ALT;
851 int base;
852
853 switch(c)
854 {
855 default:
856 assert(0 && "'type' not in [diuoxX]");
857 case 'd':
858 case 'i':
859 case 'u':
860 base = 10;
861 break;
862 case 'o':
863 base = 8;
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 break;
869 }
870
871 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200872 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200873 res = _PyLong_FormatBytesWriter(&writer, res,
874 v, base, alternate);
875 if (res == NULL)
876 goto error;
877 continue;
878 }
879
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300880 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200881 if (!temp)
882 goto error;
883 assert(PyUnicode_IS_ASCII(temp));
884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885 len = PyUnicode_GET_LENGTH(temp);
886 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 if (flags & F_ZERO)
888 fill = '0';
889 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200890
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 case 'e':
892 case 'E':
893 case 'f':
894 case 'F':
895 case 'g':
896 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897 if (width == -1 && prec == -1
898 && !(flags & (F_SIGN | F_BLANK)))
899 {
900 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200901 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200902 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (res == NULL)
904 goto error;
905 continue;
906 }
907
Victor Stinnerad771582015-10-09 12:38:53 +0200908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 goto error;
910 pbuf = PyBytes_AS_STRING(temp);
911 len = PyBytes_GET_SIZE(temp);
912 sign = 1;
913 if (flags & F_ZERO)
914 fill = '0';
915 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200918 pbuf = &onechar;
919 len = byte_converter(v, &onechar);
920 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200922 if (width == -1) {
923 /* Fast path */
924 *res++ = onechar;
925 continue;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200928
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 default:
930 PyErr_Format(PyExc_ValueError,
931 "unsupported format character '%c' (0x%x) "
932 "at index %zd",
933 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200934 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800935 goto error;
936 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200937
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if (sign) {
939 if (*pbuf == '-' || *pbuf == '+') {
940 sign = *pbuf++;
941 len--;
942 }
943 else if (flags & F_SIGN)
944 sign = '+';
945 else if (flags & F_BLANK)
946 sign = ' ';
947 else
948 sign = 0;
949 }
950 if (width < len)
951 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
953 alloc = width;
954 if (sign != 0 && len == width)
955 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200956 /* 2: size preallocated for %s */
957 if (alloc > 2) {
958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800961 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200962#ifdef Py_DEBUG
963 before = res;
964#endif
965
966 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 if (sign) {
968 if (fill != ' ')
969 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 if (width > len)
971 width--;
972 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Write the numeric prefix for "x", "X" and "o" formats
975 if the alternate form is used.
976 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
978 assert(pbuf[0] == '0');
979 assert(pbuf[1] == c);
980 if (fill != ' ') {
981 *res++ = *pbuf++;
982 *res++ = *pbuf++;
983 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 width -= 2;
985 if (width < 0)
986 width = 0;
987 len -= 2;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992 memset(res, fill, width - len);
993 res += (width - len);
994 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996
997 /* If padding with spaces: write sign if needed and/or numeric
998 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (fill == ' ') {
1000 if (sign)
1001 *res++ = sign;
1002 if ((flags & F_ALT) &&
1003 (c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001010
1011 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 Py_MEMCPY(res, pbuf, len);
1013 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (dict && (argidx < arglen) && c != '%') {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001024 Py_XDECREF(temp);
1025 goto error;
1026 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029#ifdef Py_DEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001037 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001045
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050
1051 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057}
1058
1059/* =-= */
1060
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001061static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001062bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001065}
1066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067/* Unescape a backslash-escaped string. If unicode is non-zero,
1068 the string is a u-literal. If recode_encoding is non-zero,
1069 the string is UTF-8 encoded and should be re-encoded in the
1070 specified encoding. */
1071
Victor Stinner2ec80632015-10-14 13:32:13 +02001072static char *
1073_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1074 const char *errors, const char *recode_encoding,
1075 _PyBytesWriter *writer, char *p)
1076{
1077 PyObject *u, *w;
1078 const char* t;
1079
1080 t = *s;
1081 /* Decode non-ASCII bytes as UTF-8. */
1082 while (t < end && (*t & 0x80))
1083 t++;
1084 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1085 if (u == NULL)
1086 return NULL;
1087
1088 /* Recode them in target encoding. */
1089 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1090 Py_DECREF(u);
1091 if (w == NULL)
1092 return NULL;
1093 assert(PyBytes_Check(w));
1094
1095 /* Append bytes to output buffer. */
1096 writer->min_size--; /* substract 1 preallocated byte */
1097 p = _PyBytesWriter_WriteBytes(writer, p,
1098 PyBytes_AS_STRING(w),
1099 PyBytes_GET_SIZE(w));
1100 Py_DECREF(w);
1101 if (p == NULL)
1102 return NULL;
1103
1104 *s = t;
1105 return p;
1106}
1107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 Py_ssize_t len,
1110 const char *errors,
1111 Py_ssize_t unicode,
1112 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001115 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001117 _PyBytesWriter writer;
1118
1119 _PyBytesWriter_Init(&writer);
1120
1121 p = _PyBytesWriter_Alloc(&writer, len);
1122 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001124 writer.overallocate = 1;
1125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 end = s + len;
1127 while (s < end) {
1128 if (*s != '\\') {
1129 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001130 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 *p++ = *s++;
1132 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001133 else {
1134 /* non-ASCII character and need to recode */
1135 p = _PyBytes_DecodeEscapeRecode(&s, end,
1136 errors, recode_encoding,
1137 &writer, p);
1138 if (p == NULL)
1139 goto failed;
1140 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 continue;
1142 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001145 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 PyErr_SetString(PyExc_ValueError,
1147 "Trailing \\ in string");
1148 goto failed;
1149 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 switch (*s++) {
1152 /* XXX This assumes ASCII! */
1153 case '\n': break;
1154 case '\\': *p++ = '\\'; break;
1155 case '\'': *p++ = '\''; break;
1156 case '\"': *p++ = '\"'; break;
1157 case 'b': *p++ = '\b'; break;
1158 case 'f': *p++ = '\014'; break; /* FF */
1159 case 't': *p++ = '\t'; break;
1160 case 'n': *p++ = '\n'; break;
1161 case 'r': *p++ = '\r'; break;
1162 case 'v': *p++ = '\013'; break; /* VT */
1163 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1164 case '0': case '1': case '2': case '3':
1165 case '4': case '5': case '6': case '7':
1166 c = s[-1] - '0';
1167 if (s < end && '0' <= *s && *s <= '7') {
1168 c = (c<<3) + *s++ - '0';
1169 if (s < end && '0' <= *s && *s <= '7')
1170 c = (c<<3) + *s++ - '0';
1171 }
1172 *p++ = c;
1173 break;
1174 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001175 if (s+1 < end) {
1176 int digit1, digit2;
1177 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1178 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1179 if (digit1 < 16 && digit2 < 16) {
1180 *p++ = (unsigned char)((digit1 << 4) + digit2);
1181 s += 2;
1182 break;
1183 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001185 /* invalid hexadecimal digits */
1186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001188 PyErr_Format(PyExc_ValueError,
1189 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001190 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 goto failed;
1192 }
1193 if (strcmp(errors, "replace") == 0) {
1194 *p++ = '?';
1195 } else if (strcmp(errors, "ignore") == 0)
1196 /* do nothing */;
1197 else {
1198 PyErr_Format(PyExc_ValueError,
1199 "decoding error; unknown "
1200 "error handling code: %.400s",
1201 errors);
1202 goto failed;
1203 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001204 /* skip \x */
1205 if (s < end && Py_ISXDIGIT(s[0]))
1206 s++; /* and a hexdigit */
1207 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 default:
1210 *p++ = '\\';
1211 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001212 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 UTF-8 bytes may follow. */
1214 }
1215 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001216
1217 return _PyBytesWriter_Finish(&writer, p);
1218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001220 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222}
1223
1224/* -------------------------------------------------------------------- */
1225/* object api */
1226
1227Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001228PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 if (!PyBytes_Check(op)) {
1231 PyErr_Format(PyExc_TypeError,
1232 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1233 return -1;
1234 }
1235 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236}
1237
1238char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001239PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 if (!PyBytes_Check(op)) {
1242 PyErr_Format(PyExc_TypeError,
1243 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1244 return NULL;
1245 }
1246 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247}
1248
1249int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001250PyBytes_AsStringAndSize(PyObject *obj,
1251 char **s,
1252 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 if (s == NULL) {
1255 PyErr_BadInternalCall();
1256 return -1;
1257 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 if (!PyBytes_Check(obj)) {
1260 PyErr_Format(PyExc_TypeError,
1261 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1262 return -1;
1263 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 *s = PyBytes_AS_STRING(obj);
1266 if (len != NULL)
1267 *len = PyBytes_GET_SIZE(obj);
1268 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001269 PyErr_SetString(PyExc_ValueError,
1270 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 return -1;
1272 }
1273 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274}
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
1276/* -------------------------------------------------------------------- */
1277/* Methods */
1278
Eric Smith0923d1d2009-04-16 20:16:10 +00001279#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001280
1281#include "stringlib/fastsearch.h"
1282#include "stringlib/count.h"
1283#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001284#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001285#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001286#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001287#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001288
Eric Smith0f78bff2009-11-30 01:01:42 +00001289#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291PyObject *
1292PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001294 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001296 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 unsigned char quote, *s, *p;
1299
1300 /* Compute size of output string */
1301 squotes = dquotes = 0;
1302 newsize = 3; /* b'' */
1303 s = (unsigned char*)op->ob_sval;
1304 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001305 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 case '\'': squotes++; break;
1308 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001310 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 default:
1312 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001313 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001314 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001315 if (newsize > PY_SSIZE_T_MAX - incr)
1316 goto overflow;
1317 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 }
1319 quote = '\'';
1320 if (smartquotes && squotes && !dquotes)
1321 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001322 if (squotes && quote == '\'') {
1323 if (newsize > PY_SSIZE_T_MAX - squotes)
1324 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001325 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327
1328 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 if (v == NULL) {
1330 return NULL;
1331 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 *p++ = 'b', *p++ = quote;
1335 for (i = 0; i < length; i++) {
1336 unsigned char c = op->ob_sval[i];
1337 if (c == quote || c == '\\')
1338 *p++ = '\\', *p++ = c;
1339 else if (c == '\t')
1340 *p++ = '\\', *p++ = 't';
1341 else if (c == '\n')
1342 *p++ = '\\', *p++ = 'n';
1343 else if (c == '\r')
1344 *p++ = '\\', *p++ = 'r';
1345 else if (c < ' ' || c >= 0x7f) {
1346 *p++ = '\\';
1347 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001348 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1349 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001351 else
1352 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001355 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001356 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001357
1358 overflow:
1359 PyErr_SetString(PyExc_OverflowError,
1360 "bytes object is too large to make repr");
1361 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001362}
1363
Neal Norwitz6968b052007-02-27 19:02:19 +00001364static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001368}
1369
Neal Norwitz6968b052007-02-27 19:02:19 +00001370static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001371bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (Py_BytesWarningFlag) {
1374 if (PyErr_WarnEx(PyExc_BytesWarning,
1375 "str() on a bytes instance", 1))
1376 return NULL;
1377 }
1378 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001379}
1380
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001382bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385}
Neal Norwitz6968b052007-02-27 19:02:19 +00001386
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387/* This is also used by PyBytes_Concat() */
1388static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001389bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 Py_ssize_t size;
1392 Py_buffer va, vb;
1393 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 va.len = -1;
1396 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001397 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1398 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1400 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1401 goto done;
1402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 /* Optimize end cases */
1405 if (va.len == 0 && PyBytes_CheckExact(b)) {
1406 result = b;
1407 Py_INCREF(result);
1408 goto done;
1409 }
1410 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1411 result = a;
1412 Py_INCREF(result);
1413 goto done;
1414 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 size = va.len + vb.len;
1417 if (size < 0) {
1418 PyErr_NoMemory();
1419 goto done;
1420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 result = PyBytes_FromStringAndSize(NULL, size);
1423 if (result != NULL) {
1424 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1425 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1426 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427
1428 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (va.len != -1)
1430 PyBuffer_Release(&va);
1431 if (vb.len != -1)
1432 PyBuffer_Release(&vb);
1433 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434}
Neal Norwitz6968b052007-02-27 19:02:19 +00001435
1436static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001437bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001438{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001439 Py_ssize_t i;
1440 Py_ssize_t j;
1441 Py_ssize_t size;
1442 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 size_t nbytes;
1444 if (n < 0)
1445 n = 0;
1446 /* watch out for overflows: the size can overflow int,
1447 * and the # of bytes needed can overflow size_t
1448 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001449 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 PyErr_SetString(PyExc_OverflowError,
1451 "repeated bytes are too long");
1452 return NULL;
1453 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001454 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1456 Py_INCREF(a);
1457 return (PyObject *)a;
1458 }
1459 nbytes = (size_t)size;
1460 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1461 PyErr_SetString(PyExc_OverflowError,
1462 "repeated bytes are too long");
1463 return NULL;
1464 }
1465 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1466 if (op == NULL)
1467 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001468 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 op->ob_shash = -1;
1470 op->ob_sval[size] = '\0';
1471 if (Py_SIZE(a) == 1 && n > 0) {
1472 memset(op->ob_sval, a->ob_sval[0] , n);
1473 return (PyObject *) op;
1474 }
1475 i = 0;
1476 if (i < size) {
1477 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1478 i = Py_SIZE(a);
1479 }
1480 while (i < size) {
1481 j = (i <= size-i) ? i : size-i;
1482 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1483 i += j;
1484 }
1485 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001486}
1487
Guido van Rossum98297ee2007-11-06 21:34:58 +00001488static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001489bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001490{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03001491 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001492}
1493
Neal Norwitz6968b052007-02-27 19:02:19 +00001494static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001495bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001496{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (i < 0 || i >= Py_SIZE(a)) {
1498 PyErr_SetString(PyExc_IndexError, "index out of range");
1499 return NULL;
1500 }
1501 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001502}
1503
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001504Py_LOCAL(int)
1505bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1506{
1507 int cmp;
1508 Py_ssize_t len;
1509
1510 len = Py_SIZE(a);
1511 if (Py_SIZE(b) != len)
1512 return 0;
1513
1514 if (a->ob_sval[0] != b->ob_sval[0])
1515 return 0;
1516
1517 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1518 return (cmp == 0);
1519}
1520
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001521static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001522bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 int c;
1525 Py_ssize_t len_a, len_b;
1526 Py_ssize_t min_len;
1527 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001528 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 /* Make sure both arguments are strings. */
1531 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001532 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001533 rc = PyObject_IsInstance((PyObject*)a,
1534 (PyObject*)&PyUnicode_Type);
1535 if (!rc)
1536 rc = PyObject_IsInstance((PyObject*)b,
1537 (PyObject*)&PyUnicode_Type);
1538 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001540 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001541 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001542 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001543 return NULL;
1544 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001545 else {
1546 rc = PyObject_IsInstance((PyObject*)a,
1547 (PyObject*)&PyLong_Type);
1548 if (!rc)
1549 rc = PyObject_IsInstance((PyObject*)b,
1550 (PyObject*)&PyLong_Type);
1551 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001552 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001553 if (rc) {
1554 if (PyErr_WarnEx(PyExc_BytesWarning,
1555 "Comparison between bytes and int", 1))
1556 return NULL;
1557 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001558 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 }
1560 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001562 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001564 case Py_EQ:
1565 case Py_LE:
1566 case Py_GE:
1567 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001569 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001570 case Py_NE:
1571 case Py_LT:
1572 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001574 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001575 default:
1576 PyErr_BadArgument();
1577 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 }
1579 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001580 else if (op == Py_EQ || op == Py_NE) {
1581 int eq = bytes_compare_eq(a, b);
1582 eq ^= (op == Py_NE);
1583 result = eq ? Py_True : Py_False;
1584 }
1585 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 len_a = Py_SIZE(a);
1587 len_b = Py_SIZE(b);
1588 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001589 if (min_len > 0) {
1590 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001592 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001594 else
1595 c = 0;
1596 if (c == 0)
1597 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1598 switch (op) {
1599 case Py_LT: c = c < 0; break;
1600 case Py_LE: c = c <= 0; break;
1601 case Py_GT: c = c > 0; break;
1602 case Py_GE: c = c >= 0; break;
1603 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001604 PyErr_BadArgument();
1605 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001606 }
1607 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 Py_INCREF(result);
1611 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001612}
1613
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001614static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001615bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001616{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001617 if (a->ob_shash == -1) {
1618 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001619 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001620 }
1621 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001622}
1623
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001625bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 if (PyIndex_Check(item)) {
1628 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1629 if (i == -1 && PyErr_Occurred())
1630 return NULL;
1631 if (i < 0)
1632 i += PyBytes_GET_SIZE(self);
1633 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1634 PyErr_SetString(PyExc_IndexError,
1635 "index out of range");
1636 return NULL;
1637 }
1638 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1639 }
1640 else if (PySlice_Check(item)) {
1641 Py_ssize_t start, stop, step, slicelength, cur, i;
1642 char* source_buf;
1643 char* result_buf;
1644 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001645
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001646 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 PyBytes_GET_SIZE(self),
1648 &start, &stop, &step, &slicelength) < 0) {
1649 return NULL;
1650 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 if (slicelength <= 0) {
1653 return PyBytes_FromStringAndSize("", 0);
1654 }
1655 else if (start == 0 && step == 1 &&
1656 slicelength == PyBytes_GET_SIZE(self) &&
1657 PyBytes_CheckExact(self)) {
1658 Py_INCREF(self);
1659 return (PyObject *)self;
1660 }
1661 else if (step == 1) {
1662 return PyBytes_FromStringAndSize(
1663 PyBytes_AS_STRING(self) + start,
1664 slicelength);
1665 }
1666 else {
1667 source_buf = PyBytes_AS_STRING(self);
1668 result = PyBytes_FromStringAndSize(NULL, slicelength);
1669 if (result == NULL)
1670 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 result_buf = PyBytes_AS_STRING(result);
1673 for (cur = start, i = 0; i < slicelength;
1674 cur += step, i++) {
1675 result_buf[i] = source_buf[cur];
1676 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 return result;
1679 }
1680 }
1681 else {
1682 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001683 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 Py_TYPE(item)->tp_name);
1685 return NULL;
1686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687}
1688
1689static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001690bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1693 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694}
1695
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001696static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 (lenfunc)bytes_length, /*sq_length*/
1698 (binaryfunc)bytes_concat, /*sq_concat*/
1699 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1700 (ssizeargfunc)bytes_item, /*sq_item*/
1701 0, /*sq_slice*/
1702 0, /*sq_ass_item*/
1703 0, /*sq_ass_slice*/
1704 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705};
1706
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001707static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 (lenfunc)bytes_length,
1709 (binaryfunc)bytes_subscript,
1710 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711};
1712
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001713static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 (getbufferproc)bytes_buffer_getbuffer,
1715 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716};
1717
1718
/* Values for the `striptype` argument of the strip helpers below. */
#define LEFTSTRIP   0   /* strip the leading end only */
#define RIGHTSTRIP  1   /* strip the trailing end only */
#define BOTHSTRIP   2   /* strip both ends */
1722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723/*[clinic input]
1724bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001726 sep: object = None
1727 The delimiter according to which to split the bytes.
1728 None (the default value) means split on ASCII whitespace characters
1729 (space, tab, return, newline, formfeed, vertical tab).
1730 maxsplit: Py_ssize_t = -1
1731 Maximum number of splits to do.
1732 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734Return a list of the sections in the bytes, using sep as the delimiter.
1735[clinic start generated code]*/
1736
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001737static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001738bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1739/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001740{
1741 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 const char *s = PyBytes_AS_STRING(self), *sub;
1743 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001744 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 if (maxsplit < 0)
1747 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001748 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001750 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 return NULL;
1752 sub = vsub.buf;
1753 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1756 PyBuffer_Release(&vsub);
1757 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001758}
1759
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760/*[clinic input]
1761bytes.partition
1762
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001763 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 /
1765
1766Partition the bytes into three parts using the given separator.
1767
1768This will search for the separator sep in the bytes. If the separator is found,
1769returns a 3-tuple containing the part before the separator, the separator
1770itself, and the part after it.
1771
1772If the separator is not found, returns a 3-tuple containing the original bytes
1773object and two empty bytes objects.
1774[clinic start generated code]*/
1775
Neal Norwitz6968b052007-02-27 19:02:19 +00001776static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001777bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001778/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001779{
Neal Norwitz6968b052007-02-27 19:02:19 +00001780 return stringlib_partition(
1781 (PyObject*) self,
1782 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001783 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001784 );
1785}
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787/*[clinic input]
1788bytes.rpartition
1789
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001790 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001791 /
1792
1793Partition the bytes into three parts using the given separator.
1794
1795This will search for the separator sep in the bytes, starting at the end. If
1796the separator is found, returns a 3-tuple containing the part before the
1797separator, the separator itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing two empty bytes
1800objects and the original bytes object.
1801[clinic start generated code]*/
1802
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803static PyObject *
1804bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001805/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 return stringlib_rpartition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001812}
1813
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001814/*[clinic input]
1815bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001816
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817Return a list of the sections in the bytes, using sep as the delimiter.
1818
1819Splitting is done starting at the end of the bytes and working to the front.
1820[clinic start generated code]*/
1821
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001822static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001823bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1824/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825{
1826 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 const char *s = PyBytes_AS_STRING(self), *sub;
1828 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 if (maxsplit < 0)
1832 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001835 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 return NULL;
1837 sub = vsub.buf;
1838 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1841 PyBuffer_Release(&vsub);
1842 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001843}
1844
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846/*[clinic input]
1847bytes.join
1848
1849 iterable_of_bytes: object
1850 /
1851
1852Concatenate any number of bytes objects.
1853
1854The bytes whose method is called is inserted in between each pair.
1855
1856The result is returned as a new bytes object.
1857
1858Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1859[clinic start generated code]*/
1860
Neal Norwitz6968b052007-02-27 19:02:19 +00001861static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001862bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1863/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001864{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001865 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001866}
1867
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868PyObject *
1869_PyBytes_Join(PyObject *sep, PyObject *x)
1870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 assert(sep != NULL && PyBytes_Check(sep));
1872 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001873 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874}
1875
Neal Norwitz6968b052007-02-27 19:02:19 +00001876static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001877bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001878{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03001879 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Neal Norwitz6968b052007-02-27 19:02:19 +00001880}
1881
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001883bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001884{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03001885 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001886}
1887
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Neal Norwitz6968b052007-02-27 19:02:19 +00001889static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001890bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001891{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03001892 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Neal Norwitz6968b052007-02-27 19:02:19 +00001893}
1894
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001895
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001896static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001897bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001898{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03001899 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001900}
1901
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
1903Py_LOCAL_INLINE(PyObject *)
1904do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 Py_buffer vsep;
1907 char *s = PyBytes_AS_STRING(self);
1908 Py_ssize_t len = PyBytes_GET_SIZE(self);
1909 char *sep;
1910 Py_ssize_t seplen;
1911 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001913 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 return NULL;
1915 sep = vsep.buf;
1916 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 i = 0;
1919 if (striptype != RIGHTSTRIP) {
1920 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1921 i++;
1922 }
1923 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 j = len;
1926 if (striptype != LEFTSTRIP) {
1927 do {
1928 j--;
1929 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1930 j++;
1931 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1936 Py_INCREF(self);
1937 return (PyObject*)self;
1938 }
1939 else
1940 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001941}
1942
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
1944Py_LOCAL_INLINE(PyObject *)
1945do_strip(PyBytesObject *self, int striptype)
1946{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 char *s = PyBytes_AS_STRING(self);
1948 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 i = 0;
1951 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001952 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 i++;
1954 }
1955 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 j = len;
1958 if (striptype != LEFTSTRIP) {
1959 do {
1960 j--;
David Malcolm96960882010-11-05 17:23:41 +00001961 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 j++;
1963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1966 Py_INCREF(self);
1967 return (PyObject*)self;
1968 }
1969 else
1970 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001971}
1972
1973
1974Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001975do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001977 if (bytes != NULL && bytes != Py_None) {
1978 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 }
1980 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981}
1982
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001983/*[clinic input]
1984bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 bytes: object = None
1987 /
1988
1989Strip leading and trailing bytes contained in the argument.
1990
1991If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1992[clinic start generated code]*/
1993
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001994static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001996/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001997{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001998 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001999}
2000
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001/*[clinic input]
2002bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004 bytes: object = None
2005 /
2006
2007Strip leading bytes contained in the argument.
2008
2009If the argument is omitted or None, strip leading ASCII whitespace.
2010[clinic start generated code]*/
2011
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002012static PyObject *
2013bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002014/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002015{
2016 return do_argstrip(self, LEFTSTRIP, bytes);
2017}
2018
2019/*[clinic input]
2020bytes.rstrip
2021
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022 bytes: object = None
2023 /
2024
2025Strip trailing bytes contained in the argument.
2026
2027If the argument is omitted or None, strip trailing ASCII whitespace.
2028[clinic start generated code]*/
2029
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002030static PyObject *
2031bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002032/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002033{
2034 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002035}
Neal Norwitz6968b052007-02-27 19:02:19 +00002036
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002037
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002038static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002039bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002041 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042}
2043
2044
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045/*[clinic input]
2046bytes.translate
2047
Victor Stinner049e5092014-08-17 22:20:00 +02002048 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002049 Translation table, which must be a bytes object of length 256.
2050 [
2051 deletechars: object
2052 ]
2053 /
2054
2055Return a copy with each character mapped by the given translation table.
2056
2057All characters occurring in the optional argument deletechars are removed.
2058The remaining characters are mapped through the given translation table.
2059[clinic start generated code]*/
2060
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002061static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002062bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2063 PyObject *deletechars)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002064/*[clinic end generated code: output=233df850eb50bf8d input=ca20edf39d780d49]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002066 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002067 Py_buffer table_view = {NULL, NULL};
2068 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002069 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002070 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002072 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 Py_ssize_t inlen, tablen, dellen = 0;
2074 PyObject *result;
2075 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002077 if (PyBytes_Check(table)) {
2078 table_chars = PyBytes_AS_STRING(table);
2079 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002081 else if (table == Py_None) {
2082 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 tablen = 256;
2084 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002085 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002086 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002087 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002088 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002089 tablen = table_view.len;
2090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 if (tablen != 256) {
2093 PyErr_SetString(PyExc_ValueError,
2094 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002095 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 return NULL;
2097 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002099 if (deletechars != NULL) {
2100 if (PyBytes_Check(deletechars)) {
2101 del_table_chars = PyBytes_AS_STRING(deletechars);
2102 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002104 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002105 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002106 PyBuffer_Release(&table_view);
2107 return NULL;
2108 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002109 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002110 dellen = del_table_view.len;
2111 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 }
2113 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002114 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 dellen = 0;
2116 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 inlen = PyBytes_GET_SIZE(input_obj);
2119 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 if (result == NULL) {
2121 PyBuffer_Release(&del_table_view);
2122 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002124 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002125 output_start = output = PyBytes_AS_STRING(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 /* If no deletions are required, use faster code */
2130 for (i = inlen; --i >= 0; ) {
2131 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 changed = 1;
2134 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002135 if (!changed && PyBytes_CheckExact(input_obj)) {
2136 Py_INCREF(input_obj);
2137 Py_DECREF(result);
2138 result = input_obj;
2139 }
2140 PyBuffer_Release(&del_table_view);
2141 PyBuffer_Release(&table_view);
2142 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002145 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 for (i = 0; i < 256; i++)
2147 trans_table[i] = Py_CHARMASK(i);
2148 } else {
2149 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002152 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002155 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002156 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 for (i = inlen; --i >= 0; ) {
2159 c = Py_CHARMASK(*input++);
2160 if (trans_table[c] != -1)
2161 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2162 continue;
2163 changed = 1;
2164 }
2165 if (!changed && PyBytes_CheckExact(input_obj)) {
2166 Py_DECREF(result);
2167 Py_INCREF(input_obj);
2168 return input_obj;
2169 }
2170 /* Fix the size of the resulting string */
2171 if (inlen > 0)
2172 _PyBytes_Resize(&result, output - output_start);
2173 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002174}
2175
2176
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002177/*[clinic input]
2178
2179@staticmethod
2180bytes.maketrans
2181
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002182 frm: Py_buffer
2183 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002184 /
2185
2186Return a translation table useable for the bytes or bytearray translate method.
2187
2188The returned table will be one where each byte in frm is mapped to the byte at
2189the same position in to.
2190
2191The bytes objects frm and to must be of the same length.
2192[clinic start generated code]*/
2193
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002195bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002196/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002197{
2198 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002199}
2200
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201/* find and count characters and substrings */
2202
/* Locate the first occurrence of byte `c` in the first `target_len` bytes of
   `target`; evaluates to a char * to it, or NULL when absent (memchr). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2205
2206/* String ops must return a string. */
2207/* If the object is subclass of string, create a copy */
2208Py_LOCAL(PyBytesObject *)
2209return_self(PyBytesObject *self)
2210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 if (PyBytes_CheckExact(self)) {
2212 Py_INCREF(self);
2213 return self;
2214 }
2215 return (PyBytesObject *)PyBytes_FromStringAndSize(
2216 PyBytes_AS_STRING(self),
2217 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218}
2219
2220Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002221countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002223 Py_ssize_t count=0;
2224 const char *start=target;
2225 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 while ( (start=findchar(start, end-start, c)) != NULL ) {
2228 count++;
2229 if (count >= maxcount)
2230 break;
2231 start += 1;
2232 }
2233 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002234}
2235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
2237/* Algorithms for different cases of string replacement */
2238
2239/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2240Py_LOCAL(PyBytesObject *)
2241replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002242 const char *to_s, Py_ssize_t to_len,
2243 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 char *self_s, *result_s;
2246 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002247 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002252 /* 1 at the end plus 1 after every character;
2253 count = min(maxcount, self_len + 1) */
2254 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002256 else
2257 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2258 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002260 /* Check for overflow */
2261 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002262 assert(count > 0);
2263 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 PyErr_SetString(PyExc_OverflowError,
2265 "replacement bytes are too long");
2266 return NULL;
2267 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002268 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 if (! (result = (PyBytesObject *)
2271 PyBytes_FromStringAndSize(NULL, result_len)) )
2272 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 self_s = PyBytes_AS_STRING(self);
2275 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276
Victor Stinnerfac39562016-03-21 10:38:58 +01002277 if (to_len > 1) {
2278 /* Lay the first one down (guaranteed this will occur) */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 Py_MEMCPY(result_s, to_s, to_len);
2280 result_s += to_len;
Victor Stinnerfac39562016-03-21 10:38:58 +01002281 count -= 1;
2282
2283 for (i = 0; i < count; i++) {
2284 *result_s++ = *self_s++;
2285 Py_MEMCPY(result_s, to_s, to_len);
2286 result_s += to_len;
2287 }
2288 }
2289 else {
2290 result_s[0] = to_s[0];
2291 result_s += to_len;
2292 count -= 1;
2293 for (i = 0; i < count; i++) {
2294 *result_s++ = *self_s++;
2295 result_s[0] = to_s[0];
2296 result_s += to_len;
2297 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002298 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002300 /* Copy the rest of the original string */
2301 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002303 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304}
2305
2306/* Special case for deleting a single character */
2307/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2308Py_LOCAL(PyBytesObject *)
2309replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 char *self_s, *result_s;
2313 char *start, *next, *end;
2314 Py_ssize_t self_len, result_len;
2315 Py_ssize_t count;
2316 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 self_len = PyBytes_GET_SIZE(self);
2319 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 count = countchar(self_s, self_len, from_c, maxcount);
2322 if (count == 0) {
2323 return return_self(self);
2324 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 result_len = self_len - count; /* from_len == 1 */
2327 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002329 if ( (result = (PyBytesObject *)
2330 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2331 return NULL;
2332 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 start = self_s;
2335 end = self_s + self_len;
2336 while (count-- > 0) {
2337 next = findchar(start, end-start, from_c);
2338 if (next == NULL)
2339 break;
2340 Py_MEMCPY(result_s, start, next-start);
2341 result_s += (next-start);
2342 start = next+1;
2343 }
2344 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347}
2348
2349/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2350
2351Py_LOCAL(PyBytesObject *)
2352replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 const char *from_s, Py_ssize_t from_len,
2354 Py_ssize_t maxcount) {
2355 char *self_s, *result_s;
2356 char *start, *next, *end;
2357 Py_ssize_t self_len, result_len;
2358 Py_ssize_t count, offset;
2359 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 self_len = PyBytes_GET_SIZE(self);
2362 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002364 count = stringlib_count(self_s, self_len,
2365 from_s, from_len,
2366 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 if (count == 0) {
2369 /* no matches */
2370 return return_self(self);
2371 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 result_len = self_len - (count * from_len);
2374 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 if ( (result = (PyBytesObject *)
2377 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2378 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 start = self_s;
2383 end = self_s + self_len;
2384 while (count-- > 0) {
2385 offset = stringlib_find(start, end-start,
2386 from_s, from_len,
2387 0);
2388 if (offset == -1)
2389 break;
2390 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 result_s += (next-start);
2395 start = next+from_len;
2396 }
2397 Py_MEMCPY(result_s, start, end-start);
2398 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002399}
2400
2401/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2402Py_LOCAL(PyBytesObject *)
2403replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 char from_c, char to_c,
2405 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 char *self_s, *result_s, *start, *end, *next;
2408 Py_ssize_t self_len;
2409 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 /* The result string will be the same size */
2412 self_s = PyBytes_AS_STRING(self);
2413 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 if (next == NULL) {
2418 /* No matches; return the original string */
2419 return return_self(self);
2420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 /* Need to make a new string */
2423 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2424 if (result == NULL)
2425 return NULL;
2426 result_s = PyBytes_AS_STRING(result);
2427 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 /* change everything in-place, starting with this one */
2430 start = result_s + (next-self_s);
2431 *start = to_c;
2432 start++;
2433 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 while (--maxcount > 0) {
2436 next = findchar(start, end-start, from_c);
2437 if (next == NULL)
2438 break;
2439 *next = to_c;
2440 start = next+1;
2441 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002444}
2445
2446/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2447Py_LOCAL(PyBytesObject *)
2448replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 const char *from_s, Py_ssize_t from_len,
2450 const char *to_s, Py_ssize_t to_len,
2451 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002452{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 char *result_s, *start, *end;
2454 char *self_s;
2455 Py_ssize_t self_len, offset;
2456 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 self_s = PyBytes_AS_STRING(self);
2461 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 offset = stringlib_find(self_s, self_len,
2464 from_s, from_len,
2465 0);
2466 if (offset == -1) {
2467 /* No matches; return the original string */
2468 return return_self(self);
2469 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002471 /* Need to make a new string */
2472 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2473 if (result == NULL)
2474 return NULL;
2475 result_s = PyBytes_AS_STRING(result);
2476 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 /* change everything in-place, starting with this one */
2479 start = result_s + offset;
2480 Py_MEMCPY(start, to_s, from_len);
2481 start += from_len;
2482 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 while ( --maxcount > 0) {
2485 offset = stringlib_find(start, end-start,
2486 from_s, from_len,
2487 0);
2488 if (offset==-1)
2489 break;
2490 Py_MEMCPY(start+offset, to_s, from_len);
2491 start += offset+from_len;
2492 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495}
2496
2497/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2498Py_LOCAL(PyBytesObject *)
2499replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 char from_c,
2501 const char *to_s, Py_ssize_t to_len,
2502 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 char *self_s, *result_s;
2505 char *start, *next, *end;
2506 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002507 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 self_s = PyBytes_AS_STRING(self);
2511 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 count = countchar(self_s, self_len, from_c, maxcount);
2514 if (count == 0) {
2515 /* no matches, return unchanged */
2516 return return_self(self);
2517 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 /* use the difference between current and new, hence the "-1" */
2520 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002521 assert(count > 0);
2522 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 PyErr_SetString(PyExc_OverflowError,
2524 "replacement bytes are too long");
2525 return NULL;
2526 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002527 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002529 if ( (result = (PyBytesObject *)
2530 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2531 return NULL;
2532 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 start = self_s;
2535 end = self_s + self_len;
2536 while (count-- > 0) {
2537 next = findchar(start, end-start, from_c);
2538 if (next == NULL)
2539 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 if (next == start) {
2542 /* replace with the 'to' */
2543 Py_MEMCPY(result_s, to_s, to_len);
2544 result_s += to_len;
2545 start += 1;
2546 } else {
2547 /* copy the unchanged old then the 'to' */
2548 Py_MEMCPY(result_s, start, next-start);
2549 result_s += (next-start);
2550 Py_MEMCPY(result_s, to_s, to_len);
2551 result_s += to_len;
2552 start = next+1;
2553 }
2554 }
2555 /* Copy the remainder of the remaining string */
2556 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559}
2560
2561/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2562Py_LOCAL(PyBytesObject *)
2563replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 const char *from_s, Py_ssize_t from_len,
2565 const char *to_s, Py_ssize_t to_len,
2566 Py_ssize_t maxcount) {
2567 char *self_s, *result_s;
2568 char *start, *next, *end;
2569 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002570 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 self_s = PyBytes_AS_STRING(self);
2574 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 count = stringlib_count(self_s, self_len,
2577 from_s, from_len,
2578 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002580 if (count == 0) {
2581 /* no matches, return unchanged */
2582 return return_self(self);
2583 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 /* Check for overflow */
2586 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002587 assert(count > 0);
2588 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002589 PyErr_SetString(PyExc_OverflowError,
2590 "replacement bytes are too long");
2591 return NULL;
2592 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002593 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002595 if ( (result = (PyBytesObject *)
2596 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2597 return NULL;
2598 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 start = self_s;
2601 end = self_s + self_len;
2602 while (count-- > 0) {
2603 offset = stringlib_find(start, end-start,
2604 from_s, from_len,
2605 0);
2606 if (offset == -1)
2607 break;
2608 next = start+offset;
2609 if (next == start) {
2610 /* replace with the 'to' */
2611 Py_MEMCPY(result_s, to_s, to_len);
2612 result_s += to_len;
2613 start += from_len;
2614 } else {
2615 /* copy the unchanged old then the 'to' */
2616 Py_MEMCPY(result_s, start, next-start);
2617 result_s += (next-start);
2618 Py_MEMCPY(result_s, to_s, to_len);
2619 result_s += to_len;
2620 start = next+from_len;
2621 }
2622 }
2623 /* Copy the remainder of the remaining string */
2624 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627}
2628
2629
2630Py_LOCAL(PyBytesObject *)
2631replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 const char *from_s, Py_ssize_t from_len,
2633 const char *to_s, Py_ssize_t to_len,
2634 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 if (maxcount < 0) {
2637 maxcount = PY_SSIZE_T_MAX;
2638 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2639 /* nothing to do; return the original string */
2640 return return_self(self);
2641 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 if (maxcount == 0 ||
2644 (from_len == 0 && to_len == 0)) {
2645 /* nothing to do; return the original string */
2646 return return_self(self);
2647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 if (from_len == 0) {
2652 /* insert the 'to' string everywhere. */
2653 /* >>> "Python".replace("", ".") */
2654 /* '.P.y.t.h.o.n.' */
2655 return replace_interleave(self, to_s, to_len, maxcount);
2656 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2659 /* point for an empty self string to generate a non-empty string */
2660 /* Special case so the remaining code always gets a non-empty string */
2661 if (PyBytes_GET_SIZE(self) == 0) {
2662 return return_self(self);
2663 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 if (to_len == 0) {
2666 /* delete all occurrences of 'from' string */
2667 if (from_len == 1) {
2668 return replace_delete_single_character(
2669 self, from_s[0], maxcount);
2670 } else {
2671 return replace_delete_substring(self, from_s,
2672 from_len, maxcount);
2673 }
2674 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002678 if (from_len == to_len) {
2679 if (from_len == 1) {
2680 return replace_single_character_in_place(
2681 self,
2682 from_s[0],
2683 to_s[0],
2684 maxcount);
2685 } else {
2686 return replace_substring_in_place(
2687 self, from_s, from_len, to_s, to_len,
2688 maxcount);
2689 }
2690 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 /* Otherwise use the more generic algorithms */
2693 if (from_len == 1) {
2694 return replace_single_character(self, from_s[0],
2695 to_s, to_len, maxcount);
2696 } else {
2697 /* len('from')>=2, len('to')>=1 */
2698 return replace_substring(self, from_s, from_len, to_s, to_len,
2699 maxcount);
2700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701}
2702
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002703
2704/*[clinic input]
2705bytes.replace
2706
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002707 old: Py_buffer
2708 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002709 count: Py_ssize_t = -1
2710 Maximum number of occurrences to replace.
2711 -1 (the default value) means replace all occurrences.
2712 /
2713
2714Return a copy with all occurrences of substring old replaced by new.
2715
2716If the optional argument count is given, only the first count occurrences are
2717replaced.
2718[clinic start generated code]*/
2719
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002720static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002721bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Larry Hastings89964c42015-04-14 18:07:59 -04002722 Py_ssize_t count)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002723/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002724{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002726 (const char *)old->buf, old->len,
2727 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728}
2729
2730/** End DALKE **/
2731
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
2733static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002734bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002736 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737}
2738
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002740bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741{
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002742 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743}
2744
2745
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002746/*[clinic input]
2747bytes.decode
2748
2749 encoding: str(c_default="NULL") = 'utf-8'
2750 The encoding with which to decode the bytes.
2751 errors: str(c_default="NULL") = 'strict'
2752 The error handling scheme to use for the handling of decoding errors.
2753 The default is 'strict' meaning that decoding errors raise a
2754 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2755 as well as any other name registered with codecs.register_error that
2756 can handle UnicodeDecodeErrors.
2757
2758Decode the bytes using the codec registered for encoding.
2759[clinic start generated code]*/
2760
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002761static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002762bytes_decode_impl(PyBytesObject *self, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04002763 const char *errors)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002764/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002765{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002766 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002767}
2768
Guido van Rossum20188312006-05-05 15:15:40 +00002769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002770/*[clinic input]
2771bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002772
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002773 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002774
2775Return a list of the lines in the bytes, breaking at line boundaries.
2776
2777Line breaks are not included in the resulting list unless keepends is given and
2778true.
2779[clinic start generated code]*/
2780
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002781static PyObject *
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03002782bytes_splitlines_impl(PyBytesObject *self, int keepends)
2783/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002784{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002785 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002786 (PyObject*) self, PyBytes_AS_STRING(self),
2787 PyBytes_GET_SIZE(self), keepends
2788 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002789}
2790
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002791/*[clinic input]
2792@classmethod
2793bytes.fromhex
2794
2795 string: unicode
2796 /
2797
2798Create a bytes object from a string of hexadecimal numbers.
2799
2800Spaces between two numbers are accepted.
2801Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2802[clinic start generated code]*/
2803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002804static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002805bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002806/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002807{
Victor Stinner2bf89932015-10-14 11:25:33 +02002808 return _PyBytes_FromHex(string, 0);
2809}
2810
2811PyObject*
2812_PyBytes_FromHex(PyObject *string, int use_bytearray)
2813{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02002815 Py_ssize_t hexlen, invalid_char;
2816 unsigned int top, bot;
2817 Py_UCS1 *str, *end;
2818 _PyBytesWriter writer;
2819
2820 _PyBytesWriter_Init(&writer);
2821 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002822
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002823 assert(PyUnicode_Check(string));
2824 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002826 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002827
Victor Stinner2bf89932015-10-14 11:25:33 +02002828 if (!PyUnicode_IS_ASCII(string)) {
2829 void *data = PyUnicode_DATA(string);
2830 unsigned int kind = PyUnicode_KIND(string);
2831 Py_ssize_t i;
2832
2833 /* search for the first non-ASCII character */
2834 for (i = 0; i < hexlen; i++) {
2835 if (PyUnicode_READ(kind, data, i) >= 128)
2836 break;
2837 }
2838 invalid_char = i;
2839 goto error;
2840 }
2841
2842 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2843 str = PyUnicode_1BYTE_DATA(string);
2844
2845 /* This overestimates if there are spaces */
2846 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2847 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02002849
2850 end = str + hexlen;
2851 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02002853 if (*str == ' ') {
2854 do {
2855 str++;
2856 } while (*str == ' ');
2857 if (str >= end)
2858 break;
2859 }
2860
2861 top = _PyLong_DigitValue[*str];
2862 if (top >= 16) {
2863 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 goto error;
2865 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002866 str++;
2867
2868 bot = _PyLong_DigitValue[*str];
2869 if (bot >= 16) {
2870 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2871 goto error;
2872 }
2873 str++;
2874
2875 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 }
Victor Stinner2bf89932015-10-14 11:25:33 +02002877
2878 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002879
2880 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02002881 PyErr_Format(PyExc_ValueError,
2882 "non-hexadecimal number found in "
2883 "fromhex() arg at position %zd", invalid_char);
2884 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002886}
2887
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002888PyDoc_STRVAR(hex__doc__,
2889"B.hex() -> string\n\
2890\n\
2891Create a string of hexadecimal numbers from a bytes object.\n\
2892Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2893
2894static PyObject *
2895bytes_hex(PyBytesObject *self)
2896{
2897 char* argbuf = PyBytes_AS_STRING(self);
2898 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2899 return _Py_strhex(argbuf, arglen);
2900}
2901
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002902static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002903bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002906}
2907
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002908
2909static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002910bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2912 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2913 _Py_capitalize__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002914 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2915 _Py_center__doc__},
2916 {"count", (PyCFunction)bytes_count, METH_VARARGS,
2917 _Py_count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002918 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002919 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002920 _Py_endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002921 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002922 _Py_expandtabs__doc__},
2923 {"find", (PyCFunction)bytes_find, METH_VARARGS,
2924 _Py_find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002925 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00002926 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002927 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2929 _Py_isalnum__doc__},
2930 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2931 _Py_isalpha__doc__},
2932 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2933 _Py_isdigit__doc__},
2934 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2935 _Py_islower__doc__},
2936 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2937 _Py_isspace__doc__},
2938 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2939 _Py_istitle__doc__},
2940 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2941 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002942 BYTES_JOIN_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002943 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002945 BYTES_LSTRIP_METHODDEF
2946 BYTES_MAKETRANS_METHODDEF
2947 BYTES_PARTITION_METHODDEF
2948 BYTES_REPLACE_METHODDEF
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002949 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2950 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2951 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002952 BYTES_RPARTITION_METHODDEF
2953 BYTES_RSPLIT_METHODDEF
2954 BYTES_RSTRIP_METHODDEF
2955 BYTES_SPLIT_METHODDEF
2956 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002958 _Py_startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002959 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2961 _Py_swapcase__doc__},
2962 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002963 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Serhiy Storchakadd40fc32016-05-04 22:23:26 +03002965 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002967};
2968
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002970bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08002971{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002972 if (!PyBytes_Check(self)) {
2973 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02002974 }
Victor Stinner772b2b02015-10-14 09:56:53 +02002975 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03002976 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08002977}
2978
2979static PyNumberMethods bytes_as_number = {
2980 0, /*nb_add*/
2981 0, /*nb_subtract*/
2982 0, /*nb_multiply*/
2983 bytes_mod, /*nb_remainder*/
2984};
2985
2986static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002987bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988
2989static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002990bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 PyObject *x = NULL;
2993 const char *encoding = NULL;
2994 const char *errors = NULL;
2995 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002996 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 Py_ssize_t size;
2998 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002999 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003002 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3004 &encoding, &errors))
3005 return NULL;
3006 if (x == NULL) {
3007 if (encoding != NULL || errors != NULL) {
3008 PyErr_SetString(PyExc_TypeError,
3009 "encoding or errors without sequence "
3010 "argument");
3011 return NULL;
3012 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003013 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003014 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003016 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003018 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003020 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 return NULL;
3022 }
3023 new = PyUnicode_AsEncodedString(x, encoding, errors);
3024 if (new == NULL)
3025 return NULL;
3026 assert(PyBytes_Check(new));
3027 return new;
3028 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003029
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003030 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003031 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003032 PyUnicode_Check(x) ?
3033 "string argument without an encoding" :
3034 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003035 return NULL;
3036 }
3037
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003038 /* We'd like to call PyObject_Bytes here, but we need to check for an
3039 integer argument before deferring to PyBytes_FromObject, something
3040 PyObject_Bytes doesn't do. */
3041 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3042 if (func != NULL) {
3043 new = PyObject_CallFunctionObjArgs(func, NULL);
3044 Py_DECREF(func);
3045 if (new == NULL)
3046 return NULL;
3047 if (!PyBytes_Check(new)) {
3048 PyErr_Format(PyExc_TypeError,
3049 "__bytes__ returned non-bytes (type %.200s)",
3050 Py_TYPE(new)->tp_name);
3051 Py_DECREF(new);
3052 return NULL;
3053 }
3054 return new;
3055 }
3056 else if (PyErr_Occurred())
3057 return NULL;
3058
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003059 if (PyUnicode_Check(x)) {
3060 PyErr_SetString(PyExc_TypeError,
3061 "string argument without an encoding");
3062 return NULL;
3063 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003064 /* Is it an integer? */
3065 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3066 if (size == -1 && PyErr_Occurred()) {
3067 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3068 return NULL;
3069 PyErr_Clear();
3070 }
3071 else if (size < 0) {
3072 PyErr_SetString(PyExc_ValueError, "negative count");
3073 return NULL;
3074 }
3075 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003076 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003077 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003078 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 return new;
3080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003082 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003083}
3084
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003085static PyObject*
3086_PyBytes_FromBuffer(PyObject *x)
3087{
3088 PyObject *new;
3089 Py_buffer view;
3090
3091 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3092 return NULL;
3093
3094 new = PyBytes_FromStringAndSize(NULL, view.len);
3095 if (!new)
3096 goto fail;
3097 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3098 &view, view.len, 'C') < 0)
3099 goto fail;
3100 PyBuffer_Release(&view);
3101 return new;
3102
3103fail:
3104 Py_XDECREF(new);
3105 PyBuffer_Release(&view);
3106 return NULL;
3107}
3108
Victor Stinner3c50ce32015-10-14 13:50:40 +02003109#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
3110 do { \
3111 PyObject *bytes; \
3112 Py_ssize_t i; \
3113 Py_ssize_t value; \
3114 char *str; \
3115 PyObject *item; \
3116 \
3117 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
3118 if (bytes == NULL) \
3119 return NULL; \
3120 str = ((PyBytesObject *)bytes)->ob_sval; \
3121 \
3122 for (i = 0; i < Py_SIZE(x); i++) { \
3123 item = GET_ITEM((x), i); \
3124 value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
3125 if (value == -1 && PyErr_Occurred()) \
3126 goto error; \
3127 \
3128 if (value < 0 || value >= 256) { \
3129 PyErr_SetString(PyExc_ValueError, \
3130 "bytes must be in range(0, 256)"); \
3131 goto error; \
3132 } \
3133 *str++ = (char) value; \
3134 } \
3135 return bytes; \
3136 \
3137 error: \
3138 Py_DECREF(bytes); \
3139 return NULL; \
3140 } while (0)
3141
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003142static PyObject*
3143_PyBytes_FromList(PyObject *x)
3144{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003145 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003146}
3147
3148static PyObject*
3149_PyBytes_FromTuple(PyObject *x)
3150{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003151 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003152}
3153
3154static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003155_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003156{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003157 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003159 _PyBytesWriter writer;
3160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003162 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 if (size == -1 && PyErr_Occurred())
3164 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003165
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003166 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003167 str = _PyBytesWriter_Alloc(&writer, size);
3168 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003169 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003170 writer.overallocate = 1;
3171 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003173 /* Run the iterator to exhaustion */
3174 for (i = 0; ; i++) {
3175 PyObject *item;
3176 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003178 /* Get the next item */
3179 item = PyIter_Next(it);
3180 if (item == NULL) {
3181 if (PyErr_Occurred())
3182 goto error;
3183 break;
3184 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003186 /* Interpret it as an int (__index__) */
3187 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3188 Py_DECREF(item);
3189 if (value == -1 && PyErr_Occurred())
3190 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 /* Range check */
3193 if (value < 0 || value >= 256) {
3194 PyErr_SetString(PyExc_ValueError,
3195 "bytes must be in range(0, 256)");
3196 goto error;
3197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 /* Append the byte */
3200 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003201 str = _PyBytesWriter_Resize(&writer, str, size+1);
3202 if (str == NULL)
3203 return NULL;
3204 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003205 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003206 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003208
3209 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003210
3211 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003212 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003213 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003214}
3215
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003216PyObject *
3217PyBytes_FromObject(PyObject *x)
3218{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003219 PyObject *it, *result;
3220
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003221 if (x == NULL) {
3222 PyErr_BadInternalCall();
3223 return NULL;
3224 }
3225
3226 if (PyBytes_CheckExact(x)) {
3227 Py_INCREF(x);
3228 return x;
3229 }
3230
3231 /* Use the modern buffer interface */
3232 if (PyObject_CheckBuffer(x))
3233 return _PyBytes_FromBuffer(x);
3234
3235 if (PyList_CheckExact(x))
3236 return _PyBytes_FromList(x);
3237
3238 if (PyTuple_CheckExact(x))
3239 return _PyBytes_FromTuple(x);
3240
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003241 if (!PyUnicode_Check(x)) {
3242 it = PyObject_GetIter(x);
3243 if (it != NULL) {
3244 result = _PyBytes_FromIterator(it, x);
3245 Py_DECREF(it);
3246 return result;
3247 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003248 }
3249
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003250 PyErr_Format(PyExc_TypeError,
3251 "cannot convert '%.200s' object to bytes",
3252 x->ob_type->tp_name);
3253 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003254}
3255
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003256static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003257bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003259 PyObject *tmp, *pnew;
3260 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003262 assert(PyType_IsSubtype(type, &PyBytes_Type));
3263 tmp = bytes_new(&PyBytes_Type, args, kwds);
3264 if (tmp == NULL)
3265 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003266 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003267 n = PyBytes_GET_SIZE(tmp);
3268 pnew = type->tp_alloc(type, n);
3269 if (pnew != NULL) {
3270 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3271 PyBytes_AS_STRING(tmp), n+1);
3272 ((PyBytesObject *)pnew)->ob_shash =
3273 ((PyBytesObject *)tmp)->ob_shash;
3274 }
3275 Py_DECREF(tmp);
3276 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003277}
3278
/* Docstring of the bytes type; installed as tp_doc of PyBytes_Type.  It
   lists the accepted constructor call forms followed by a summary of the
   supported source kinds. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003279PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003280"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003281bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003282bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003283bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3284bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003285\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003286Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003287 - an iterable yielding integers in range(256)\n\
3288 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003289 - any object implementing the buffer API.\n\
3290 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003291
/* Forward declaration: bytes_iter() is defined after the iterator type
   further down, but the type object below needs it for tp_iter. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003292static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003293
/* Type object of bytes.  Instances are variable-sized: the basic size is
   PyBytesObject_SIZE and each item is one char.  The type can be
   subclassed (Py_TPFLAGS_BASETYPE) and is flagged with
   Py_TPFLAGS_BYTES_SUBCLASS. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003294PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003295 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3296 "bytes", /* tp_name */
3297 PyBytesObject_SIZE, /* tp_basicsize */
3298 sizeof(char), /* tp_itemsize */
3299 bytes_dealloc, /* tp_dealloc */
3300 0, /* tp_print */
3301 0, /* tp_getattr */
3302 0, /* tp_setattr */
3303 0, /* tp_reserved */
3304 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003305 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003306 &bytes_as_sequence, /* tp_as_sequence */
3307 &bytes_as_mapping, /* tp_as_mapping */
3308 (hashfunc)bytes_hash, /* tp_hash */
3309 0, /* tp_call */
3310 bytes_str, /* tp_str */
3311 PyObject_GenericGetAttr, /* tp_getattro */
3312 0, /* tp_setattro */
3313 &bytes_as_buffer, /* tp_as_buffer */
3314 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3315 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3316 bytes_doc, /* tp_doc */
3317 0, /* tp_traverse */
3318 0, /* tp_clear */
3319 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3320 0, /* tp_weaklistoffset */
3321 bytes_iter, /* tp_iter */
3322 0, /* tp_iternext */
3323 bytes_methods, /* tp_methods */
3324 0, /* tp_members */
3325 0, /* tp_getset */
3326 &PyBaseObject_Type, /* tp_base */
3327 0, /* tp_dict */
3328 0, /* tp_descr_get */
3329 0, /* tp_descr_set */
3330 0, /* tp_dictoffset */
3331 0, /* tp_init */
3332 0, /* tp_alloc */
3333 bytes_new, /* tp_new */
3334 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003335};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003336
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003337void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003338PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003340 assert(pv != NULL);
3341 if (*pv == NULL)
3342 return;
3343 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003344 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003345 return;
3346 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003347
3348 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3349 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003350 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003351 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003352
Antoine Pitrou161d6952014-05-01 14:36:20 +02003353 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003354 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003355 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3356 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3357 Py_CLEAR(*pv);
3358 return;
3359 }
3360
3361 oldsize = PyBytes_GET_SIZE(*pv);
3362 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3363 PyErr_NoMemory();
3364 goto error;
3365 }
3366 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3367 goto error;
3368
3369 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3370 PyBuffer_Release(&wb);
3371 return;
3372
3373 error:
3374 PyBuffer_Release(&wb);
3375 Py_CLEAR(*pv);
3376 return;
3377 }
3378
3379 else {
3380 /* Multiple references, need to create new object */
3381 PyObject *v;
3382 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003383 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003384 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003385}
3386
3387void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003388PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003390 PyBytes_Concat(pv, w);
3391 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003392}
3393
3394
Ethan Furmanb95b5612015-01-23 20:05:18 -08003395/* The following function breaks the notion that bytes are immutable:
3396 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003397 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003398 as creating a new bytes object and destroying the old one, only
3399 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003400 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003401 Note that if there's not enough memory to resize the bytes object, the
3402 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003403 memory" exception is set, and -1 is returned. Else (on success) 0 is
3404 returned, and the value in *pv may or may not be the same as on input.
3405 As always, an extra byte is allocated for a trailing \0 byte (newsize
3406 does *not* include that), and a trailing \0 byte is stored.
3407*/
3408
3409int
3410_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3411{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003412 PyObject *v;
3413 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003414 v = *pv;
3415 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3416 *pv = 0;
3417 Py_DECREF(v);
3418 PyErr_BadInternalCall();
3419 return -1;
3420 }
3421 /* XXX UNREF/NEWREF interface should be more symmetrical */
3422 _Py_DEC_REFTOTAL;
3423 _Py_ForgetReference(v);
3424 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003425 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003426 if (*pv == NULL) {
3427 PyObject_Del(v);
3428 PyErr_NoMemory();
3429 return -1;
3430 }
3431 _Py_NewReference(*pv);
3432 sv = (PyBytesObject *) *pv;
3433 Py_SIZE(sv) = newsize;
3434 sv->ob_sval[newsize] = '\0';
3435 sv->ob_shash = -1; /* invalidate cached hash value */
3436 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003437}
3438
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003439void
3440PyBytes_Fini(void)
3441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003442 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003443 for (i = 0; i < UCHAR_MAX + 1; i++)
3444 Py_CLEAR(characters[i]);
3445 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446}
3447
Benjamin Peterson4116f362008-05-27 00:36:20 +00003448/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003449
/* State of a bytes iterator: the position of the next item to return and
   a reference to the bytes object being iterated. */
3450typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003451 PyObject_HEAD
3452 Py_ssize_t it_index;
3453 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003454} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003455
3456static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003457striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003459 _PyObject_GC_UNTRACK(it);
3460 Py_XDECREF(it->it_seq);
3461 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003462}
3463
3464static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003465striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003467 Py_VISIT(it->it_seq);
3468 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003469}
3470
3471static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003472striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003474 PyBytesObject *seq;
3475 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003477 assert(it != NULL);
3478 seq = it->it_seq;
3479 if (seq == NULL)
3480 return NULL;
3481 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003483 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3484 item = PyLong_FromLong(
3485 (unsigned char)seq->ob_sval[it->it_index]);
3486 if (item != NULL)
3487 ++it->it_index;
3488 return item;
3489 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003491 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003492 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003493 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003494}
3495
3496static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003497striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003499 Py_ssize_t len = 0;
3500 if (it->it_seq)
3501 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3502 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003503}
3504
3505PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003506 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003507
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003508static PyObject *
3509striter_reduce(striterobject *it)
3510{
3511 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003512 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003513 it->it_seq, it->it_index);
3514 } else {
3515 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3516 if (u == NULL)
3517 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003518 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003519 }
3520}
3521
3522PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3523
3524static PyObject *
3525striter_setstate(striterobject *it, PyObject *state)
3526{
3527 Py_ssize_t index = PyLong_AsSsize_t(state);
3528 if (index == -1 && PyErr_Occurred())
3529 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003530 if (it->it_seq != NULL) {
3531 if (index < 0)
3532 index = 0;
3533 else if (index > PyBytes_GET_SIZE(it->it_seq))
3534 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3535 it->it_index = index;
3536 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003537 Py_RETURN_NONE;
3538}
3539
3540PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3541
/* Method table of the bytes iterator: the length hint plus the two
   pickling hooks.  Used as tp_methods of PyBytesIter_Type. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003542static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003543 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3544 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003545 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3546 reduce_doc},
3547 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3548 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003549 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003550};
3551
/* Type object of the iterator returned by bytes_iter().  Instances have a
   fixed size (striterobject), take part in cyclic GC
   (Py_TPFLAGS_HAVE_GC with striter_traverse) and are their own iterator
   (PyObject_SelfIter). */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003552PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003553 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3554 "bytes_iterator", /* tp_name */
3555 sizeof(striterobject), /* tp_basicsize */
3556 0, /* tp_itemsize */
3557 /* methods */
3558 (destructor)striter_dealloc, /* tp_dealloc */
3559 0, /* tp_print */
3560 0, /* tp_getattr */
3561 0, /* tp_setattr */
3562 0, /* tp_reserved */
3563 0, /* tp_repr */
3564 0, /* tp_as_number */
3565 0, /* tp_as_sequence */
3566 0, /* tp_as_mapping */
3567 0, /* tp_hash */
3568 0, /* tp_call */
3569 0, /* tp_str */
3570 PyObject_GenericGetAttr, /* tp_getattro */
3571 0, /* tp_setattro */
3572 0, /* tp_as_buffer */
3573 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3574 0, /* tp_doc */
3575 (traverseproc)striter_traverse, /* tp_traverse */
3576 0, /* tp_clear */
3577 0, /* tp_richcompare */
3578 0, /* tp_weaklistoffset */
3579 PyObject_SelfIter, /* tp_iter */
3580 (iternextfunc)striter_next, /* tp_iternext */
3581 striter_methods, /* tp_methods */
3582 0, /* tp_members */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003583};
3584
3585static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003586bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003588 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003590 if (!PyBytes_Check(seq)) {
3591 PyErr_BadInternalCall();
3592 return NULL;
3593 }
3594 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3595 if (it == NULL)
3596 return NULL;
3597 it->it_index = 0;
3598 Py_INCREF(seq);
3599 it->it_seq = (PyBytesObject *)seq;
3600 _PyObject_GC_TRACK(it);
3601 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003602}
Victor Stinner00165072015-10-09 01:53:21 +02003603
3604
3605/* _PyBytesWriter API */
3606
/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the requested size is increased by size / OVERALLOCATE_FACTOR. */
3607#ifdef MS_WINDOWS
3608 /* On Windows, overallocating by 50% (factor 2) works best */
3609# define OVERALLOCATE_FACTOR 2
3610#else
3611 /* On Linux, overallocating by 25% (factor 4) works best */
3612# define OVERALLOCATE_FACTOR 4
3613#endif
3614
3615void
3616_PyBytesWriter_Init(_PyBytesWriter *writer)
3617{
Victor Stinner661aacc2015-10-14 09:41:48 +02003618 /* Set all attributes before small_buffer to 0 */
3619 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003620#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003621 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003622#endif
3623}
3624
3625void
3626_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3627{
3628 Py_CLEAR(writer->buffer);
3629}
3630
3631Py_LOCAL_INLINE(char*)
3632_PyBytesWriter_AsString(_PyBytesWriter *writer)
3633{
Victor Stinner661aacc2015-10-14 09:41:48 +02003634 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003635 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003636 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003637 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003638 else if (writer->use_bytearray) {
3639 assert(writer->buffer != NULL);
3640 return PyByteArray_AS_STRING(writer->buffer);
3641 }
3642 else {
3643 assert(writer->buffer != NULL);
3644 return PyBytes_AS_STRING(writer->buffer);
3645 }
Victor Stinner00165072015-10-09 01:53:21 +02003646}
3647
3648Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003649_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003650{
3651 char *start = _PyBytesWriter_AsString(writer);
3652 assert(str != NULL);
3653 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003654 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003655 return str - start;
3656}
3657
3658Py_LOCAL_INLINE(void)
3659_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3660{
3661#ifdef Py_DEBUG
3662 char *start, *end;
3663
Victor Stinner661aacc2015-10-14 09:41:48 +02003664 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003665 assert(writer->buffer == NULL);
3666 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003667 else {
3668 assert(writer->buffer != NULL);
3669 if (writer->use_bytearray)
3670 assert(PyByteArray_CheckExact(writer->buffer));
3671 else
3672 assert(PyBytes_CheckExact(writer->buffer));
3673 assert(Py_REFCNT(writer->buffer) == 1);
3674 }
Victor Stinner00165072015-10-09 01:53:21 +02003675
Victor Stinner661aacc2015-10-14 09:41:48 +02003676 if (writer->use_bytearray) {
3677 /* bytearray has its own overallocation algorithm,
3678 writer overallocation must be disabled */
3679 assert(!writer->overallocate);
3680 }
3681
3682 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003683 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003684 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003685 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003686 assert(start[writer->allocated] == 0);
3687
3688 end = start + writer->allocated;
3689 assert(str != NULL);
3690 assert(start <= str && str <= end);
3691#endif
3692}
3693
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003694void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003695_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003696{
3697 Py_ssize_t allocated, pos;
3698
3699 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003700 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02003701
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003702 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02003703 if (writer->overallocate
3704 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3705 /* overallocate to limit the number of realloc() */
3706 allocated += allocated / OVERALLOCATE_FACTOR;
3707 }
3708
Victor Stinner2bf89932015-10-14 11:25:33 +02003709 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003710 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003711 if (writer->use_bytearray) {
3712 if (PyByteArray_Resize(writer->buffer, allocated))
3713 goto error;
3714 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3715 but we cannot use ob_alloc because bytes may need to be moved
3716 to use the whole buffer. bytearray uses an internal optimization
3717 to avoid moving or copying bytes when bytes are removed at the
3718 beginning (ex: del bytearray[:1]). */
3719 }
3720 else {
3721 if (_PyBytes_Resize(&writer->buffer, allocated))
3722 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003723 }
3724 }
3725 else {
3726 /* convert from stack buffer to bytes object buffer */
3727 assert(writer->buffer == NULL);
3728
Victor Stinner661aacc2015-10-14 09:41:48 +02003729 if (writer->use_bytearray)
3730 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3731 else
3732 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003733 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003734 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003735
3736 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003737 char *dest;
3738 if (writer->use_bytearray)
3739 dest = PyByteArray_AS_STRING(writer->buffer);
3740 else
3741 dest = PyBytes_AS_STRING(writer->buffer);
3742 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02003743 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003744 pos);
3745 }
3746
Victor Stinnerb3653a32015-10-09 03:38:24 +02003747 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003748#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003749 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003750#endif
Victor Stinner00165072015-10-09 01:53:21 +02003751 }
3752 writer->allocated = allocated;
3753
3754 str = _PyBytesWriter_AsString(writer) + pos;
3755 _PyBytesWriter_CheckConsistency(writer, str);
3756 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02003757
3758error:
3759 _PyBytesWriter_Dealloc(writer);
3760 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02003761}
3762
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02003763void*
3764_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3765{
3766 Py_ssize_t new_min_size;
3767
3768 _PyBytesWriter_CheckConsistency(writer, str);
3769 assert(size >= 0);
3770
3771 if (size == 0) {
3772 /* nothing to do */
3773 return str;
3774 }
3775
3776 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3777 PyErr_NoMemory();
3778 _PyBytesWriter_Dealloc(writer);
3779 return NULL;
3780 }
3781 new_min_size = writer->min_size + size;
3782
3783 if (new_min_size > writer->allocated)
3784 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3785
3786 writer->min_size = new_min_size;
3787 return str;
3788}
3789
Victor Stinner00165072015-10-09 01:53:21 +02003790/* Allocate the buffer to write size bytes.
3791 Return the pointer to the beginning of buffer data.
3792 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003793void*
Victor Stinner00165072015-10-09 01:53:21 +02003794_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3795{
3796 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003797 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003798 assert(size >= 0);
3799
Victor Stinnerb3653a32015-10-09 03:38:24 +02003800 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003801#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003802 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02003803 /* In debug mode, don't use the full small buffer because it is less
3804 efficient than bytes and bytearray objects to detect buffer underflow
3805 and buffer overflow. Use 10 bytes of the small buffer to test also
3806 code using the smaller buffer in debug mode.
3807
3808 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3809 in debug mode to also be able to detect stack overflow when running
3810 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3811 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3812 stack overflow. */
3813 writer->allocated = Py_MIN(writer->allocated, 10);
3814 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3815 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003816 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003817#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003818 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003819#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003820 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003821}
3822
3823PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003824_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02003825{
Victor Stinner2bf89932015-10-14 11:25:33 +02003826 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02003827 PyObject *result;
3828
3829 _PyBytesWriter_CheckConsistency(writer, str);
3830
Victor Stinner2bf89932015-10-14 11:25:33 +02003831 size = _PyBytesWriter_GetSize(writer, str);
3832 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003833 Py_CLEAR(writer->buffer);
3834 /* Get the empty byte string singleton */
3835 result = PyBytes_FromStringAndSize(NULL, 0);
3836 }
3837 else if (writer->use_small_buffer) {
Victor Stinnere914d412016-04-15 17:52:27 +02003838 if (writer->use_bytearray) {
3839 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3840 }
3841 else {
3842 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3843 }
Victor Stinner6c2cdae2015-10-12 13:29:43 +02003844 }
3845 else {
3846 result = writer->buffer;
3847 writer->buffer = NULL;
3848
Victor Stinner2bf89932015-10-14 11:25:33 +02003849 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003850 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02003851 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003852 Py_DECREF(result);
3853 return NULL;
3854 }
3855 }
3856 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02003857 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003858 assert(result == NULL);
3859 return NULL;
3860 }
Victor Stinner00165072015-10-09 01:53:21 +02003861 }
3862 }
Victor Stinner00165072015-10-09 01:53:21 +02003863 }
Victor Stinner00165072015-10-09 01:53:21 +02003864 return result;
3865}
Victor Stinnerce179bf2015-10-09 12:57:22 +02003866
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003867void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02003868_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003869 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02003870{
Victor Stinnere9aa5952015-10-12 13:57:47 +02003871 char *str = (char *)ptr;
3872
Victor Stinnerce179bf2015-10-09 12:57:22 +02003873 str = _PyBytesWriter_Prepare(writer, str, size);
3874 if (str == NULL)
3875 return NULL;
3876
3877 Py_MEMCPY(str, bytes, size);
3878 str += size;
3879
3880 return str;
3881}