blob: 5bbbcde3fbdad5b74a55418595d7460b22d65323 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* Basic size of a bytes object: the offset of the ob_sval payload plus one
   byte for the trailing null.  Any allocation for a string of length n
   should request PyBytesObject_SIZE + n bytes.

   Compared with sizeof(PyBytesObject), this saves 3 bytes per string
   allocation on a typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Format flag bits, one per flag character of a conversion specifier. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
441 str += len;
442 return str;
443 }
444
445 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200447 *p_result = result;
448 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449}
450
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300451static PyObject *
452formatlong(PyObject *v, int flags, int prec, int type)
453{
454 PyObject *result, *iobj;
455 if (type == 'i')
456 type = 'd';
457 if (PyLong_Check(v))
458 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
459 if (PyNumber_Check(v)) {
460 /* make sure number is a type of integer for o, x, and X */
461 if (type == 'o' || type == 'x' || type == 'X')
462 iobj = PyNumber_Index(v);
463 else
464 iobj = PyNumber_Long(v);
465 if (iobj == NULL) {
466 if (!PyErr_ExceptionMatches(PyExc_TypeError))
467 return NULL;
468 }
469 else if (!PyLong_Check(iobj))
470 Py_CLEAR(iobj);
471 if (iobj != NULL) {
472 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
473 Py_DECREF(iobj);
474 return result;
475 }
476 }
477 PyErr_Format(PyExc_TypeError,
478 "%%%c format: %s is required, not %.200s", type,
479 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : "a number",
481 Py_TYPE(v)->tp_name);
482 return NULL;
483}
484
485static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
489 *p = PyBytes_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
493 *p = PyByteArray_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
496 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300497 PyObject *iobj;
498 long ival;
499 int overflow;
500 /* make sure number is a type of integer */
501 if (PyLong_Check(arg)) {
502 ival = PyLong_AsLongAndOverflow(arg, &overflow);
503 }
504 else {
505 iobj = PyNumber_Index(arg);
506 if (iobj == NULL) {
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return 0;
509 goto onError;
510 }
511 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
512 Py_DECREF(iobj);
513 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300514 if (!overflow && ival == -1 && PyErr_Occurred())
515 goto onError;
516 if (overflow || !(0 <= ival && ival <= 255)) {
517 PyErr_SetString(PyExc_OverflowError,
518 "%c arg not in range(256)");
519 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300521 *p = (char)ival;
522 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300524 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 PyErr_SetString(PyExc_TypeError,
526 "%c requires an integer in range(256) or a single byte");
527 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528}
529
530static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 /* is it a bytes object? */
536 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 *pbuf = PyBytes_AS_STRING(v);
538 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200540 return v;
541 }
542 if (PyByteArray_Check(v)) {
543 *pbuf = PyByteArray_AS_STRING(v);
544 *plen = PyByteArray_GET_SIZE(v);
545 Py_INCREF(v);
546 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 }
548 /* does it support __bytes__? */
549 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
550 if (func != NULL) {
551 result = PyObject_CallFunctionObjArgs(func, NULL);
552 Py_DECREF(func);
553 if (result == NULL)
554 return NULL;
555 if (!PyBytes_Check(result)) {
556 PyErr_Format(PyExc_TypeError,
557 "__bytes__ returned non-bytes (type %.200s)",
558 Py_TYPE(result)->tp_name);
559 Py_DECREF(result);
560 return NULL;
561 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200562 *pbuf = PyBytes_AS_STRING(result);
563 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 return result;
565 }
566 PyErr_Format(PyExc_TypeError,
567 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
568 Py_TYPE(v)->tp_name);
569 return NULL;
570}
571
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200572/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573
574PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200575_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
576 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577{
Victor Stinner772b2b02015-10-14 09:56:53 +0200578 const char *fmt;
579 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587 PyErr_BadInternalCall();
588 return NULL;
589 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 fmt = format;
591 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595
596 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
597 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 if (!use_bytearray)
600 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
Ethan Furmanb95b5612015-01-23 20:05:18 -0800602 if (PyTuple_Check(args)) {
603 arglen = PyTuple_GET_SIZE(args);
604 argidx = 0;
605 }
606 else {
607 arglen = -1;
608 argidx = -2;
609 }
610 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
611 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
612 !PyByteArray_Check(args)) {
613 dict = args;
614 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
Ethan Furmanb95b5612015-01-23 20:05:18 -0800616 while (--fmtcnt >= 0) {
617 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618 Py_ssize_t len;
619 char *pos;
620
621 pos = strchr(fmt + 1, '%');
622 if (pos != NULL)
623 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200624 else
625 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200653 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200811 *res++ = '%';
812 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813
Ethan Furman62e977f2015-03-11 08:17:00 -0700814 case 'r':
815 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200817 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 if (temp == NULL)
819 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200820 assert(PyUnicode_IS_ASCII(temp));
821 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (prec >= 0 && len > prec)
824 len = prec;
825 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 case 's':
828 // %s is only for 2/3 code; 3 only code should use %b
829 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200830 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 if (temp == NULL)
832 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 if (prec >= 0 && len > prec)
834 len = prec;
835 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200836
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'i':
838 case 'd':
839 case 'u':
840 case 'o':
841 case 'x':
842 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200844 && width == -1 && prec == -1
845 && !(flags & (F_SIGN | F_BLANK))
846 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200847 {
848 /* Fast path */
849 int alternate = flags & F_ALT;
850 int base;
851
852 switch(c)
853 {
854 default:
855 assert(0 && "'type' not in [diuoxX]");
856 case 'd':
857 case 'i':
858 case 'u':
859 base = 10;
860 break;
861 case 'o':
862 base = 8;
863 break;
864 case 'x':
865 case 'X':
866 base = 16;
867 break;
868 }
869
870 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200871 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 res = _PyLong_FormatBytesWriter(&writer, res,
873 v, base, alternate);
874 if (res == NULL)
875 goto error;
876 continue;
877 }
878
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300879 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200880 if (!temp)
881 goto error;
882 assert(PyUnicode_IS_ASCII(temp));
883 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 len = PyUnicode_GET_LENGTH(temp);
885 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 if (flags & F_ZERO)
887 fill = '0';
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 case 'e':
891 case 'E':
892 case 'f':
893 case 'F':
894 case 'g':
895 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896 if (width == -1 && prec == -1
897 && !(flags & (F_SIGN | F_BLANK)))
898 {
899 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200900 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (res == NULL)
903 goto error;
904 continue;
905 }
906
Victor Stinnerad771582015-10-09 12:38:53 +0200907 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 goto error;
909 pbuf = PyBytes_AS_STRING(temp);
910 len = PyBytes_GET_SIZE(temp);
911 sign = 1;
912 if (flags & F_ZERO)
913 fill = '0';
914 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200915
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200917 pbuf = &onechar;
918 len = byte_converter(v, &onechar);
919 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 if (width == -1) {
922 /* Fast path */
923 *res++ = onechar;
924 continue;
925 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200927
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 default:
929 PyErr_Format(PyExc_ValueError,
930 "unsupported format character '%c' (0x%x) "
931 "at index %zd",
932 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200933 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 goto error;
935 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 if (sign) {
938 if (*pbuf == '-' || *pbuf == '+') {
939 sign = *pbuf++;
940 len--;
941 }
942 else if (flags & F_SIGN)
943 sign = '+';
944 else if (flags & F_BLANK)
945 sign = ' ';
946 else
947 sign = 0;
948 }
949 if (width < len)
950 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200951
952 alloc = width;
953 if (sign != 0 && len == width)
954 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200955 /* 2: size preallocated for %s */
956 if (alloc > 2) {
957 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200958 if (res == NULL)
959 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200961#ifdef Py_DEBUG
962 before = res;
963#endif
964
965 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 if (sign) {
967 if (fill != ' ')
968 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (width > len)
970 width--;
971 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
973 /* Write the numeric prefix for "x", "X" and "o" formats
974 if the alternate form is used.
975 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
977 assert(pbuf[0] == '0');
978 assert(pbuf[1] == c);
979 if (fill != ' ') {
980 *res++ = *pbuf++;
981 *res++ = *pbuf++;
982 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 width -= 2;
984 if (width < 0)
985 width = 0;
986 len -= 2;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991 memset(res, fill, width - len);
992 res += (width - len);
993 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* If padding with spaces: write sign if needed and/or numeric
997 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (fill == ' ') {
999 if (sign)
1000 *res++ = sign;
1001 if ((flags & F_ALT) &&
1002 (c == 'x' || c == 'X')) {
1003 assert(pbuf[0] == '0');
1004 assert(pbuf[1] == c);
1005 *res++ = *pbuf++;
1006 *res++ = *pbuf++;
1007 }
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 Py_MEMCPY(res, pbuf, len);
1012 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Pad right with the fill character if needed */
1015 if (width > len) {
1016 memset(res, ' ', width - len);
1017 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 if (dict && (argidx < arglen) && c != '%') {
1021 PyErr_SetString(PyExc_TypeError,
1022 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 Py_XDECREF(temp);
1024 goto error;
1025 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028#ifdef Py_DEBUG
1029 /* check that we computed the exact size for this write */
1030 assert((res - before) == alloc);
1031#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* If overallocation was disabled, ensure that it was the last
1035 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 if (argidx < arglen && !dict) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 goto error;
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (args_owned) {
1046 Py_DECREF(args);
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049
1050 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (args_owned) {
1053 Py_DECREF(args);
1054 }
1055 return NULL;
1056}
1057
1058/* =-= */
1059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064}
1065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066/* Unescape a backslash-escaped string. If unicode is non-zero,
1067 the string is a u-literal. If recode_encoding is non-zero,
1068 the string is UTF-8 encoded and should be re-encoded in the
1069 specified encoding. */
1070
Victor Stinner2ec80632015-10-14 13:32:13 +02001071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073 const char *errors, const char *recode_encoding,
1074 _PyBytesWriter *writer, char *p)
1075{
1076 PyObject *u, *w;
1077 const char* t;
1078
1079 t = *s;
1080 /* Decode non-ASCII bytes as UTF-8. */
1081 while (t < end && (*t & 0x80))
1082 t++;
1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084 if (u == NULL)
1085 return NULL;
1086
1087 /* Recode them in target encoding. */
1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089 Py_DECREF(u);
1090 if (w == NULL)
1091 return NULL;
1092 assert(PyBytes_Check(w));
1093
1094 /* Append bytes to output buffer. */
1095 writer->min_size--; /* substract 1 preallocated byte */
1096 p = _PyBytesWriter_WriteBytes(writer, p,
1097 PyBytes_AS_STRING(w),
1098 PyBytes_GET_SIZE(w));
1099 Py_DECREF(w);
1100 if (p == NULL)
1101 return NULL;
1102
1103 *s = t;
1104 return p;
1105}
1106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 Py_ssize_t len,
1109 const char *errors,
1110 Py_ssize_t unicode,
1111 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001114 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001116 _PyBytesWriter writer;
1117
1118 _PyBytesWriter_Init(&writer);
1119
1120 p = _PyBytesWriter_Alloc(&writer, len);
1121 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001123 writer.overallocate = 1;
1124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 end = s + len;
1126 while (s < end) {
1127 if (*s != '\\') {
1128 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 *p++ = *s++;
1131 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 else {
1133 /* non-ASCII character and need to recode */
1134 p = _PyBytes_DecodeEscapeRecode(&s, end,
1135 errors, recode_encoding,
1136 &writer, p);
1137 if (p == NULL)
1138 goto failed;
1139 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 continue;
1141 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001144 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 PyErr_SetString(PyExc_ValueError,
1146 "Trailing \\ in string");
1147 goto failed;
1148 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 switch (*s++) {
1151 /* XXX This assumes ASCII! */
1152 case '\n': break;
1153 case '\\': *p++ = '\\'; break;
1154 case '\'': *p++ = '\''; break;
1155 case '\"': *p++ = '\"'; break;
1156 case 'b': *p++ = '\b'; break;
1157 case 'f': *p++ = '\014'; break; /* FF */
1158 case 't': *p++ = '\t'; break;
1159 case 'n': *p++ = '\n'; break;
1160 case 'r': *p++ = '\r'; break;
1161 case 'v': *p++ = '\013'; break; /* VT */
1162 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1163 case '0': case '1': case '2': case '3':
1164 case '4': case '5': case '6': case '7':
1165 c = s[-1] - '0';
1166 if (s < end && '0' <= *s && *s <= '7') {
1167 c = (c<<3) + *s++ - '0';
1168 if (s < end && '0' <= *s && *s <= '7')
1169 c = (c<<3) + *s++ - '0';
1170 }
1171 *p++ = c;
1172 break;
1173 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001174 if (s+1 < end) {
1175 int digit1, digit2;
1176 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1177 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1178 if (digit1 < 16 && digit2 < 16) {
1179 *p++ = (unsigned char)((digit1 << 4) + digit2);
1180 s += 2;
1181 break;
1182 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001184 /* invalid hexadecimal digits */
1185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001187 PyErr_Format(PyExc_ValueError,
1188 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001189 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 goto failed;
1191 }
1192 if (strcmp(errors, "replace") == 0) {
1193 *p++ = '?';
1194 } else if (strcmp(errors, "ignore") == 0)
1195 /* do nothing */;
1196 else {
1197 PyErr_Format(PyExc_ValueError,
1198 "decoding error; unknown "
1199 "error handling code: %.400s",
1200 errors);
1201 goto failed;
1202 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001203 /* skip \x */
1204 if (s < end && Py_ISXDIGIT(s[0]))
1205 s++; /* and a hexdigit */
1206 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 default:
1209 *p++ = '\\';
1210 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001211 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 UTF-8 bytes may follow. */
1213 }
1214 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001215
1216 return _PyBytesWriter_Finish(&writer, p);
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001219 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223/* -------------------------------------------------------------------- */
1224/* object api */
1225
1226Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001227PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(op)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1232 return -1;
1233 }
1234 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235}
1236
1237char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001238PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 if (!PyBytes_Check(op)) {
1241 PyErr_Format(PyExc_TypeError,
1242 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243 return NULL;
1244 }
1245 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246}
1247
1248int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249PyBytes_AsStringAndSize(PyObject *obj,
1250 char **s,
1251 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 if (s == NULL) {
1254 PyErr_BadInternalCall();
1255 return -1;
1256 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 if (!PyBytes_Check(obj)) {
1259 PyErr_Format(PyExc_TypeError,
1260 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1261 return -1;
1262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 *s = PyBytes_AS_STRING(obj);
1265 if (len != NULL)
1266 *len = PyBytes_GET_SIZE(obj);
1267 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001268 PyErr_SetString(PyExc_ValueError,
1269 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 return -1;
1271 }
1272 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273}
Neal Norwitz6968b052007-02-27 19:02:19 +00001274
1275/* -------------------------------------------------------------------- */
1276/* Methods */
1277
Eric Smith0923d1d2009-04-16 20:16:10 +00001278#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001279
1280#include "stringlib/fastsearch.h"
1281#include "stringlib/count.h"
1282#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001283#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001284#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001286#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Eric Smith0f78bff2009-11-30 01:01:42 +00001288#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290PyObject *
1291PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001292{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001293 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001295 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 unsigned char quote, *s, *p;
1298
1299 /* Compute size of output string */
1300 squotes = dquotes = 0;
1301 newsize = 3; /* b'' */
1302 s = (unsigned char*)op->ob_sval;
1303 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001304 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 case '\'': squotes++; break;
1307 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 default:
1311 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001314 if (newsize > PY_SSIZE_T_MAX - incr)
1315 goto overflow;
1316 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 }
1318 quote = '\'';
1319 if (smartquotes && squotes && !dquotes)
1320 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001321 if (squotes && quote == '\'') {
1322 if (newsize > PY_SSIZE_T_MAX - squotes)
1323 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 if (v == NULL) {
1329 return NULL;
1330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 *p++ = 'b', *p++ = quote;
1334 for (i = 0; i < length; i++) {
1335 unsigned char c = op->ob_sval[i];
1336 if (c == quote || c == '\\')
1337 *p++ = '\\', *p++ = c;
1338 else if (c == '\t')
1339 *p++ = '\\', *p++ = 't';
1340 else if (c == '\n')
1341 *p++ = '\\', *p++ = 'n';
1342 else if (c == '\r')
1343 *p++ = '\\', *p++ = 'r';
1344 else if (c < ' ' || c >= 0x7f) {
1345 *p++ = '\\';
1346 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001347 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1348 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 else
1351 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001354 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356
1357 overflow:
1358 PyErr_SetString(PyExc_OverflowError,
1359 "bytes object is too large to make repr");
1360 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001361}
1362
Neal Norwitz6968b052007-02-27 19:02:19 +00001363static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001364bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001367}
1368
Neal Norwitz6968b052007-02-27 19:02:19 +00001369static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001370bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (Py_BytesWarningFlag) {
1373 if (PyErr_WarnEx(PyExc_BytesWarning,
1374 "str() on a bytes instance", 1))
1375 return NULL;
1376 }
1377 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001378}
1379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001381bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384}
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386/* This is also used by PyBytes_Concat() */
1387static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001388bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 Py_ssize_t size;
1391 Py_buffer va, vb;
1392 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 va.len = -1;
1395 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001396 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1397 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1399 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1400 goto done;
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 /* Optimize end cases */
1404 if (va.len == 0 && PyBytes_CheckExact(b)) {
1405 result = b;
1406 Py_INCREF(result);
1407 goto done;
1408 }
1409 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1410 result = a;
1411 Py_INCREF(result);
1412 goto done;
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size = va.len + vb.len;
1416 if (size < 0) {
1417 PyErr_NoMemory();
1418 goto done;
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 result = PyBytes_FromStringAndSize(NULL, size);
1422 if (result != NULL) {
1423 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1424 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1425 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
1427 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (va.len != -1)
1429 PyBuffer_Release(&va);
1430 if (vb.len != -1)
1431 PyBuffer_Release(&vb);
1432 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
Neal Norwitz6968b052007-02-27 19:02:19 +00001434
1435static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001436bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438 Py_ssize_t i;
1439 Py_ssize_t j;
1440 Py_ssize_t size;
1441 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 size_t nbytes;
1443 if (n < 0)
1444 n = 0;
1445 /* watch out for overflows: the size can overflow int,
1446 * and the # of bytes needed can overflow size_t
1447 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001448 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyErr_SetString(PyExc_OverflowError,
1450 "repeated bytes are too long");
1451 return NULL;
1452 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001453 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1455 Py_INCREF(a);
1456 return (PyObject *)a;
1457 }
1458 nbytes = (size_t)size;
1459 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1460 PyErr_SetString(PyExc_OverflowError,
1461 "repeated bytes are too long");
1462 return NULL;
1463 }
1464 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1465 if (op == NULL)
1466 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001467 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 op->ob_shash = -1;
1469 op->ob_sval[size] = '\0';
1470 if (Py_SIZE(a) == 1 && n > 0) {
1471 memset(op->ob_sval, a->ob_sval[0] , n);
1472 return (PyObject *) op;
1473 }
1474 i = 0;
1475 if (i < size) {
1476 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1477 i = Py_SIZE(a);
1478 }
1479 while (i < size) {
1480 j = (i <= size-i) ? i : size-i;
1481 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1482 i += j;
1483 }
1484 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001485}
1486
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489{
1490 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1491 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001492 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001493 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001494 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001495 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001496 return -1;
1497 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1498 varg.buf, varg.len, 0);
1499 PyBuffer_Release(&varg);
1500 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 }
1502 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001503 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1504 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 }
1506
Antoine Pitrou0010d372010-08-15 17:12:55 +00001507 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001508}
1509
Neal Norwitz6968b052007-02-27 19:02:19 +00001510static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001511bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (i < 0 || i >= Py_SIZE(a)) {
1514 PyErr_SetString(PyExc_IndexError, "index out of range");
1515 return NULL;
1516 }
1517 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001520Py_LOCAL(int)
1521bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1522{
1523 int cmp;
1524 Py_ssize_t len;
1525
1526 len = Py_SIZE(a);
1527 if (Py_SIZE(b) != len)
1528 return 0;
1529
1530 if (a->ob_sval[0] != b->ob_sval[0])
1531 return 0;
1532
1533 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1534 return (cmp == 0);
1535}
1536
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001538bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 int c;
1541 Py_ssize_t len_a, len_b;
1542 Py_ssize_t min_len;
1543 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001544 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 /* Make sure both arguments are strings. */
1547 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001548 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001549 rc = PyObject_IsInstance((PyObject*)a,
1550 (PyObject*)&PyUnicode_Type);
1551 if (!rc)
1552 rc = PyObject_IsInstance((PyObject*)b,
1553 (PyObject*)&PyUnicode_Type);
1554 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001556 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001557 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001558 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 return NULL;
1560 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001561 else {
1562 rc = PyObject_IsInstance((PyObject*)a,
1563 (PyObject*)&PyLong_Type);
1564 if (!rc)
1565 rc = PyObject_IsInstance((PyObject*)b,
1566 (PyObject*)&PyLong_Type);
1567 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001568 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001569 if (rc) {
1570 if (PyErr_WarnEx(PyExc_BytesWarning,
1571 "Comparison between bytes and int", 1))
1572 return NULL;
1573 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001574 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001578 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001580 case Py_EQ:
1581 case Py_LE:
1582 case Py_GE:
1583 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001585 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 case Py_NE:
1587 case Py_LT:
1588 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001590 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 default:
1592 PyErr_BadArgument();
1593 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 }
1595 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 else if (op == Py_EQ || op == Py_NE) {
1597 int eq = bytes_compare_eq(a, b);
1598 eq ^= (op == Py_NE);
1599 result = eq ? Py_True : Py_False;
1600 }
1601 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 len_a = Py_SIZE(a);
1603 len_b = Py_SIZE(b);
1604 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 if (min_len > 0) {
1606 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 else
1611 c = 0;
1612 if (c == 0)
1613 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1614 switch (op) {
1615 case Py_LT: c = c < 0; break;
1616 case Py_LE: c = c <= 0; break;
1617 case Py_GT: c = c > 0; break;
1618 case Py_GE: c = c >= 0; break;
1619 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001620 PyErr_BadArgument();
1621 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 Py_INCREF(result);
1627 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628}
1629
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001630static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001632{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001633 if (a->ob_shash == -1) {
1634 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001635 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001636 }
1637 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyIndex_Check(item)) {
1644 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1645 if (i == -1 && PyErr_Occurred())
1646 return NULL;
1647 if (i < 0)
1648 i += PyBytes_GET_SIZE(self);
1649 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1650 PyErr_SetString(PyExc_IndexError,
1651 "index out of range");
1652 return NULL;
1653 }
1654 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1655 }
1656 else if (PySlice_Check(item)) {
1657 Py_ssize_t start, stop, step, slicelength, cur, i;
1658 char* source_buf;
1659 char* result_buf;
1660 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001661
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001662 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 PyBytes_GET_SIZE(self),
1664 &start, &stop, &step, &slicelength) < 0) {
1665 return NULL;
1666 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 if (slicelength <= 0) {
1669 return PyBytes_FromStringAndSize("", 0);
1670 }
1671 else if (start == 0 && step == 1 &&
1672 slicelength == PyBytes_GET_SIZE(self) &&
1673 PyBytes_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject *)self;
1676 }
1677 else if (step == 1) {
1678 return PyBytes_FromStringAndSize(
1679 PyBytes_AS_STRING(self) + start,
1680 slicelength);
1681 }
1682 else {
1683 source_buf = PyBytes_AS_STRING(self);
1684 result = PyBytes_FromStringAndSize(NULL, slicelength);
1685 if (result == NULL)
1686 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 result_buf = PyBytes_AS_STRING(result);
1689 for (cur = start, i = 0; i < slicelength;
1690 cur += step, i++) {
1691 result_buf[i] = source_buf[cur];
1692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return result;
1695 }
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001699 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 Py_TYPE(item)->tp_name);
1701 return NULL;
1702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703}
1704
1705static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001706bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1709 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710}
1711
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001712static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 (lenfunc)bytes_length, /*sq_length*/
1714 (binaryfunc)bytes_concat, /*sq_concat*/
1715 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1716 (ssizeargfunc)bytes_item, /*sq_item*/
1717 0, /*sq_slice*/
1718 0, /*sq_ass_item*/
1719 0, /*sq_ass_slice*/
1720 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721};
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length,
1725 (binaryfunc)bytes_subscript,
1726 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727};
1728
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001729static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 (getbufferproc)bytes_buffer_getbuffer,
1731 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
1734
/* Values for the striptype argument of do_strip()/do_xstrip()/do_argstrip(). */
#define LEFTSTRIP   0   /* strip the leading end only */
#define RIGHTSTRIP  1   /* strip the trailing end only */
#define BOTHSTRIP   2   /* strip both ends */
1738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739/*[clinic input]
1740bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742 sep: object = None
        The delimiter according to which to split the bytes.
1744 None (the default value) means split on ASCII whitespace characters
1745 (space, tab, return, newline, formfeed, vertical tab).
1746 maxsplit: Py_ssize_t = -1
1747 Maximum number of splits to do.
1748 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750Return a list of the sections in the bytes, using sep as the delimiter.
1751[clinic start generated code]*/
1752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001754bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001755/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756{
1757 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 const char *s = PyBytes_AS_STRING(self), *sub;
1759 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if (maxsplit < 0)
1763 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 return NULL;
1768 sub = vsub.buf;
1769 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1772 PyBuffer_Release(&vsub);
1773 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001774}
1775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776/*[clinic input]
1777bytes.partition
1778
1779 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781 /
1782
1783Partition the bytes into three parts using the given separator.
1784
1785This will search for the separator sep in the bytes. If the separator is found,
1786returns a 3-tuple containing the part before the separator, the separator
1787itself, and the part after it.
1788
1789If the separator is not found, returns a 3-tuple containing the original bytes
1790object and two empty bytes objects.
1791[clinic start generated code]*/
1792
Neal Norwitz6968b052007-02-27 19:02:19 +00001793static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001795/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001796{
Neal Norwitz6968b052007-02-27 19:02:19 +00001797 return stringlib_partition(
1798 (PyObject*) self,
1799 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001800 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001801 );
1802}
1803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.rpartition
1806
1807 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809 /
1810
1811Partition the bytes into three parts using the given separator.
1812
This will search for the separator sep in the bytes, starting at the end. If
1814the separator is found, returns a 3-tuple containing the part before the
1815separator, the separator itself, and the part after it.
1816
1817If the separator is not found, returns a 3-tuple containing two empty bytes
1818objects and the original bytes object.
1819[clinic start generated code]*/
1820
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821static PyObject *
1822bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 return stringlib_rpartition(
1826 (PyObject*) self,
1827 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001828 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001830}
1831
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832/*[clinic input]
1833bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001835Return a list of the sections in the bytes, using sep as the delimiter.
1836
1837Splitting is done starting at the end of the bytes and working to the front.
1838[clinic start generated code]*/
1839
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001840static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001841bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001842/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843{
1844 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 const char *s = PyBytes_AS_STRING(self), *sub;
1846 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001847 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 if (maxsplit < 0)
1850 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001853 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return NULL;
1855 sub = vsub.buf;
1856 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1859 PyBuffer_Release(&vsub);
1860 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864/*[clinic input]
1865bytes.join
1866
1867 iterable_of_bytes: object
1868 /
1869
1870Concatenate any number of bytes objects.
1871
1872The bytes whose method is called is inserted in between each pair.
1873
1874The result is returned as a new bytes object.
1875
1876Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1877[clinic start generated code]*/
1878
Neal Norwitz6968b052007-02-27 19:02:19 +00001879static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001880bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001881/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001882{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001883 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001884}
1885
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886PyObject *
1887_PyBytes_Join(PyObject *sep, PyObject *x)
1888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 assert(sep != NULL && PyBytes_Check(sep));
1890 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001891 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892}
1893
/* Helper macro to fix up start/end slice values in place.

   `end` is clamped to `len`; a negative `end` or `start` is offset by
   `len` and then clamped to 0.  `start` is NOT clamped to `len`, so
   callers must cope with start > end afterwards.

   `start` and `end` must be modifiable lvalues.  All three arguments are
   evaluated more than once, so they must be free of side effects.

   The body is wrapped in do { ... } while (0) and every argument is
   parenthesized so that the macro expands to a single statement: it is
   safe inside an unbraced if/else and with expression arguments.  Invoke
   it with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
1909Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001910bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001913 char byte;
1914 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001918 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouac65d962011-10-20 23:54:17 +02001920 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1921 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouac65d962011-10-20 23:54:17 +02001924 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001925 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001926 return -2;
1927
1928 sub = subbuf.buf;
1929 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001931 else {
1932 sub = &byte;
1933 sub_len = 1;
1934 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001935 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001937 ADJUST_INDICES(start, end, len);
1938 if (end - start < sub_len)
1939 res = -1;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +02001940 else if (sub_len == 1) {
1941 if (dir > 0)
1942 res = stringlib_find_char(
1943 PyBytes_AS_STRING(self) + start, end - start,
1944 *sub);
1945 else
1946 res = stringlib_rfind_char(
1947 PyBytes_AS_STRING(self) + start, end - start,
1948 *sub);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001949 if (res >= 0)
1950 res += start;
1951 }
1952 else {
1953 if (dir > 0)
1954 res = stringlib_find_slice(
1955 PyBytes_AS_STRING(self), len,
1956 sub, sub_len, start, end);
1957 else
1958 res = stringlib_rfind_slice(
1959 PyBytes_AS_STRING(self), len,
1960 sub, sub_len, start, end);
1961 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001962
1963 if (subobj)
1964 PyBuffer_Release(&subbuf);
1965
1966 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967}
1968
1969
1970PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001971"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001972\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001973Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001974such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001976\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977Return -1 on failure.");
1978
Neal Norwitz6968b052007-02-27 19:02:19 +00001979static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001980bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 Py_ssize_t result = bytes_find_internal(self, args, +1);
1983 if (result == -2)
1984 return NULL;
1985 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001986}
1987
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
1989PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001990"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001991\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992Like B.find() but raise ValueError when the substring is not found.");
1993
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001994static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001995bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 Py_ssize_t result = bytes_find_internal(self, args, +1);
1998 if (result == -2)
1999 return NULL;
2000 if (result == -1) {
2001 PyErr_SetString(PyExc_ValueError,
2002 "substring not found");
2003 return NULL;
2004 }
2005 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002006}
2007
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
2009PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002010"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002011\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002013such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002015\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016Return -1 on failure.");
2017
Neal Norwitz6968b052007-02-27 19:02:19 +00002018static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002019bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 Py_ssize_t result = bytes_find_internal(self, args, -1);
2022 if (result == -2)
2023 return NULL;
2024 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002025}
2026
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002029"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030\n\
2031Like B.rfind() but raise ValueError when the substring is not found.");
2032
2033static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002034bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002035{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 Py_ssize_t result = bytes_find_internal(self, args, -1);
2037 if (result == -2)
2038 return NULL;
2039 if (result == -1) {
2040 PyErr_SetString(PyExc_ValueError,
2041 "substring not found");
2042 return NULL;
2043 }
2044 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002045}
2046
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
2048Py_LOCAL_INLINE(PyObject *)
2049do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 Py_buffer vsep;
2052 char *s = PyBytes_AS_STRING(self);
2053 Py_ssize_t len = PyBytes_GET_SIZE(self);
2054 char *sep;
2055 Py_ssize_t seplen;
2056 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002058 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 return NULL;
2060 sep = vsep.buf;
2061 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 i = 0;
2064 if (striptype != RIGHTSTRIP) {
2065 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2066 i++;
2067 }
2068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 j = len;
2071 if (striptype != LEFTSTRIP) {
2072 do {
2073 j--;
2074 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2075 j++;
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2081 Py_INCREF(self);
2082 return (PyObject*)self;
2083 }
2084 else
2085 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002086}
2087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
2089Py_LOCAL_INLINE(PyObject *)
2090do_strip(PyBytesObject *self, int striptype)
2091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 char *s = PyBytes_AS_STRING(self);
2093 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 i = 0;
2096 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002097 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 i++;
2099 }
2100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 j = len;
2103 if (striptype != LEFTSTRIP) {
2104 do {
2105 j--;
David Malcolm96960882010-11-05 17:23:41 +00002106 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 j++;
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2111 Py_INCREF(self);
2112 return (PyObject*)self;
2113 }
2114 else
2115 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116}
2117
2118
2119Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002120do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002122 if (bytes != NULL && bytes != Py_None) {
2123 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 }
2125 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126}
2127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128/*[clinic input]
2129bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 self: self(type="PyBytesObject *")
2132 bytes: object = None
2133 /
2134
2135Strip leading and trailing bytes contained in the argument.
2136
2137If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2138[clinic start generated code]*/
2139
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002140static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002142/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002143{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002145}
2146
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147/*[clinic input]
2148bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 self: self(type="PyBytesObject *")
2151 bytes: object = None
2152 /
2153
2154Strip leading bytes contained in the argument.
2155
2156If the argument is omitted or None, strip leading ASCII whitespace.
2157[clinic start generated code]*/
2158
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159static PyObject *
2160bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002161/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162{
2163 return do_argstrip(self, LEFTSTRIP, bytes);
2164}
2165
2166/*[clinic input]
2167bytes.rstrip
2168
2169 self: self(type="PyBytesObject *")
2170 bytes: object = None
2171 /
2172
2173Strip trailing bytes contained in the argument.
2174
2175If the argument is omitted or None, strip trailing ASCII whitespace.
2176[clinic start generated code]*/
2177
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178static PyObject *
2179bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002180/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181{
2182 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002183}
Neal Norwitz6968b052007-02-27 19:02:19 +00002184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
2186PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002187"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002188\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002190string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191as in slice notation.");
2192
2193static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002194bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 PyObject *sub_obj;
2197 const char *str = PyBytes_AS_STRING(self), *sub;
2198 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002199 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrouac65d962011-10-20 23:54:17 +02002202 Py_buffer vsub;
2203 PyObject *count_obj;
2204
2205 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2206 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouac65d962011-10-20 23:54:17 +02002209 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002210 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002211 return NULL;
2212
2213 sub = vsub.buf;
2214 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002216 else {
2217 sub = &byte;
2218 sub_len = 1;
2219 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrouac65d962011-10-20 23:54:17 +02002223 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2225 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002226
2227 if (sub_obj)
2228 PyBuffer_Release(&vsub);
2229
2230 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231}
2232
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234/*[clinic input]
2235bytes.translate
2236
2237 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002238 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239 Translation table, which must be a bytes object of length 256.
2240 [
2241 deletechars: object
2242 ]
2243 /
2244
2245Return a copy with each character mapped by the given translation table.
2246
2247All characters occurring in the optional argument deletechars are removed.
2248The remaining characters are mapped through the given translation table.
2249[clinic start generated code]*/
2250
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002252bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2253 PyObject *deletechars)
2254/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002256 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002257 Py_buffer table_view = {NULL, NULL};
2258 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002260 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 Py_ssize_t inlen, tablen, dellen = 0;
2264 PyObject *result;
2265 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002267 if (PyBytes_Check(table)) {
2268 table_chars = PyBytes_AS_STRING(table);
2269 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271 else if (table == Py_None) {
2272 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 tablen = 256;
2274 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002275 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002276 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002277 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002278 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002279 tablen = table_view.len;
2280 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 if (tablen != 256) {
2283 PyErr_SetString(PyExc_ValueError,
2284 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002285 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 return NULL;
2287 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002289 if (deletechars != NULL) {
2290 if (PyBytes_Check(deletechars)) {
2291 del_table_chars = PyBytes_AS_STRING(deletechars);
2292 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002294 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002295 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002296 PyBuffer_Release(&table_view);
2297 return NULL;
2298 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002299 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002300 dellen = del_table_view.len;
2301 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 }
2303 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 dellen = 0;
2306 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 inlen = PyBytes_GET_SIZE(input_obj);
2309 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002310 if (result == NULL) {
2311 PyBuffer_Release(&del_table_view);
2312 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002314 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 output_start = output = PyBytes_AsString(result);
2316 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002318 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 /* If no deletions are required, use faster code */
2320 for (i = inlen; --i >= 0; ) {
2321 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 changed = 1;
2324 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002325 if (!changed && PyBytes_CheckExact(input_obj)) {
2326 Py_INCREF(input_obj);
2327 Py_DECREF(result);
2328 result = input_obj;
2329 }
2330 PyBuffer_Release(&del_table_view);
2331 PyBuffer_Release(&table_view);
2332 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002335 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 for (i = 0; i < 256; i++)
2337 trans_table[i] = Py_CHARMASK(i);
2338 } else {
2339 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002342 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002345 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002346 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 for (i = inlen; --i >= 0; ) {
2349 c = Py_CHARMASK(*input++);
2350 if (trans_table[c] != -1)
2351 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2352 continue;
2353 changed = 1;
2354 }
2355 if (!changed && PyBytes_CheckExact(input_obj)) {
2356 Py_DECREF(result);
2357 Py_INCREF(input_obj);
2358 return input_obj;
2359 }
2360 /* Fix the size of the resulting string */
2361 if (inlen > 0)
2362 _PyBytes_Resize(&result, output - output_start);
2363 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364}
2365
2366
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367/*[clinic input]
2368
2369@staticmethod
2370bytes.maketrans
2371
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002372 frm: Py_buffer
2373 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002374 /
2375
2376Return a translation table useable for the bytes or bytearray translate method.
2377
2378The returned table will be one where each byte in frm is mapped to the byte at
2379the same position in to.
2380
2381The bytes objects frm and to must be of the same length.
2382[clinic start generated code]*/
2383
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002384static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002385bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002386/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387{
2388 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002389}
2390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391/* find and count characters and substrings */
2392
/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when the
   byte does not occur.  Thin wrapper around memchr() that casts away the
   const-ness of the result. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2395
2396/* String ops must return a string. */
2397/* If the object is subclass of string, create a copy */
2398Py_LOCAL(PyBytesObject *)
2399return_self(PyBytesObject *self)
2400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 if (PyBytes_CheckExact(self)) {
2402 Py_INCREF(self);
2403 return self;
2404 }
2405 return (PyBytesObject *)PyBytes_FromStringAndSize(
2406 PyBytes_AS_STRING(self),
2407 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002408}
2409
2410Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002411countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 Py_ssize_t count=0;
2414 const char *start=target;
2415 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 while ( (start=findchar(start, end-start, c)) != NULL ) {
2418 count++;
2419 if (count >= maxcount)
2420 break;
2421 start += 1;
2422 }
2423 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424}
2425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426
2427/* Algorithms for different cases of string replacement */
2428
2429/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2430Py_LOCAL(PyBytesObject *)
2431replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 const char *to_s, Py_ssize_t to_len,
2433 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 char *self_s, *result_s;
2436 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002437 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002441
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002442 /* 1 at the end plus 1 after every character;
2443 count = min(maxcount, self_len + 1) */
2444 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002445 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002446 else
2447 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2448 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 /* Check for overflow */
2451 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002452 assert(count > 0);
2453 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 PyErr_SetString(PyExc_OverflowError,
2455 "replacement bytes are too long");
2456 return NULL;
2457 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002458 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 if (! (result = (PyBytesObject *)
2461 PyBytes_FromStringAndSize(NULL, result_len)) )
2462 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 self_s = PyBytes_AS_STRING(self);
2465 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Victor Stinnerfac39562016-03-21 10:38:58 +01002467 if (to_len > 1) {
2468 /* Lay the first one down (guaranteed this will occur) */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 Py_MEMCPY(result_s, to_s, to_len);
2470 result_s += to_len;
Victor Stinnerfac39562016-03-21 10:38:58 +01002471 count -= 1;
2472
2473 for (i = 0; i < count; i++) {
2474 *result_s++ = *self_s++;
2475 Py_MEMCPY(result_s, to_s, to_len);
2476 result_s += to_len;
2477 }
2478 }
2479 else {
2480 result_s[0] = to_s[0];
2481 result_s += to_len;
2482 count -= 1;
2483 for (i = 0; i < count; i++) {
2484 *result_s++ = *self_s++;
2485 result_s[0] = to_s[0];
2486 result_s += to_len;
2487 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 /* Copy the rest of the original string */
2491 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002494}
2495
2496/* Special case for deleting a single character */
2497/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2498Py_LOCAL(PyBytesObject *)
2499replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 char *self_s, *result_s;
2503 char *start, *next, *end;
2504 Py_ssize_t self_len, result_len;
2505 Py_ssize_t count;
2506 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 self_len = PyBytes_GET_SIZE(self);
2509 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 count = countchar(self_s, self_len, from_c, maxcount);
2512 if (count == 0) {
2513 return return_self(self);
2514 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002516 result_len = self_len - count; /* from_len == 1 */
2517 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 if ( (result = (PyBytesObject *)
2520 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2521 return NULL;
2522 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 start = self_s;
2525 end = self_s + self_len;
2526 while (count-- > 0) {
2527 next = findchar(start, end-start, from_c);
2528 if (next == NULL)
2529 break;
2530 Py_MEMCPY(result_s, start, next-start);
2531 result_s += (next-start);
2532 start = next+1;
2533 }
2534 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537}
2538
2539/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2540
2541Py_LOCAL(PyBytesObject *)
2542replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 const char *from_s, Py_ssize_t from_len,
2544 Py_ssize_t maxcount) {
2545 char *self_s, *result_s;
2546 char *start, *next, *end;
2547 Py_ssize_t self_len, result_len;
2548 Py_ssize_t count, offset;
2549 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 self_len = PyBytes_GET_SIZE(self);
2552 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 count = stringlib_count(self_s, self_len,
2555 from_s, from_len,
2556 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 if (count == 0) {
2559 /* no matches */
2560 return return_self(self);
2561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 result_len = self_len - (count * from_len);
2564 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 if ( (result = (PyBytesObject *)
2567 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2568 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 start = self_s;
2573 end = self_s + self_len;
2574 while (count-- > 0) {
2575 offset = stringlib_find(start, end-start,
2576 from_s, from_len,
2577 0);
2578 if (offset == -1)
2579 break;
2580 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 result_s += (next-start);
2585 start = next+from_len;
2586 }
2587 Py_MEMCPY(result_s, start, end-start);
2588 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589}
2590
2591/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2592Py_LOCAL(PyBytesObject *)
2593replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002594 char from_c, char to_c,
2595 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 char *self_s, *result_s, *start, *end, *next;
2598 Py_ssize_t self_len;
2599 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 /* The result string will be the same size */
2602 self_s = PyBytes_AS_STRING(self);
2603 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 if (next == NULL) {
2608 /* No matches; return the original string */
2609 return return_self(self);
2610 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002612 /* Need to make a new string */
2613 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2614 if (result == NULL)
2615 return NULL;
2616 result_s = PyBytes_AS_STRING(result);
2617 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 /* change everything in-place, starting with this one */
2620 start = result_s + (next-self_s);
2621 *start = to_c;
2622 start++;
2623 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 while (--maxcount > 0) {
2626 next = findchar(start, end-start, from_c);
2627 if (next == NULL)
2628 break;
2629 *next = to_c;
2630 start = next+1;
2631 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634}
2635
2636/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2637Py_LOCAL(PyBytesObject *)
2638replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 const char *from_s, Py_ssize_t from_len,
2640 const char *to_s, Py_ssize_t to_len,
2641 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 char *result_s, *start, *end;
2644 char *self_s;
2645 Py_ssize_t self_len, offset;
2646 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 self_s = PyBytes_AS_STRING(self);
2651 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 offset = stringlib_find(self_s, self_len,
2654 from_s, from_len,
2655 0);
2656 if (offset == -1) {
2657 /* No matches; return the original string */
2658 return return_self(self);
2659 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 /* Need to make a new string */
2662 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2663 if (result == NULL)
2664 return NULL;
2665 result_s = PyBytes_AS_STRING(result);
2666 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 /* change everything in-place, starting with this one */
2669 start = result_s + offset;
2670 Py_MEMCPY(start, to_s, from_len);
2671 start += from_len;
2672 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 while ( --maxcount > 0) {
2675 offset = stringlib_find(start, end-start,
2676 from_s, from_len,
2677 0);
2678 if (offset==-1)
2679 break;
2680 Py_MEMCPY(start+offset, to_s, from_len);
2681 start += offset+from_len;
2682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685}
2686
2687/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2688Py_LOCAL(PyBytesObject *)
2689replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 char from_c,
2691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 char *self_s, *result_s;
2695 char *start, *next, *end;
2696 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002697 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 self_s = PyBytes_AS_STRING(self);
2701 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 count = countchar(self_s, self_len, from_c, maxcount);
2704 if (count == 0) {
2705 /* no matches, return unchanged */
2706 return return_self(self);
2707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 /* use the difference between current and new, hence the "-1" */
2710 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002711 assert(count > 0);
2712 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 PyErr_SetString(PyExc_OverflowError,
2714 "replacement bytes are too long");
2715 return NULL;
2716 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002717 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 if ( (result = (PyBytesObject *)
2720 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2721 return NULL;
2722 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 start = self_s;
2725 end = self_s + self_len;
2726 while (count-- > 0) {
2727 next = findchar(start, end-start, from_c);
2728 if (next == NULL)
2729 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 if (next == start) {
2732 /* replace with the 'to' */
2733 Py_MEMCPY(result_s, to_s, to_len);
2734 result_s += to_len;
2735 start += 1;
2736 } else {
2737 /* copy the unchanged old then the 'to' */
2738 Py_MEMCPY(result_s, start, next-start);
2739 result_s += (next-start);
2740 Py_MEMCPY(result_s, to_s, to_len);
2741 result_s += to_len;
2742 start = next+1;
2743 }
2744 }
2745 /* Copy the remainder of the remaining string */
2746 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749}
2750
2751/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2752Py_LOCAL(PyBytesObject *)
2753replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 const char *from_s, Py_ssize_t from_len,
2755 const char *to_s, Py_ssize_t to_len,
2756 Py_ssize_t maxcount) {
2757 char *self_s, *result_s;
2758 char *start, *next, *end;
2759 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002760 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002763 self_s = PyBytes_AS_STRING(self);
2764 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 count = stringlib_count(self_s, self_len,
2767 from_s, from_len,
2768 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 if (count == 0) {
2771 /* no matches, return unchanged */
2772 return return_self(self);
2773 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 /* Check for overflow */
2776 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002777 assert(count > 0);
2778 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 PyErr_SetString(PyExc_OverflowError,
2780 "replacement bytes are too long");
2781 return NULL;
2782 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002783 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 if ( (result = (PyBytesObject *)
2786 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2787 return NULL;
2788 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 start = self_s;
2791 end = self_s + self_len;
2792 while (count-- > 0) {
2793 offset = stringlib_find(start, end-start,
2794 from_s, from_len,
2795 0);
2796 if (offset == -1)
2797 break;
2798 next = start+offset;
2799 if (next == start) {
2800 /* replace with the 'to' */
2801 Py_MEMCPY(result_s, to_s, to_len);
2802 result_s += to_len;
2803 start += from_len;
2804 } else {
2805 /* copy the unchanged old then the 'to' */
2806 Py_MEMCPY(result_s, start, next-start);
2807 result_s += (next-start);
2808 Py_MEMCPY(result_s, to_s, to_len);
2809 result_s += to_len;
2810 start = next+from_len;
2811 }
2812 }
2813 /* Copy the remainder of the remaining string */
2814 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817}
2818
2819
2820Py_LOCAL(PyBytesObject *)
2821replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002822 const char *from_s, Py_ssize_t from_len,
2823 const char *to_s, Py_ssize_t to_len,
2824 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 if (maxcount < 0) {
2827 maxcount = PY_SSIZE_T_MAX;
2828 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2829 /* nothing to do; return the original string */
2830 return return_self(self);
2831 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 if (maxcount == 0 ||
2834 (from_len == 0 && to_len == 0)) {
2835 /* nothing to do; return the original string */
2836 return return_self(self);
2837 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 if (from_len == 0) {
2842 /* insert the 'to' string everywhere. */
2843 /* >>> "Python".replace("", ".") */
2844 /* '.P.y.t.h.o.n.' */
2845 return replace_interleave(self, to_s, to_len, maxcount);
2846 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2849 /* point for an empty self string to generate a non-empty string */
2850 /* Special case so the remaining code always gets a non-empty string */
2851 if (PyBytes_GET_SIZE(self) == 0) {
2852 return return_self(self);
2853 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 if (to_len == 0) {
2856 /* delete all occurrences of 'from' string */
2857 if (from_len == 1) {
2858 return replace_delete_single_character(
2859 self, from_s[0], maxcount);
2860 } else {
2861 return replace_delete_substring(self, from_s,
2862 from_len, maxcount);
2863 }
2864 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 if (from_len == to_len) {
2869 if (from_len == 1) {
2870 return replace_single_character_in_place(
2871 self,
2872 from_s[0],
2873 to_s[0],
2874 maxcount);
2875 } else {
2876 return replace_substring_in_place(
2877 self, from_s, from_len, to_s, to_len,
2878 maxcount);
2879 }
2880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 /* Otherwise use the more generic algorithms */
2883 if (from_len == 1) {
2884 return replace_single_character(self, from_s[0],
2885 to_s, to_len, maxcount);
2886 } else {
2887 /* len('from')>=2, len('to')>=1 */
2888 return replace_substring(self, from_s, from_len, to_s, to_len,
2889 maxcount);
2890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891}
2892
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002893
2894/*[clinic input]
2895bytes.replace
2896
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002897 old: Py_buffer
2898 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002899 count: Py_ssize_t = -1
2900 Maximum number of occurrences to replace.
2901 -1 (the default value) means replace all occurrences.
2902 /
2903
2904Return a copy with all occurrences of substring old replaced by new.
2905
2906If the optional argument count is given, only the first count occurrences are
2907replaced.
2908[clinic start generated code]*/
2909
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002910static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002911bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2912 Py_ssize_t count)
2913/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002916 (const char *)old->buf, old->len,
2917 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002918}
2919
2920/** End DALKE **/
2921
2922/* Matches the end (direction >= 0) or start (direction < 0) of self
2923 * against substr, using the start and end arguments. Returns
2924 * -1 on error, 0 if not found and 1 if found.
2925 */
2926Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002927_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 Py_ssize_t len = PyBytes_GET_SIZE(self);
2931 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002932 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 const char* sub;
2934 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 if (PyBytes_Check(substr)) {
2937 sub = PyBytes_AS_STRING(substr);
2938 slen = PyBytes_GET_SIZE(substr);
2939 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002940 else {
2941 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2942 return -1;
2943 sub = sub_view.buf;
2944 slen = sub_view.len;
2945 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 if (direction < 0) {
2951 /* startswith */
2952 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002953 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 } else {
2955 /* endswith */
2956 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002957 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 if (end-slen > start)
2960 start = end - slen;
2961 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002962 if (end-start < slen)
2963 goto notfound;
2964 if (memcmp(str+start, sub, slen) != 0)
2965 goto notfound;
2966
2967 PyBuffer_Release(&sub_view);
2968 return 1;
2969
2970notfound:
2971 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973}
2974
2975
2976PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002977"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978\n\
2979Return True if B starts with the specified prefix, False otherwise.\n\
2980With optional start, test B beginning at that position.\n\
2981With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002982prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983
2984static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002985bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 Py_ssize_t start = 0;
2988 Py_ssize_t end = PY_SSIZE_T_MAX;
2989 PyObject *subobj;
2990 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991
Jesus Ceaac451502011-04-20 17:09:23 +02002992 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 return NULL;
2994 if (PyTuple_Check(subobj)) {
2995 Py_ssize_t i;
2996 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2997 result = _bytes_tailmatch(self,
2998 PyTuple_GET_ITEM(subobj, i),
2999 start, end, -1);
3000 if (result == -1)
3001 return NULL;
3002 else if (result) {
3003 Py_RETURN_TRUE;
3004 }
3005 }
3006 Py_RETURN_FALSE;
3007 }
3008 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003009 if (result == -1) {
3010 if (PyErr_ExceptionMatches(PyExc_TypeError))
3011 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3012 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003014 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 else
3016 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017}
3018
3019
3020PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003021"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022\n\
3023Return True if B ends with the specified suffix, False otherwise.\n\
3024With optional start, test B beginning at that position.\n\
3025With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003026suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027
3028static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003029bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003030{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 Py_ssize_t start = 0;
3032 Py_ssize_t end = PY_SSIZE_T_MAX;
3033 PyObject *subobj;
3034 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035
Jesus Ceaac451502011-04-20 17:09:23 +02003036 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 return NULL;
3038 if (PyTuple_Check(subobj)) {
3039 Py_ssize_t i;
3040 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3041 result = _bytes_tailmatch(self,
3042 PyTuple_GET_ITEM(subobj, i),
3043 start, end, +1);
3044 if (result == -1)
3045 return NULL;
3046 else if (result) {
3047 Py_RETURN_TRUE;
3048 }
3049 }
3050 Py_RETURN_FALSE;
3051 }
3052 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003053 if (result == -1) {
3054 if (PyErr_ExceptionMatches(PyExc_TypeError))
3055 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3056 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003058 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 else
3060 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061}
3062
3063
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003064/*[clinic input]
3065bytes.decode
3066
3067 encoding: str(c_default="NULL") = 'utf-8'
3068 The encoding with which to decode the bytes.
3069 errors: str(c_default="NULL") = 'strict'
3070 The error handling scheme to use for the handling of decoding errors.
3071 The default is 'strict' meaning that decoding errors raise a
3072 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3073 as well as any other name registered with codecs.register_error that
3074 can handle UnicodeDecodeErrors.
3075
3076Decode the bytes using the codec registered for encoding.
3077[clinic start generated code]*/
3078
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003080bytes_decode_impl(PyBytesObject*self, const char *encoding,
3081 const char *errors)
3082/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003083{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003084 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003085}
3086
Guido van Rossum20188312006-05-05 15:15:40 +00003087
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003088/*[clinic input]
3089bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003090
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003091 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003092
3093Return a list of the lines in the bytes, breaking at line boundaries.
3094
3095Line breaks are not included in the resulting list unless keepends is given and
3096true.
3097[clinic start generated code]*/
3098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003099static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003100bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003101/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003102{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003103 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003104 (PyObject*) self, PyBytes_AS_STRING(self),
3105 PyBytes_GET_SIZE(self), keepends
3106 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003107}
3108
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003109/*[clinic input]
3110@classmethod
3111bytes.fromhex
3112
3113 string: unicode
3114 /
3115
3116Create a bytes object from a string of hexadecimal numbers.
3117
3118Spaces between two numbers are accepted.
3119Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3120[clinic start generated code]*/
3121
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003122static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003123bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003124/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003125{
Victor Stinner2bf89932015-10-14 11:25:33 +02003126 return _PyBytes_FromHex(string, 0);
3127}
3128
3129PyObject*
3130_PyBytes_FromHex(PyObject *string, int use_bytearray)
3131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003132 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02003133 Py_ssize_t hexlen, invalid_char;
3134 unsigned int top, bot;
3135 Py_UCS1 *str, *end;
3136 _PyBytesWriter writer;
3137
3138 _PyBytesWriter_Init(&writer);
3139 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003140
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003141 assert(PyUnicode_Check(string));
3142 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003143 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003144 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003145
Victor Stinner2bf89932015-10-14 11:25:33 +02003146 if (!PyUnicode_IS_ASCII(string)) {
3147 void *data = PyUnicode_DATA(string);
3148 unsigned int kind = PyUnicode_KIND(string);
3149 Py_ssize_t i;
3150
3151 /* search for the first non-ASCII character */
3152 for (i = 0; i < hexlen; i++) {
3153 if (PyUnicode_READ(kind, data, i) >= 128)
3154 break;
3155 }
3156 invalid_char = i;
3157 goto error;
3158 }
3159
3160 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
3161 str = PyUnicode_1BYTE_DATA(string);
3162
3163 /* This overestimates if there are spaces */
3164 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
3165 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02003167
3168 end = str + hexlen;
3169 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003170 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02003171 if (*str == ' ') {
3172 do {
3173 str++;
3174 } while (*str == ' ');
3175 if (str >= end)
3176 break;
3177 }
3178
3179 top = _PyLong_DigitValue[*str];
3180 if (top >= 16) {
3181 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 goto error;
3183 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003184 str++;
3185
3186 bot = _PyLong_DigitValue[*str];
3187 if (bot >= 16) {
3188 invalid_char = str - PyUnicode_1BYTE_DATA(string);
3189 goto error;
3190 }
3191 str++;
3192
3193 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003195
3196 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003197
3198 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02003199 PyErr_Format(PyExc_ValueError,
3200 "non-hexadecimal number found in "
3201 "fromhex() arg at position %zd", invalid_char);
3202 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003203 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003204}
3205
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003206PyDoc_STRVAR(hex__doc__,
3207"B.hex() -> string\n\
3208\n\
3209Create a string of hexadecimal numbers from a bytes object.\n\
3210Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3211
3212static PyObject *
3213bytes_hex(PyBytesObject *self)
3214{
3215 char* argbuf = PyBytes_AS_STRING(self);
3216 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3217 return _Py_strhex(argbuf, arglen);
3218}
3219
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003220static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003221bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003224}
3225
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003226
3227static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003228bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003229 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3230 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3231 _Py_capitalize__doc__},
3232 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3233 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003234 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3236 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003237 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 expandtabs__doc__},
3239 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003240 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003241 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003242 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3243 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3244 _Py_isalnum__doc__},
3245 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3246 _Py_isalpha__doc__},
3247 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3248 _Py_isdigit__doc__},
3249 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3250 _Py_islower__doc__},
3251 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3252 _Py_isspace__doc__},
3253 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3254 _Py_istitle__doc__},
3255 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3256 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003257 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003258 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3259 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003260 BYTES_LSTRIP_METHODDEF
3261 BYTES_MAKETRANS_METHODDEF
3262 BYTES_PARTITION_METHODDEF
3263 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003264 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3265 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3266 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003267 BYTES_RPARTITION_METHODDEF
3268 BYTES_RSPLIT_METHODDEF
3269 BYTES_RSTRIP_METHODDEF
3270 BYTES_SPLIT_METHODDEF
3271 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3273 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003274 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003275 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3276 _Py_swapcase__doc__},
3277 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003278 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003279 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3280 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003281 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003282};
3283
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003284static PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +02003285bytes_mod(PyObject *self, PyObject *args)
Ethan Furmanb95b5612015-01-23 20:05:18 -08003286{
Victor Stinner772b2b02015-10-14 09:56:53 +02003287 if (self == NULL || !PyBytes_Check(self)) {
3288 PyErr_BadInternalCall();
3289 return NULL;
3290 }
3291
3292 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
3293 args, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08003294}
3295
3296static PyNumberMethods bytes_as_number = {
3297 0, /*nb_add*/
3298 0, /*nb_subtract*/
3299 0, /*nb_multiply*/
3300 bytes_mod, /*nb_remainder*/
3301};
3302
3303static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003304bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003305
3306static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003307bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003309 PyObject *x = NULL;
3310 const char *encoding = NULL;
3311 const char *errors = NULL;
3312 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003313 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003314 Py_ssize_t size;
3315 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003316 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003318 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003319 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003320 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3321 &encoding, &errors))
3322 return NULL;
3323 if (x == NULL) {
3324 if (encoding != NULL || errors != NULL) {
3325 PyErr_SetString(PyExc_TypeError,
3326 "encoding or errors without sequence "
3327 "argument");
3328 return NULL;
3329 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003330 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003331 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003332
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003333 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003334 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003335 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003336 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003337 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003338 return NULL;
3339 }
3340 new = PyUnicode_AsEncodedString(x, encoding, errors);
3341 if (new == NULL)
3342 return NULL;
3343 assert(PyBytes_Check(new));
3344 return new;
3345 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003346
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003347 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003348 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003349 PyUnicode_Check(x) ?
3350 "string argument without an encoding" :
3351 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003352 return NULL;
3353 }
3354
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003355 /* We'd like to call PyObject_Bytes here, but we need to check for an
3356 integer argument before deferring to PyBytes_FromObject, something
3357 PyObject_Bytes doesn't do. */
3358 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3359 if (func != NULL) {
3360 new = PyObject_CallFunctionObjArgs(func, NULL);
3361 Py_DECREF(func);
3362 if (new == NULL)
3363 return NULL;
3364 if (!PyBytes_Check(new)) {
3365 PyErr_Format(PyExc_TypeError,
3366 "__bytes__ returned non-bytes (type %.200s)",
3367 Py_TYPE(new)->tp_name);
3368 Py_DECREF(new);
3369 return NULL;
3370 }
3371 return new;
3372 }
3373 else if (PyErr_Occurred())
3374 return NULL;
3375
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003376 if (PyUnicode_Check(x)) {
3377 PyErr_SetString(PyExc_TypeError,
3378 "string argument without an encoding");
3379 return NULL;
3380 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003381 /* Is it an integer? */
3382 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3383 if (size == -1 && PyErr_Occurred()) {
3384 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3385 return NULL;
3386 PyErr_Clear();
3387 }
3388 else if (size < 0) {
3389 PyErr_SetString(PyExc_ValueError, "negative count");
3390 return NULL;
3391 }
3392 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003393 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003394 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003395 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003396 return new;
3397 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003398
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003399 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003400}
3401
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003402static PyObject*
3403_PyBytes_FromBuffer(PyObject *x)
3404{
3405 PyObject *new;
3406 Py_buffer view;
3407
3408 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3409 return NULL;
3410
3411 new = PyBytes_FromStringAndSize(NULL, view.len);
3412 if (!new)
3413 goto fail;
3414 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3415 &view, view.len, 'C') < 0)
3416 goto fail;
3417 PyBuffer_Release(&view);
3418 return new;
3419
3420fail:
3421 Py_XDECREF(new);
3422 PyBuffer_Release(&view);
3423 return NULL;
3424}
3425
Victor Stinner3c50ce32015-10-14 13:50:40 +02003426#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
3427 do { \
3428 PyObject *bytes; \
3429 Py_ssize_t i; \
3430 Py_ssize_t value; \
3431 char *str; \
3432 PyObject *item; \
3433 \
3434 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
3435 if (bytes == NULL) \
3436 return NULL; \
3437 str = ((PyBytesObject *)bytes)->ob_sval; \
3438 \
3439 for (i = 0; i < Py_SIZE(x); i++) { \
3440 item = GET_ITEM((x), i); \
3441 value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
3442 if (value == -1 && PyErr_Occurred()) \
3443 goto error; \
3444 \
3445 if (value < 0 || value >= 256) { \
3446 PyErr_SetString(PyExc_ValueError, \
3447 "bytes must be in range(0, 256)"); \
3448 goto error; \
3449 } \
3450 *str++ = (char) value; \
3451 } \
3452 return bytes; \
3453 \
3454 error: \
3455 Py_DECREF(bytes); \
3456 return NULL; \
3457 } while (0)
3458
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003459static PyObject*
3460_PyBytes_FromList(PyObject *x)
3461{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003462 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003463}
3464
3465static PyObject*
3466_PyBytes_FromTuple(PyObject *x)
3467{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003468 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003469}
3470
3471static PyObject *
3472_PyBytes_FromIterator(PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003473{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003474 char *str;
3475 PyObject *it;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003476 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003477 _PyBytesWriter writer;
3478
3479 _PyBytesWriter_Init(&writer);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003481 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003482 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003483 if (size == -1 && PyErr_Occurred())
3484 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003485
3486 str = _PyBytesWriter_Alloc(&writer, size);
3487 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003488 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003489 writer.overallocate = 1;
3490 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 /* Get the iterator */
3493 it = PyObject_GetIter(x);
3494 if (it == NULL)
3495 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003497 /* Run the iterator to exhaustion */
3498 for (i = 0; ; i++) {
3499 PyObject *item;
3500 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003502 /* Get the next item */
3503 item = PyIter_Next(it);
3504 if (item == NULL) {
3505 if (PyErr_Occurred())
3506 goto error;
3507 break;
3508 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003510 /* Interpret it as an int (__index__) */
3511 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3512 Py_DECREF(item);
3513 if (value == -1 && PyErr_Occurred())
3514 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003516 /* Range check */
3517 if (value < 0 || value >= 256) {
3518 PyErr_SetString(PyExc_ValueError,
3519 "bytes must be in range(0, 256)");
3520 goto error;
3521 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003523 /* Append the byte */
3524 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003525 str = _PyBytesWriter_Resize(&writer, str, size+1);
3526 if (str == NULL)
3527 return NULL;
3528 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003529 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003530 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003531 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003532 Py_DECREF(it);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003533
3534 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003535
3536 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003537 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003538 Py_XDECREF(it);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003539 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003540}
3541
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003542PyObject *
3543PyBytes_FromObject(PyObject *x)
3544{
3545 if (x == NULL) {
3546 PyErr_BadInternalCall();
3547 return NULL;
3548 }
3549
3550 if (PyBytes_CheckExact(x)) {
3551 Py_INCREF(x);
3552 return x;
3553 }
3554
3555 /* Use the modern buffer interface */
3556 if (PyObject_CheckBuffer(x))
3557 return _PyBytes_FromBuffer(x);
3558
3559 if (PyList_CheckExact(x))
3560 return _PyBytes_FromList(x);
3561
3562 if (PyTuple_CheckExact(x))
3563 return _PyBytes_FromTuple(x);
3564
3565 if (PyUnicode_Check(x)) {
3566 PyErr_SetString(PyExc_TypeError,
3567 "cannot convert unicode object to bytes");
3568 return NULL;
3569 }
3570
3571 return _PyBytes_FromIterator(x);
3572}
3573
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003574static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003575bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 PyObject *tmp, *pnew;
3578 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003580 assert(PyType_IsSubtype(type, &PyBytes_Type));
3581 tmp = bytes_new(&PyBytes_Type, args, kwds);
3582 if (tmp == NULL)
3583 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003584 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003585 n = PyBytes_GET_SIZE(tmp);
3586 pnew = type->tp_alloc(type, n);
3587 if (pnew != NULL) {
3588 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3589 PyBytes_AS_STRING(tmp), n+1);
3590 ((PyBytesObject *)pnew)->ob_shash =
3591 ((PyBytesObject *)tmp)->ob_shash;
3592 }
3593 Py_DECREF(tmp);
3594 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003595}
3596
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003597PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003598"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003599bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003600bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003601bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3602bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003603\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003604Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003605 - an iterable yielding integers in range(256)\n\
3606 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003607 - any object implementing the buffer API.\n\
3608 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003609
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003610static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003611
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003612PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003613 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3614 "bytes",
3615 PyBytesObject_SIZE,
3616 sizeof(char),
3617 bytes_dealloc, /* tp_dealloc */
3618 0, /* tp_print */
3619 0, /* tp_getattr */
3620 0, /* tp_setattr */
3621 0, /* tp_reserved */
3622 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003623 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003624 &bytes_as_sequence, /* tp_as_sequence */
3625 &bytes_as_mapping, /* tp_as_mapping */
3626 (hashfunc)bytes_hash, /* tp_hash */
3627 0, /* tp_call */
3628 bytes_str, /* tp_str */
3629 PyObject_GenericGetAttr, /* tp_getattro */
3630 0, /* tp_setattro */
3631 &bytes_as_buffer, /* tp_as_buffer */
3632 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3633 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3634 bytes_doc, /* tp_doc */
3635 0, /* tp_traverse */
3636 0, /* tp_clear */
3637 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3638 0, /* tp_weaklistoffset */
3639 bytes_iter, /* tp_iter */
3640 0, /* tp_iternext */
3641 bytes_methods, /* tp_methods */
3642 0, /* tp_members */
3643 0, /* tp_getset */
3644 &PyBaseObject_Type, /* tp_base */
3645 0, /* tp_dict */
3646 0, /* tp_descr_get */
3647 0, /* tp_descr_set */
3648 0, /* tp_dictoffset */
3649 0, /* tp_init */
3650 0, /* tp_alloc */
3651 bytes_new, /* tp_new */
3652 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003653};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003654
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003655void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003656PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003658 assert(pv != NULL);
3659 if (*pv == NULL)
3660 return;
3661 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003662 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003663 return;
3664 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003665
3666 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3667 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003668 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003669 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003670
Antoine Pitrou161d6952014-05-01 14:36:20 +02003671 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003672 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003673 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3674 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3675 Py_CLEAR(*pv);
3676 return;
3677 }
3678
3679 oldsize = PyBytes_GET_SIZE(*pv);
3680 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3681 PyErr_NoMemory();
3682 goto error;
3683 }
3684 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3685 goto error;
3686
3687 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3688 PyBuffer_Release(&wb);
3689 return;
3690
3691 error:
3692 PyBuffer_Release(&wb);
3693 Py_CLEAR(*pv);
3694 return;
3695 }
3696
3697 else {
3698 /* Multiple references, need to create new object */
3699 PyObject *v;
3700 v = bytes_concat(*pv, w);
Serhiy Storchaka5a57ade2015-12-24 10:35:59 +02003701 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003703}
3704
3705void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003706PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003708 PyBytes_Concat(pv, w);
3709 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003710}
3711
3712
Ethan Furmanb95b5612015-01-23 20:05:18 -08003713/* The following function breaks the notion that bytes are immutable:
3714 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003715 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003716 as creating a new bytes object and destroying the old one, only
3717 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003718 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003719 Note that if there's not enough memory to resize the bytes object, the
3720 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003721 memory" exception is set, and -1 is returned. Else (on success) 0 is
3722 returned, and the value in *pv may or may not be the same as on input.
3723 As always, an extra byte is allocated for a trailing \0 byte (newsize
3724 does *not* include that), and a trailing \0 byte is stored.
3725*/
3726
3727int
3728_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3729{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003730 PyObject *v;
3731 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003732 v = *pv;
3733 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3734 *pv = 0;
3735 Py_DECREF(v);
3736 PyErr_BadInternalCall();
3737 return -1;
3738 }
3739 /* XXX UNREF/NEWREF interface should be more symmetrical */
3740 _Py_DEC_REFTOTAL;
3741 _Py_ForgetReference(v);
3742 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003743 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003744 if (*pv == NULL) {
3745 PyObject_Del(v);
3746 PyErr_NoMemory();
3747 return -1;
3748 }
3749 _Py_NewReference(*pv);
3750 sv = (PyBytesObject *) *pv;
3751 Py_SIZE(sv) = newsize;
3752 sv->ob_sval[newsize] = '\0';
3753 sv->ob_shash = -1; /* invalidate cached hash value */
3754 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003755}
3756
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003757void
3758PyBytes_Fini(void)
3759{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003760 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003761 for (i = 0; i < UCHAR_MAX + 1; i++)
3762 Py_CLEAR(characters[i]);
3763 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003764}
3765
Benjamin Peterson4116f362008-05-27 00:36:20 +00003766/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003767
3768typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003769 PyObject_HEAD
3770 Py_ssize_t it_index;
3771 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003772} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003773
3774static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003775striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003776{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003777 _PyObject_GC_UNTRACK(it);
3778 Py_XDECREF(it->it_seq);
3779 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003780}
3781
3782static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003783striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003784{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003785 Py_VISIT(it->it_seq);
3786 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003787}
3788
3789static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003790striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 PyBytesObject *seq;
3793 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003795 assert(it != NULL);
3796 seq = it->it_seq;
3797 if (seq == NULL)
3798 return NULL;
3799 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003801 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3802 item = PyLong_FromLong(
3803 (unsigned char)seq->ob_sval[it->it_index]);
3804 if (item != NULL)
3805 ++it->it_index;
3806 return item;
3807 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003809 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003810 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003811 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003812}
3813
3814static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003815striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003816{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003817 Py_ssize_t len = 0;
3818 if (it->it_seq)
3819 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3820 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003821}
3822
3823PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003824 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003825
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003826static PyObject *
3827striter_reduce(striterobject *it)
3828{
3829 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003830 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003831 it->it_seq, it->it_index);
3832 } else {
3833 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3834 if (u == NULL)
3835 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003836 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003837 }
3838}
3839
3840PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3841
3842static PyObject *
3843striter_setstate(striterobject *it, PyObject *state)
3844{
3845 Py_ssize_t index = PyLong_AsSsize_t(state);
3846 if (index == -1 && PyErr_Occurred())
3847 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003848 if (it->it_seq != NULL) {
3849 if (index < 0)
3850 index = 0;
3851 else if (index > PyBytes_GET_SIZE(it->it_seq))
3852 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3853 it->it_index = index;
3854 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003855 Py_RETURN_NONE;
3856}
3857
3858PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3859
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003860static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003861 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3862 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003863 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3864 reduce_doc},
3865 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3866 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003867 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003868};
3869
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003870PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003871 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3872 "bytes_iterator", /* tp_name */
3873 sizeof(striterobject), /* tp_basicsize */
3874 0, /* tp_itemsize */
3875 /* methods */
3876 (destructor)striter_dealloc, /* tp_dealloc */
3877 0, /* tp_print */
3878 0, /* tp_getattr */
3879 0, /* tp_setattr */
3880 0, /* tp_reserved */
3881 0, /* tp_repr */
3882 0, /* tp_as_number */
3883 0, /* tp_as_sequence */
3884 0, /* tp_as_mapping */
3885 0, /* tp_hash */
3886 0, /* tp_call */
3887 0, /* tp_str */
3888 PyObject_GenericGetAttr, /* tp_getattro */
3889 0, /* tp_setattro */
3890 0, /* tp_as_buffer */
3891 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3892 0, /* tp_doc */
3893 (traverseproc)striter_traverse, /* tp_traverse */
3894 0, /* tp_clear */
3895 0, /* tp_richcompare */
3896 0, /* tp_weaklistoffset */
3897 PyObject_SelfIter, /* tp_iter */
3898 (iternextfunc)striter_next, /* tp_iternext */
3899 striter_methods, /* tp_methods */
3900 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003901};
3902
3903static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003904bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003906 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003908 if (!PyBytes_Check(seq)) {
3909 PyErr_BadInternalCall();
3910 return NULL;
3911 }
3912 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3913 if (it == NULL)
3914 return NULL;
3915 it->it_index = 0;
3916 Py_INCREF(seq);
3917 it->it_seq = (PyBytesObject *)seq;
3918 _PyObject_GC_TRACK(it);
3919 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003920}
Victor Stinner00165072015-10-09 01:53:21 +02003921
3922
3923/* _PyBytesWriter API */
3924
/* The writer overallocates by (size / OVERALLOCATE_FACTOR) extra bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3932
3933void
3934_PyBytesWriter_Init(_PyBytesWriter *writer)
3935{
Victor Stinner661aacc2015-10-14 09:41:48 +02003936 /* Set all attributes before small_buffer to 0 */
3937 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003938#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003939 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003940#endif
3941}
3942
3943void
3944_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3945{
3946 Py_CLEAR(writer->buffer);
3947}
3948
3949Py_LOCAL_INLINE(char*)
3950_PyBytesWriter_AsString(_PyBytesWriter *writer)
3951{
Victor Stinner661aacc2015-10-14 09:41:48 +02003952 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003953 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003954 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003955 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003956 else if (writer->use_bytearray) {
3957 assert(writer->buffer != NULL);
3958 return PyByteArray_AS_STRING(writer->buffer);
3959 }
3960 else {
3961 assert(writer->buffer != NULL);
3962 return PyBytes_AS_STRING(writer->buffer);
3963 }
Victor Stinner00165072015-10-09 01:53:21 +02003964}
3965
3966Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003967_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003968{
3969 char *start = _PyBytesWriter_AsString(writer);
3970 assert(str != NULL);
3971 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003972 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003973 return str - start;
3974}
3975
3976Py_LOCAL_INLINE(void)
3977_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3978{
3979#ifdef Py_DEBUG
3980 char *start, *end;
3981
Victor Stinner661aacc2015-10-14 09:41:48 +02003982 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003983 assert(writer->buffer == NULL);
3984 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003985 else {
3986 assert(writer->buffer != NULL);
3987 if (writer->use_bytearray)
3988 assert(PyByteArray_CheckExact(writer->buffer));
3989 else
3990 assert(PyBytes_CheckExact(writer->buffer));
3991 assert(Py_REFCNT(writer->buffer) == 1);
3992 }
Victor Stinner00165072015-10-09 01:53:21 +02003993
Victor Stinner661aacc2015-10-14 09:41:48 +02003994 if (writer->use_bytearray) {
3995 /* bytearray has its own overallocation algorithm,
3996 writer overallocation must be disabled */
3997 assert(!writer->overallocate);
3998 }
3999
4000 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02004001 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02004002 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02004003 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02004004 assert(start[writer->allocated] == 0);
4005
4006 end = start + writer->allocated;
4007 assert(str != NULL);
4008 assert(start <= str && str <= end);
4009#endif
4010}
4011
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004012void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004013_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02004014{
4015 Py_ssize_t allocated, pos;
4016
4017 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004018 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02004019
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004020 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02004021 if (writer->overallocate
4022 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
4023 /* overallocate to limit the number of realloc() */
4024 allocated += allocated / OVERALLOCATE_FACTOR;
4025 }
4026
Victor Stinner2bf89932015-10-14 11:25:33 +02004027 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02004028 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004029 if (writer->use_bytearray) {
4030 if (PyByteArray_Resize(writer->buffer, allocated))
4031 goto error;
4032 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
4033 but we cannot use ob_alloc because bytes may need to be moved
4034 to use the whole buffer. bytearray uses an internal optimization
4035 to avoid moving or copying bytes when bytes are removed at the
4036 beginning (ex: del bytearray[:1]). */
4037 }
4038 else {
4039 if (_PyBytes_Resize(&writer->buffer, allocated))
4040 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004041 }
4042 }
4043 else {
4044 /* convert from stack buffer to bytes object buffer */
4045 assert(writer->buffer == NULL);
4046
Victor Stinner661aacc2015-10-14 09:41:48 +02004047 if (writer->use_bytearray)
4048 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
4049 else
4050 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02004051 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02004052 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004053
4054 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004055 char *dest;
4056 if (writer->use_bytearray)
4057 dest = PyByteArray_AS_STRING(writer->buffer);
4058 else
4059 dest = PyBytes_AS_STRING(writer->buffer);
4060 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02004061 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02004062 pos);
4063 }
4064
Victor Stinnerb3653a32015-10-09 03:38:24 +02004065 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004066#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004067 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02004068#endif
Victor Stinner00165072015-10-09 01:53:21 +02004069 }
4070 writer->allocated = allocated;
4071
4072 str = _PyBytesWriter_AsString(writer) + pos;
4073 _PyBytesWriter_CheckConsistency(writer, str);
4074 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02004075
4076error:
4077 _PyBytesWriter_Dealloc(writer);
4078 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02004079}
4080
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004081void*
4082_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
4083{
4084 Py_ssize_t new_min_size;
4085
4086 _PyBytesWriter_CheckConsistency(writer, str);
4087 assert(size >= 0);
4088
4089 if (size == 0) {
4090 /* nothing to do */
4091 return str;
4092 }
4093
4094 if (writer->min_size > PY_SSIZE_T_MAX - size) {
4095 PyErr_NoMemory();
4096 _PyBytesWriter_Dealloc(writer);
4097 return NULL;
4098 }
4099 new_min_size = writer->min_size + size;
4100
4101 if (new_min_size > writer->allocated)
4102 str = _PyBytesWriter_Resize(writer, str, new_min_size);
4103
4104 writer->min_size = new_min_size;
4105 return str;
4106}
4107
Victor Stinner00165072015-10-09 01:53:21 +02004108/* Allocate the buffer to write size bytes.
4109 Return the pointer to the beginning of buffer data.
4110 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004111void*
Victor Stinner00165072015-10-09 01:53:21 +02004112_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
4113{
4114 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02004115 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004116 assert(size >= 0);
4117
Victor Stinnerb3653a32015-10-09 03:38:24 +02004118 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004119#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004120 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02004121 /* In debug mode, don't use the full small buffer because it is less
4122 efficient than bytes and bytearray objects to detect buffer underflow
4123 and buffer overflow. Use 10 bytes of the small buffer to test also
4124 code using the smaller buffer in debug mode.
4125
4126 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
4127 in debug mode to also be able to detect stack overflow when running
4128 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
4129 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
4130 stack overflow. */
4131 writer->allocated = Py_MIN(writer->allocated, 10);
4132 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
4133 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004134 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004135#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004136 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004137#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004138 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004139}
4140
4141PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004142_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004143{
Victor Stinner2bf89932015-10-14 11:25:33 +02004144 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02004145 PyObject *result;
4146
4147 _PyBytesWriter_CheckConsistency(writer, str);
4148
Victor Stinner2bf89932015-10-14 11:25:33 +02004149 size = _PyBytesWriter_GetSize(writer, str);
4150 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004151 Py_CLEAR(writer->buffer);
4152 /* Get the empty byte string singleton */
4153 result = PyBytes_FromStringAndSize(NULL, 0);
4154 }
4155 else if (writer->use_small_buffer) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004156 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004157 }
4158 else {
4159 result = writer->buffer;
4160 writer->buffer = NULL;
4161
Victor Stinner2bf89932015-10-14 11:25:33 +02004162 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004163 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004164 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004165 Py_DECREF(result);
4166 return NULL;
4167 }
4168 }
4169 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02004170 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004171 assert(result == NULL);
4172 return NULL;
4173 }
Victor Stinner00165072015-10-09 01:53:21 +02004174 }
4175 }
Victor Stinner00165072015-10-09 01:53:21 +02004176 }
Victor Stinner00165072015-10-09 01:53:21 +02004177 return result;
4178}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004179
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004180void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02004181_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004182 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004183{
Victor Stinnere9aa5952015-10-12 13:57:47 +02004184 char *str = (char *)ptr;
4185
Victor Stinnerce179bf2015-10-09 12:57:22 +02004186 str = _PyBytesWriter_Prepare(writer, str, size);
4187 if (str == NULL)
4188 return NULL;
4189
4190 Py_MEMCPY(str, bytes, size);
4191 str += size;
4192
4193 return str;
4194}