blob: ec03233ba384a391e670f35b08a7372148669c43 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* Basic size of a bytes object: the offset of the ob_sval member plus one
   byte (the constructors below store a '\0' right after the payload).
   Any allocation for a string of length n should therefore request
   PyBytesObject_SIZE + n bytes.

   Using this instead of sizeof(PyBytesObject) saves 3 bytes per string
   allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Flag bits collected while parsing a '%' conversion; each one records
   that the corresponding flag character was seen in the specifier. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
441 str += len;
442 return str;
443 }
444
445 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200447 *p_result = result;
448 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449}
450
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300451static PyObject *
452formatlong(PyObject *v, int flags, int prec, int type)
453{
454 PyObject *result, *iobj;
455 if (type == 'i')
456 type = 'd';
457 if (PyLong_Check(v))
458 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
459 if (PyNumber_Check(v)) {
460 /* make sure number is a type of integer for o, x, and X */
461 if (type == 'o' || type == 'x' || type == 'X')
462 iobj = PyNumber_Index(v);
463 else
464 iobj = PyNumber_Long(v);
465 if (iobj == NULL) {
466 if (!PyErr_ExceptionMatches(PyExc_TypeError))
467 return NULL;
468 }
469 else if (!PyLong_Check(iobj))
470 Py_CLEAR(iobj);
471 if (iobj != NULL) {
472 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
473 Py_DECREF(iobj);
474 return result;
475 }
476 }
477 PyErr_Format(PyExc_TypeError,
478 "%%%c format: %s is required, not %.200s", type,
479 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : "a number",
481 Py_TYPE(v)->tp_name);
482 return NULL;
483}
484
485static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
489 *p = PyBytes_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
493 *p = PyByteArray_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
496 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300497 PyObject *iobj;
498 long ival;
499 int overflow;
500 /* make sure number is a type of integer */
501 if (PyLong_Check(arg)) {
502 ival = PyLong_AsLongAndOverflow(arg, &overflow);
503 }
504 else {
505 iobj = PyNumber_Index(arg);
506 if (iobj == NULL) {
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return 0;
509 goto onError;
510 }
511 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
512 Py_DECREF(iobj);
513 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300514 if (!overflow && ival == -1 && PyErr_Occurred())
515 goto onError;
516 if (overflow || !(0 <= ival && ival <= 255)) {
517 PyErr_SetString(PyExc_OverflowError,
518 "%c arg not in range(256)");
519 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300521 *p = (char)ival;
522 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300524 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 PyErr_SetString(PyExc_TypeError,
526 "%c requires an integer in range(256) or a single byte");
527 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528}
529
530static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 /* is it a bytes object? */
536 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 *pbuf = PyBytes_AS_STRING(v);
538 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200540 return v;
541 }
542 if (PyByteArray_Check(v)) {
543 *pbuf = PyByteArray_AS_STRING(v);
544 *plen = PyByteArray_GET_SIZE(v);
545 Py_INCREF(v);
546 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 }
548 /* does it support __bytes__? */
549 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
550 if (func != NULL) {
551 result = PyObject_CallFunctionObjArgs(func, NULL);
552 Py_DECREF(func);
553 if (result == NULL)
554 return NULL;
555 if (!PyBytes_Check(result)) {
556 PyErr_Format(PyExc_TypeError,
557 "__bytes__ returned non-bytes (type %.200s)",
558 Py_TYPE(result)->tp_name);
559 Py_DECREF(result);
560 return NULL;
561 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200562 *pbuf = PyBytes_AS_STRING(result);
563 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 return result;
565 }
566 PyErr_Format(PyExc_TypeError,
567 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
568 Py_TYPE(v)->tp_name);
569 return NULL;
570}
571
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200572/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573
574PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200575_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
576 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577{
Victor Stinner772b2b02015-10-14 09:56:53 +0200578 const char *fmt;
579 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587 PyErr_BadInternalCall();
588 return NULL;
589 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 fmt = format;
591 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595
596 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
597 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 if (!use_bytearray)
600 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
Ethan Furmanb95b5612015-01-23 20:05:18 -0800602 if (PyTuple_Check(args)) {
603 arglen = PyTuple_GET_SIZE(args);
604 argidx = 0;
605 }
606 else {
607 arglen = -1;
608 argidx = -2;
609 }
610 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
611 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
612 !PyByteArray_Check(args)) {
613 dict = args;
614 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
Ethan Furmanb95b5612015-01-23 20:05:18 -0800616 while (--fmtcnt >= 0) {
617 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618 Py_ssize_t len;
619 char *pos;
620
621 pos = strchr(fmt + 1, '%');
622 if (pos != NULL)
623 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200624 else
625 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200653 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200811 *res++ = '%';
812 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813
Ethan Furman62e977f2015-03-11 08:17:00 -0700814 case 'r':
815 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200817 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 if (temp == NULL)
819 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200820 assert(PyUnicode_IS_ASCII(temp));
821 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (prec >= 0 && len > prec)
824 len = prec;
825 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 case 's':
828 // %s is only for 2/3 code; 3 only code should use %b
829 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200830 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 if (temp == NULL)
832 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 if (prec >= 0 && len > prec)
834 len = prec;
835 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200836
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'i':
838 case 'd':
839 case 'u':
840 case 'o':
841 case 'x':
842 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200844 && width == -1 && prec == -1
845 && !(flags & (F_SIGN | F_BLANK))
846 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200847 {
848 /* Fast path */
849 int alternate = flags & F_ALT;
850 int base;
851
852 switch(c)
853 {
854 default:
855 assert(0 && "'type' not in [diuoxX]");
856 case 'd':
857 case 'i':
858 case 'u':
859 base = 10;
860 break;
861 case 'o':
862 base = 8;
863 break;
864 case 'x':
865 case 'X':
866 base = 16;
867 break;
868 }
869
870 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200871 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 res = _PyLong_FormatBytesWriter(&writer, res,
873 v, base, alternate);
874 if (res == NULL)
875 goto error;
876 continue;
877 }
878
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300879 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200880 if (!temp)
881 goto error;
882 assert(PyUnicode_IS_ASCII(temp));
883 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 len = PyUnicode_GET_LENGTH(temp);
885 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 if (flags & F_ZERO)
887 fill = '0';
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 case 'e':
891 case 'E':
892 case 'f':
893 case 'F':
894 case 'g':
895 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896 if (width == -1 && prec == -1
897 && !(flags & (F_SIGN | F_BLANK)))
898 {
899 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200900 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (res == NULL)
903 goto error;
904 continue;
905 }
906
Victor Stinnerad771582015-10-09 12:38:53 +0200907 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 goto error;
909 pbuf = PyBytes_AS_STRING(temp);
910 len = PyBytes_GET_SIZE(temp);
911 sign = 1;
912 if (flags & F_ZERO)
913 fill = '0';
914 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200915
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200917 pbuf = &onechar;
918 len = byte_converter(v, &onechar);
919 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 if (width == -1) {
922 /* Fast path */
923 *res++ = onechar;
924 continue;
925 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200927
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 default:
929 PyErr_Format(PyExc_ValueError,
930 "unsupported format character '%c' (0x%x) "
931 "at index %zd",
932 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200933 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 goto error;
935 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 if (sign) {
938 if (*pbuf == '-' || *pbuf == '+') {
939 sign = *pbuf++;
940 len--;
941 }
942 else if (flags & F_SIGN)
943 sign = '+';
944 else if (flags & F_BLANK)
945 sign = ' ';
946 else
947 sign = 0;
948 }
949 if (width < len)
950 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200951
952 alloc = width;
953 if (sign != 0 && len == width)
954 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200955 /* 2: size preallocated for %s */
956 if (alloc > 2) {
957 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200958 if (res == NULL)
959 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200961#ifdef Py_DEBUG
962 before = res;
963#endif
964
965 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 if (sign) {
967 if (fill != ' ')
968 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (width > len)
970 width--;
971 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
973 /* Write the numeric prefix for "x", "X" and "o" formats
974 if the alternate form is used.
975 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
977 assert(pbuf[0] == '0');
978 assert(pbuf[1] == c);
979 if (fill != ' ') {
980 *res++ = *pbuf++;
981 *res++ = *pbuf++;
982 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 width -= 2;
984 if (width < 0)
985 width = 0;
986 len -= 2;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991 memset(res, fill, width - len);
992 res += (width - len);
993 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* If padding with spaces: write sign if needed and/or numeric
997 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (fill == ' ') {
999 if (sign)
1000 *res++ = sign;
1001 if ((flags & F_ALT) &&
1002 (c == 'x' || c == 'X')) {
1003 assert(pbuf[0] == '0');
1004 assert(pbuf[1] == c);
1005 *res++ = *pbuf++;
1006 *res++ = *pbuf++;
1007 }
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 Py_MEMCPY(res, pbuf, len);
1012 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Pad right with the fill character if needed */
1015 if (width > len) {
1016 memset(res, ' ', width - len);
1017 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 if (dict && (argidx < arglen) && c != '%') {
1021 PyErr_SetString(PyExc_TypeError,
1022 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 Py_XDECREF(temp);
1024 goto error;
1025 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028#ifdef Py_DEBUG
1029 /* check that we computed the exact size for this write */
1030 assert((res - before) == alloc);
1031#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* If overallocation was disabled, ensure that it was the last
1035 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 if (argidx < arglen && !dict) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 goto error;
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (args_owned) {
1046 Py_DECREF(args);
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049
1050 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (args_owned) {
1053 Py_DECREF(args);
1054 }
1055 return NULL;
1056}
1057
1058/* =-= */
1059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064}
1065
/* Unescape a backslash-escaped string. If unicode is non-zero,
   the string is a u-literal. If recode_encoding is non-NULL,
   the string is UTF-8 encoded and should be re-encoded in the
   specified encoding. */
1070
Victor Stinner2ec80632015-10-14 13:32:13 +02001071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073 const char *errors, const char *recode_encoding,
1074 _PyBytesWriter *writer, char *p)
1075{
1076 PyObject *u, *w;
1077 const char* t;
1078
1079 t = *s;
1080 /* Decode non-ASCII bytes as UTF-8. */
1081 while (t < end && (*t & 0x80))
1082 t++;
1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084 if (u == NULL)
1085 return NULL;
1086
1087 /* Recode them in target encoding. */
1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089 Py_DECREF(u);
1090 if (w == NULL)
1091 return NULL;
1092 assert(PyBytes_Check(w));
1093
1094 /* Append bytes to output buffer. */
1095 writer->min_size--; /* substract 1 preallocated byte */
1096 p = _PyBytesWriter_WriteBytes(writer, p,
1097 PyBytes_AS_STRING(w),
1098 PyBytes_GET_SIZE(w));
1099 Py_DECREF(w);
1100 if (p == NULL)
1101 return NULL;
1102
1103 *s = t;
1104 return p;
1105}
1106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 Py_ssize_t len,
1109 const char *errors,
1110 Py_ssize_t unicode,
1111 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001114 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001116 _PyBytesWriter writer;
1117
1118 _PyBytesWriter_Init(&writer);
1119
1120 p = _PyBytesWriter_Alloc(&writer, len);
1121 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001123 writer.overallocate = 1;
1124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 end = s + len;
1126 while (s < end) {
1127 if (*s != '\\') {
1128 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 *p++ = *s++;
1131 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 else {
1133 /* non-ASCII character and need to recode */
1134 p = _PyBytes_DecodeEscapeRecode(&s, end,
1135 errors, recode_encoding,
1136 &writer, p);
1137 if (p == NULL)
1138 goto failed;
1139 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 continue;
1141 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001144 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 PyErr_SetString(PyExc_ValueError,
1146 "Trailing \\ in string");
1147 goto failed;
1148 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 switch (*s++) {
1151 /* XXX This assumes ASCII! */
1152 case '\n': break;
1153 case '\\': *p++ = '\\'; break;
1154 case '\'': *p++ = '\''; break;
1155 case '\"': *p++ = '\"'; break;
1156 case 'b': *p++ = '\b'; break;
1157 case 'f': *p++ = '\014'; break; /* FF */
1158 case 't': *p++ = '\t'; break;
1159 case 'n': *p++ = '\n'; break;
1160 case 'r': *p++ = '\r'; break;
1161 case 'v': *p++ = '\013'; break; /* VT */
1162 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1163 case '0': case '1': case '2': case '3':
1164 case '4': case '5': case '6': case '7':
1165 c = s[-1] - '0';
1166 if (s < end && '0' <= *s && *s <= '7') {
1167 c = (c<<3) + *s++ - '0';
1168 if (s < end && '0' <= *s && *s <= '7')
1169 c = (c<<3) + *s++ - '0';
1170 }
1171 *p++ = c;
1172 break;
1173 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001174 if (s+1 < end) {
1175 int digit1, digit2;
1176 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1177 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1178 if (digit1 < 16 && digit2 < 16) {
1179 *p++ = (unsigned char)((digit1 << 4) + digit2);
1180 s += 2;
1181 break;
1182 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001184 /* invalid hexadecimal digits */
1185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001187 PyErr_Format(PyExc_ValueError,
1188 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001189 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 goto failed;
1191 }
1192 if (strcmp(errors, "replace") == 0) {
1193 *p++ = '?';
1194 } else if (strcmp(errors, "ignore") == 0)
1195 /* do nothing */;
1196 else {
1197 PyErr_Format(PyExc_ValueError,
1198 "decoding error; unknown "
1199 "error handling code: %.400s",
1200 errors);
1201 goto failed;
1202 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001203 /* skip \x */
1204 if (s < end && Py_ISXDIGIT(s[0]))
1205 s++; /* and a hexdigit */
1206 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 default:
1209 *p++ = '\\';
1210 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001211 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 UTF-8 bytes may follow. */
1213 }
1214 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001215
1216 return _PyBytesWriter_Finish(&writer, p);
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001219 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223/* -------------------------------------------------------------------- */
1224/* object api */
1225
1226Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001227PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(op)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1232 return -1;
1233 }
1234 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235}
1236
1237char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001238PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 if (!PyBytes_Check(op)) {
1241 PyErr_Format(PyExc_TypeError,
1242 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243 return NULL;
1244 }
1245 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246}
1247
1248int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249PyBytes_AsStringAndSize(PyObject *obj,
1250 char **s,
1251 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 if (s == NULL) {
1254 PyErr_BadInternalCall();
1255 return -1;
1256 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 if (!PyBytes_Check(obj)) {
1259 PyErr_Format(PyExc_TypeError,
1260 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1261 return -1;
1262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 *s = PyBytes_AS_STRING(obj);
1265 if (len != NULL)
1266 *len = PyBytes_GET_SIZE(obj);
1267 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001268 PyErr_SetString(PyExc_ValueError,
1269 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 return -1;
1271 }
1272 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273}
Neal Norwitz6968b052007-02-27 19:02:19 +00001274
1275/* -------------------------------------------------------------------- */
1276/* Methods */
1277
Eric Smith0923d1d2009-04-16 20:16:10 +00001278#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001279
1280#include "stringlib/fastsearch.h"
1281#include "stringlib/count.h"
1282#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001283#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001284#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001286#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Eric Smith0f78bff2009-11-30 01:01:42 +00001288#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290PyObject *
1291PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001292{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001293 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001295 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 unsigned char quote, *s, *p;
1298
1299 /* Compute size of output string */
1300 squotes = dquotes = 0;
1301 newsize = 3; /* b'' */
1302 s = (unsigned char*)op->ob_sval;
1303 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001304 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 case '\'': squotes++; break;
1307 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 default:
1311 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001314 if (newsize > PY_SSIZE_T_MAX - incr)
1315 goto overflow;
1316 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 }
1318 quote = '\'';
1319 if (smartquotes && squotes && !dquotes)
1320 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001321 if (squotes && quote == '\'') {
1322 if (newsize > PY_SSIZE_T_MAX - squotes)
1323 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 if (v == NULL) {
1329 return NULL;
1330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 *p++ = 'b', *p++ = quote;
1334 for (i = 0; i < length; i++) {
1335 unsigned char c = op->ob_sval[i];
1336 if (c == quote || c == '\\')
1337 *p++ = '\\', *p++ = c;
1338 else if (c == '\t')
1339 *p++ = '\\', *p++ = 't';
1340 else if (c == '\n')
1341 *p++ = '\\', *p++ = 'n';
1342 else if (c == '\r')
1343 *p++ = '\\', *p++ = 'r';
1344 else if (c < ' ' || c >= 0x7f) {
1345 *p++ = '\\';
1346 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001347 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1348 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 else
1351 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001354 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356
1357 overflow:
1358 PyErr_SetString(PyExc_OverflowError,
1359 "bytes object is too large to make repr");
1360 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001361}
1362
Neal Norwitz6968b052007-02-27 19:02:19 +00001363static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001364bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001367}
1368
Neal Norwitz6968b052007-02-27 19:02:19 +00001369static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001370bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (Py_BytesWarningFlag) {
1373 if (PyErr_WarnEx(PyExc_BytesWarning,
1374 "str() on a bytes instance", 1))
1375 return NULL;
1376 }
1377 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001378}
1379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001381bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384}
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386/* This is also used by PyBytes_Concat() */
1387static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001388bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 Py_ssize_t size;
1391 Py_buffer va, vb;
1392 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 va.len = -1;
1395 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001396 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1397 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1399 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1400 goto done;
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 /* Optimize end cases */
1404 if (va.len == 0 && PyBytes_CheckExact(b)) {
1405 result = b;
1406 Py_INCREF(result);
1407 goto done;
1408 }
1409 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1410 result = a;
1411 Py_INCREF(result);
1412 goto done;
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size = va.len + vb.len;
1416 if (size < 0) {
1417 PyErr_NoMemory();
1418 goto done;
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 result = PyBytes_FromStringAndSize(NULL, size);
1422 if (result != NULL) {
1423 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1424 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1425 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
1427 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (va.len != -1)
1429 PyBuffer_Release(&va);
1430 if (vb.len != -1)
1431 PyBuffer_Release(&vb);
1432 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
Neal Norwitz6968b052007-02-27 19:02:19 +00001434
1435static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001436bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438 Py_ssize_t i;
1439 Py_ssize_t j;
1440 Py_ssize_t size;
1441 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 size_t nbytes;
1443 if (n < 0)
1444 n = 0;
1445 /* watch out for overflows: the size can overflow int,
1446 * and the # of bytes needed can overflow size_t
1447 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001448 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyErr_SetString(PyExc_OverflowError,
1450 "repeated bytes are too long");
1451 return NULL;
1452 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001453 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1455 Py_INCREF(a);
1456 return (PyObject *)a;
1457 }
1458 nbytes = (size_t)size;
1459 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1460 PyErr_SetString(PyExc_OverflowError,
1461 "repeated bytes are too long");
1462 return NULL;
1463 }
1464 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1465 if (op == NULL)
1466 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001467 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 op->ob_shash = -1;
1469 op->ob_sval[size] = '\0';
1470 if (Py_SIZE(a) == 1 && n > 0) {
1471 memset(op->ob_sval, a->ob_sval[0] , n);
1472 return (PyObject *) op;
1473 }
1474 i = 0;
1475 if (i < size) {
1476 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1477 i = Py_SIZE(a);
1478 }
1479 while (i < size) {
1480 j = (i <= size-i) ? i : size-i;
1481 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1482 i += j;
1483 }
1484 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001485}
1486
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489{
1490 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1491 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001492 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001493 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001494 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001495 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001496 return -1;
1497 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1498 varg.buf, varg.len, 0);
1499 PyBuffer_Release(&varg);
1500 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 }
1502 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001503 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1504 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 }
1506
Antoine Pitrou0010d372010-08-15 17:12:55 +00001507 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001508}
1509
Neal Norwitz6968b052007-02-27 19:02:19 +00001510static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001511bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (i < 0 || i >= Py_SIZE(a)) {
1514 PyErr_SetString(PyExc_IndexError, "index out of range");
1515 return NULL;
1516 }
1517 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001520Py_LOCAL(int)
1521bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1522{
1523 int cmp;
1524 Py_ssize_t len;
1525
1526 len = Py_SIZE(a);
1527 if (Py_SIZE(b) != len)
1528 return 0;
1529
1530 if (a->ob_sval[0] != b->ob_sval[0])
1531 return 0;
1532
1533 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1534 return (cmp == 0);
1535}
1536
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001538bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 int c;
1541 Py_ssize_t len_a, len_b;
1542 Py_ssize_t min_len;
1543 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001544 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 /* Make sure both arguments are strings. */
1547 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001548 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001549 rc = PyObject_IsInstance((PyObject*)a,
1550 (PyObject*)&PyUnicode_Type);
1551 if (!rc)
1552 rc = PyObject_IsInstance((PyObject*)b,
1553 (PyObject*)&PyUnicode_Type);
1554 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001556 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001557 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001558 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 return NULL;
1560 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001561 else {
1562 rc = PyObject_IsInstance((PyObject*)a,
1563 (PyObject*)&PyLong_Type);
1564 if (!rc)
1565 rc = PyObject_IsInstance((PyObject*)b,
1566 (PyObject*)&PyLong_Type);
1567 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001568 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001569 if (rc) {
1570 if (PyErr_WarnEx(PyExc_BytesWarning,
1571 "Comparison between bytes and int", 1))
1572 return NULL;
1573 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001574 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001578 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001580 case Py_EQ:
1581 case Py_LE:
1582 case Py_GE:
1583 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001585 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 case Py_NE:
1587 case Py_LT:
1588 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001590 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 default:
1592 PyErr_BadArgument();
1593 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 }
1595 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 else if (op == Py_EQ || op == Py_NE) {
1597 int eq = bytes_compare_eq(a, b);
1598 eq ^= (op == Py_NE);
1599 result = eq ? Py_True : Py_False;
1600 }
1601 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 len_a = Py_SIZE(a);
1603 len_b = Py_SIZE(b);
1604 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 if (min_len > 0) {
1606 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 else
1611 c = 0;
1612 if (c == 0)
1613 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1614 switch (op) {
1615 case Py_LT: c = c < 0; break;
1616 case Py_LE: c = c <= 0; break;
1617 case Py_GT: c = c > 0; break;
1618 case Py_GE: c = c >= 0; break;
1619 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001620 PyErr_BadArgument();
1621 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 Py_INCREF(result);
1627 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628}
1629
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001630static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001632{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001633 if (a->ob_shash == -1) {
1634 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001635 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001636 }
1637 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyIndex_Check(item)) {
1644 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1645 if (i == -1 && PyErr_Occurred())
1646 return NULL;
1647 if (i < 0)
1648 i += PyBytes_GET_SIZE(self);
1649 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1650 PyErr_SetString(PyExc_IndexError,
1651 "index out of range");
1652 return NULL;
1653 }
1654 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1655 }
1656 else if (PySlice_Check(item)) {
1657 Py_ssize_t start, stop, step, slicelength, cur, i;
1658 char* source_buf;
1659 char* result_buf;
1660 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001661
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001662 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 PyBytes_GET_SIZE(self),
1664 &start, &stop, &step, &slicelength) < 0) {
1665 return NULL;
1666 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 if (slicelength <= 0) {
1669 return PyBytes_FromStringAndSize("", 0);
1670 }
1671 else if (start == 0 && step == 1 &&
1672 slicelength == PyBytes_GET_SIZE(self) &&
1673 PyBytes_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject *)self;
1676 }
1677 else if (step == 1) {
1678 return PyBytes_FromStringAndSize(
1679 PyBytes_AS_STRING(self) + start,
1680 slicelength);
1681 }
1682 else {
1683 source_buf = PyBytes_AS_STRING(self);
1684 result = PyBytes_FromStringAndSize(NULL, slicelength);
1685 if (result == NULL)
1686 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 result_buf = PyBytes_AS_STRING(result);
1689 for (cur = start, i = 0; i < slicelength;
1690 cur += step, i++) {
1691 result_buf[i] = source_buf[cur];
1692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return result;
1695 }
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001699 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 Py_TYPE(item)->tp_name);
1701 return NULL;
1702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703}
1704
1705static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001706bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1709 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710}
1711
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001712static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 (lenfunc)bytes_length, /*sq_length*/
1714 (binaryfunc)bytes_concat, /*sq_concat*/
1715 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1716 (ssizeargfunc)bytes_item, /*sq_item*/
1717 0, /*sq_slice*/
1718 0, /*sq_ass_item*/
1719 0, /*sq_ass_slice*/
1720 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721};
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length,
1725 (binaryfunc)bytes_subscript,
1726 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727};
1728
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001729static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 (getbufferproc)bytes_buffer_getbuffer,
1731 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
1734
/* Values for the `striptype` argument of the strip helpers below. */
#define LEFTSTRIP  0    /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP  2    /* strip both ends */
1738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739/*[clinic input]
1740bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742 sep: object = None
        The delimiter according to which to split the bytes.
1744 None (the default value) means split on ASCII whitespace characters
1745 (space, tab, return, newline, formfeed, vertical tab).
1746 maxsplit: Py_ssize_t = -1
1747 Maximum number of splits to do.
1748 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750Return a list of the sections in the bytes, using sep as the delimiter.
1751[clinic start generated code]*/
1752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001754bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001755/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756{
1757 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 const char *s = PyBytes_AS_STRING(self), *sub;
1759 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if (maxsplit < 0)
1763 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 return NULL;
1768 sub = vsub.buf;
1769 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1772 PyBuffer_Release(&vsub);
1773 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001774}
1775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776/*[clinic input]
1777bytes.partition
1778
1779 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781 /
1782
1783Partition the bytes into three parts using the given separator.
1784
1785This will search for the separator sep in the bytes. If the separator is found,
1786returns a 3-tuple containing the part before the separator, the separator
1787itself, and the part after it.
1788
1789If the separator is not found, returns a 3-tuple containing the original bytes
1790object and two empty bytes objects.
1791[clinic start generated code]*/
1792
Neal Norwitz6968b052007-02-27 19:02:19 +00001793static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001795/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001796{
Neal Norwitz6968b052007-02-27 19:02:19 +00001797 return stringlib_partition(
1798 (PyObject*) self,
1799 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001800 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001801 );
1802}
1803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.rpartition
1806
1807 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809 /
1810
1811Partition the bytes into three parts using the given separator.
1812
This will search for the separator sep in the bytes, starting at the end. If
1814the separator is found, returns a 3-tuple containing the part before the
1815separator, the separator itself, and the part after it.
1816
1817If the separator is not found, returns a 3-tuple containing two empty bytes
1818objects and the original bytes object.
1819[clinic start generated code]*/
1820
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821static PyObject *
1822bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 return stringlib_rpartition(
1826 (PyObject*) self,
1827 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001828 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001830}
1831
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832/*[clinic input]
1833bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001835Return a list of the sections in the bytes, using sep as the delimiter.
1836
1837Splitting is done starting at the end of the bytes and working to the front.
1838[clinic start generated code]*/
1839
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001840static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001841bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001842/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843{
1844 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 const char *s = PyBytes_AS_STRING(self), *sub;
1846 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001847 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 if (maxsplit < 0)
1850 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001853 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return NULL;
1855 sub = vsub.buf;
1856 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1859 PyBuffer_Release(&vsub);
1860 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864/*[clinic input]
1865bytes.join
1866
1867 iterable_of_bytes: object
1868 /
1869
1870Concatenate any number of bytes objects.
1871
1872The bytes whose method is called is inserted in between each pair.
1873
1874The result is returned as a new bytes object.
1875
1876Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1877[clinic start generated code]*/
1878
Neal Norwitz6968b052007-02-27 19:02:19 +00001879static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001880bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001881/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001882{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001883 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001884}
1885
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886PyObject *
1887_PyBytes_Join(PyObject *sep, PyObject *x)
1888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 assert(sep != NULL && PyBytes_Check(sep));
1890 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001891 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892}
1893
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is clamped to len;
   - negative end/start are taken relative to len, then clamped to 0.
   (start is NOT clamped to len; callers compare end - start themselves.)

   `start` and `end` must be modifiable lvalues, and every argument may be
   evaluated more than once, so pass expressions without side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement and stays correct inside an unbraced if/else; the
   arguments are parenthesized so arbitrary expressions can be passed. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
1909Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001910bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001913 char byte;
1914 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001918 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouac65d962011-10-20 23:54:17 +02001920 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1921 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouac65d962011-10-20 23:54:17 +02001924 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001925 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001926 return -2;
1927
1928 sub = subbuf.buf;
1929 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001931 else {
1932 sub = &byte;
1933 sub_len = 1;
1934 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001935 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001937 ADJUST_INDICES(start, end, len);
1938 if (end - start < sub_len)
1939 res = -1;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +02001940 else if (sub_len == 1) {
1941 if (dir > 0)
1942 res = stringlib_find_char(
1943 PyBytes_AS_STRING(self) + start, end - start,
1944 *sub);
1945 else
1946 res = stringlib_rfind_char(
1947 PyBytes_AS_STRING(self) + start, end - start,
1948 *sub);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001949 if (res >= 0)
1950 res += start;
1951 }
1952 else {
1953 if (dir > 0)
1954 res = stringlib_find_slice(
1955 PyBytes_AS_STRING(self), len,
1956 sub, sub_len, start, end);
1957 else
1958 res = stringlib_rfind_slice(
1959 PyBytes_AS_STRING(self), len,
1960 sub, sub_len, start, end);
1961 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001962
1963 if (subobj)
1964 PyBuffer_Release(&subbuf);
1965
1966 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967}
1968
1969
1970PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001971"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001972\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001973Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001974such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001976\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977Return -1 on failure.");
1978
Neal Norwitz6968b052007-02-27 19:02:19 +00001979static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001980bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 Py_ssize_t result = bytes_find_internal(self, args, +1);
1983 if (result == -2)
1984 return NULL;
1985 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001986}
1987
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
1989PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001990"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001991\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992Like B.find() but raise ValueError when the substring is not found.");
1993
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001994static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001995bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 Py_ssize_t result = bytes_find_internal(self, args, +1);
1998 if (result == -2)
1999 return NULL;
2000 if (result == -1) {
2001 PyErr_SetString(PyExc_ValueError,
2002 "substring not found");
2003 return NULL;
2004 }
2005 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002006}
2007
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
2009PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002010"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002011\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002013such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002015\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016Return -1 on failure.");
2017
Neal Norwitz6968b052007-02-27 19:02:19 +00002018static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002019bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 Py_ssize_t result = bytes_find_internal(self, args, -1);
2022 if (result == -2)
2023 return NULL;
2024 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002025}
2026
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002029"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030\n\
2031Like B.rfind() but raise ValueError when the substring is not found.");
2032
2033static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002034bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002035{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 Py_ssize_t result = bytes_find_internal(self, args, -1);
2037 if (result == -2)
2038 return NULL;
2039 if (result == -1) {
2040 PyErr_SetString(PyExc_ValueError,
2041 "substring not found");
2042 return NULL;
2043 }
2044 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002045}
2046
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
2048Py_LOCAL_INLINE(PyObject *)
2049do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 Py_buffer vsep;
2052 char *s = PyBytes_AS_STRING(self);
2053 Py_ssize_t len = PyBytes_GET_SIZE(self);
2054 char *sep;
2055 Py_ssize_t seplen;
2056 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002058 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 return NULL;
2060 sep = vsep.buf;
2061 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 i = 0;
2064 if (striptype != RIGHTSTRIP) {
2065 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2066 i++;
2067 }
2068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 j = len;
2071 if (striptype != LEFTSTRIP) {
2072 do {
2073 j--;
2074 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2075 j++;
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2081 Py_INCREF(self);
2082 return (PyObject*)self;
2083 }
2084 else
2085 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002086}
2087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
2089Py_LOCAL_INLINE(PyObject *)
2090do_strip(PyBytesObject *self, int striptype)
2091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 char *s = PyBytes_AS_STRING(self);
2093 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 i = 0;
2096 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002097 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 i++;
2099 }
2100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 j = len;
2103 if (striptype != LEFTSTRIP) {
2104 do {
2105 j--;
David Malcolm96960882010-11-05 17:23:41 +00002106 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 j++;
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2111 Py_INCREF(self);
2112 return (PyObject*)self;
2113 }
2114 else
2115 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116}
2117
2118
2119Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002120do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002122 if (bytes != NULL && bytes != Py_None) {
2123 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 }
2125 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126}
2127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128/*[clinic input]
2129bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 self: self(type="PyBytesObject *")
2132 bytes: object = None
2133 /
2134
2135Strip leading and trailing bytes contained in the argument.
2136
2137If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2138[clinic start generated code]*/
2139
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002140static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002142/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002143{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002145}
2146
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147/*[clinic input]
2148bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 self: self(type="PyBytesObject *")
2151 bytes: object = None
2152 /
2153
2154Strip leading bytes contained in the argument.
2155
2156If the argument is omitted or None, strip leading ASCII whitespace.
2157[clinic start generated code]*/
2158
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159static PyObject *
2160bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002161/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162{
2163 return do_argstrip(self, LEFTSTRIP, bytes);
2164}
2165
2166/*[clinic input]
2167bytes.rstrip
2168
2169 self: self(type="PyBytesObject *")
2170 bytes: object = None
2171 /
2172
2173Strip trailing bytes contained in the argument.
2174
2175If the argument is omitted or None, strip trailing ASCII whitespace.
2176[clinic start generated code]*/
2177
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178static PyObject *
2179bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002180/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181{
2182 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002183}
Neal Norwitz6968b052007-02-27 19:02:19 +00002184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
2186PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002187"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002188\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002190string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191as in slice notation.");
2192
2193static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002194bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 PyObject *sub_obj;
2197 const char *str = PyBytes_AS_STRING(self), *sub;
2198 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002199 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrouac65d962011-10-20 23:54:17 +02002202 Py_buffer vsub;
2203 PyObject *count_obj;
2204
2205 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2206 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouac65d962011-10-20 23:54:17 +02002209 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002210 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002211 return NULL;
2212
2213 sub = vsub.buf;
2214 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002216 else {
2217 sub = &byte;
2218 sub_len = 1;
2219 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrouac65d962011-10-20 23:54:17 +02002223 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2225 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002226
2227 if (sub_obj)
2228 PyBuffer_Release(&vsub);
2229
2230 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231}
2232
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234/*[clinic input]
2235bytes.translate
2236
2237 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002238 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239 Translation table, which must be a bytes object of length 256.
2240 [
2241 deletechars: object
2242 ]
2243 /
2244
2245Return a copy with each character mapped by the given translation table.
2246
2247All characters occurring in the optional argument deletechars are removed.
2248The remaining characters are mapped through the given translation table.
2249[clinic start generated code]*/
2250
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002252bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2253 PyObject *deletechars)
2254/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002256 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002257 Py_buffer table_view = {NULL, NULL};
2258 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002260 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 Py_ssize_t inlen, tablen, dellen = 0;
2264 PyObject *result;
2265 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002267 if (PyBytes_Check(table)) {
2268 table_chars = PyBytes_AS_STRING(table);
2269 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271 else if (table == Py_None) {
2272 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 tablen = 256;
2274 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002275 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002276 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002277 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002278 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002279 tablen = table_view.len;
2280 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 if (tablen != 256) {
2283 PyErr_SetString(PyExc_ValueError,
2284 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002285 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 return NULL;
2287 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002289 if (deletechars != NULL) {
2290 if (PyBytes_Check(deletechars)) {
2291 del_table_chars = PyBytes_AS_STRING(deletechars);
2292 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002294 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002295 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002296 PyBuffer_Release(&table_view);
2297 return NULL;
2298 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002299 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002300 dellen = del_table_view.len;
2301 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 }
2303 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 dellen = 0;
2306 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 inlen = PyBytes_GET_SIZE(input_obj);
2309 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002310 if (result == NULL) {
2311 PyBuffer_Release(&del_table_view);
2312 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002314 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 output_start = output = PyBytes_AsString(result);
2316 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002318 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 /* If no deletions are required, use faster code */
2320 for (i = inlen; --i >= 0; ) {
2321 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 changed = 1;
2324 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002325 if (!changed && PyBytes_CheckExact(input_obj)) {
2326 Py_INCREF(input_obj);
2327 Py_DECREF(result);
2328 result = input_obj;
2329 }
2330 PyBuffer_Release(&del_table_view);
2331 PyBuffer_Release(&table_view);
2332 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002335 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 for (i = 0; i < 256; i++)
2337 trans_table[i] = Py_CHARMASK(i);
2338 } else {
2339 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002342 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002345 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002346 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 for (i = inlen; --i >= 0; ) {
2349 c = Py_CHARMASK(*input++);
2350 if (trans_table[c] != -1)
2351 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2352 continue;
2353 changed = 1;
2354 }
2355 if (!changed && PyBytes_CheckExact(input_obj)) {
2356 Py_DECREF(result);
2357 Py_INCREF(input_obj);
2358 return input_obj;
2359 }
2360 /* Fix the size of the resulting string */
2361 if (inlen > 0)
2362 _PyBytes_Resize(&result, output - output_start);
2363 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364}
2365
2366
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367/*[clinic input]
2368
2369@staticmethod
2370bytes.maketrans
2371
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002372 frm: Py_buffer
2373 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002374 /
2375
2376Return a translation table useable for the bytes or bytearray translate method.
2377
2378The returned table will be one where each byte in frm is mapped to the byte at
2379the same position in to.
2380
2381The bytes objects frm and to must be of the same length.
2382[clinic start generated code]*/
2383
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002384static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002385bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002386/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387{
2388 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002389}
2390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391/* find and count characters and substrings */
2392
/* Find the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL if the
   byte does not occur. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2395
2396/* String ops must return a string. */
2397/* If the object is subclass of string, create a copy */
2398Py_LOCAL(PyBytesObject *)
2399return_self(PyBytesObject *self)
2400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 if (PyBytes_CheckExact(self)) {
2402 Py_INCREF(self);
2403 return self;
2404 }
2405 return (PyBytesObject *)PyBytes_FromStringAndSize(
2406 PyBytes_AS_STRING(self),
2407 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002408}
2409
2410Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002411countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 Py_ssize_t count=0;
2414 const char *start=target;
2415 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 while ( (start=findchar(start, end-start, c)) != NULL ) {
2418 count++;
2419 if (count >= maxcount)
2420 break;
2421 start += 1;
2422 }
2423 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424}
2425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426
2427/* Algorithms for different cases of string replacement */
2428
2429/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2430Py_LOCAL(PyBytesObject *)
2431replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 const char *to_s, Py_ssize_t to_len,
2433 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 char *self_s, *result_s;
2436 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002437 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002441
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002442 /* 1 at the end plus 1 after every character;
2443 count = min(maxcount, self_len + 1) */
2444 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002445 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002446 else
2447 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2448 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 /* Check for overflow */
2451 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002452 assert(count > 0);
2453 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 PyErr_SetString(PyExc_OverflowError,
2455 "replacement bytes are too long");
2456 return NULL;
2457 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002458 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 if (! (result = (PyBytesObject *)
2461 PyBytes_FromStringAndSize(NULL, result_len)) )
2462 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 self_s = PyBytes_AS_STRING(self);
2465 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Victor Stinnerfac39562016-03-21 10:38:58 +01002467 if (to_len > 1) {
2468 /* Lay the first one down (guaranteed this will occur) */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 Py_MEMCPY(result_s, to_s, to_len);
2470 result_s += to_len;
Victor Stinnerfac39562016-03-21 10:38:58 +01002471 count -= 1;
2472
2473 for (i = 0; i < count; i++) {
2474 *result_s++ = *self_s++;
2475 Py_MEMCPY(result_s, to_s, to_len);
2476 result_s += to_len;
2477 }
2478 }
2479 else {
2480 result_s[0] = to_s[0];
2481 result_s += to_len;
2482 count -= 1;
2483 for (i = 0; i < count; i++) {
2484 *result_s++ = *self_s++;
2485 result_s[0] = to_s[0];
2486 result_s += to_len;
2487 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 /* Copy the rest of the original string */
2491 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002494}
2495
2496/* Special case for deleting a single character */
2497/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2498Py_LOCAL(PyBytesObject *)
2499replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 char *self_s, *result_s;
2503 char *start, *next, *end;
2504 Py_ssize_t self_len, result_len;
2505 Py_ssize_t count;
2506 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 self_len = PyBytes_GET_SIZE(self);
2509 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 count = countchar(self_s, self_len, from_c, maxcount);
2512 if (count == 0) {
2513 return return_self(self);
2514 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002516 result_len = self_len - count; /* from_len == 1 */
2517 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 if ( (result = (PyBytesObject *)
2520 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2521 return NULL;
2522 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 start = self_s;
2525 end = self_s + self_len;
2526 while (count-- > 0) {
2527 next = findchar(start, end-start, from_c);
2528 if (next == NULL)
2529 break;
2530 Py_MEMCPY(result_s, start, next-start);
2531 result_s += (next-start);
2532 start = next+1;
2533 }
2534 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537}
2538
2539/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2540
2541Py_LOCAL(PyBytesObject *)
2542replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 const char *from_s, Py_ssize_t from_len,
2544 Py_ssize_t maxcount) {
2545 char *self_s, *result_s;
2546 char *start, *next, *end;
2547 Py_ssize_t self_len, result_len;
2548 Py_ssize_t count, offset;
2549 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 self_len = PyBytes_GET_SIZE(self);
2552 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 count = stringlib_count(self_s, self_len,
2555 from_s, from_len,
2556 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 if (count == 0) {
2559 /* no matches */
2560 return return_self(self);
2561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 result_len = self_len - (count * from_len);
2564 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 if ( (result = (PyBytesObject *)
2567 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2568 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 start = self_s;
2573 end = self_s + self_len;
2574 while (count-- > 0) {
2575 offset = stringlib_find(start, end-start,
2576 from_s, from_len,
2577 0);
2578 if (offset == -1)
2579 break;
2580 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 result_s += (next-start);
2585 start = next+from_len;
2586 }
2587 Py_MEMCPY(result_s, start, end-start);
2588 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589}
2590
2591/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2592Py_LOCAL(PyBytesObject *)
2593replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002594 char from_c, char to_c,
2595 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 char *self_s, *result_s, *start, *end, *next;
2598 Py_ssize_t self_len;
2599 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 /* The result string will be the same size */
2602 self_s = PyBytes_AS_STRING(self);
2603 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 if (next == NULL) {
2608 /* No matches; return the original string */
2609 return return_self(self);
2610 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002612 /* Need to make a new string */
2613 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2614 if (result == NULL)
2615 return NULL;
2616 result_s = PyBytes_AS_STRING(result);
2617 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 /* change everything in-place, starting with this one */
2620 start = result_s + (next-self_s);
2621 *start = to_c;
2622 start++;
2623 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 while (--maxcount > 0) {
2626 next = findchar(start, end-start, from_c);
2627 if (next == NULL)
2628 break;
2629 *next = to_c;
2630 start = next+1;
2631 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634}
2635
2636/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2637Py_LOCAL(PyBytesObject *)
2638replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 const char *from_s, Py_ssize_t from_len,
2640 const char *to_s, Py_ssize_t to_len,
2641 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 char *result_s, *start, *end;
2644 char *self_s;
2645 Py_ssize_t self_len, offset;
2646 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 self_s = PyBytes_AS_STRING(self);
2651 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 offset = stringlib_find(self_s, self_len,
2654 from_s, from_len,
2655 0);
2656 if (offset == -1) {
2657 /* No matches; return the original string */
2658 return return_self(self);
2659 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 /* Need to make a new string */
2662 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2663 if (result == NULL)
2664 return NULL;
2665 result_s = PyBytes_AS_STRING(result);
2666 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 /* change everything in-place, starting with this one */
2669 start = result_s + offset;
2670 Py_MEMCPY(start, to_s, from_len);
2671 start += from_len;
2672 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 while ( --maxcount > 0) {
2675 offset = stringlib_find(start, end-start,
2676 from_s, from_len,
2677 0);
2678 if (offset==-1)
2679 break;
2680 Py_MEMCPY(start+offset, to_s, from_len);
2681 start += offset+from_len;
2682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685}
2686
2687/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2688Py_LOCAL(PyBytesObject *)
2689replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 char from_c,
2691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 char *self_s, *result_s;
2695 char *start, *next, *end;
2696 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002697 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 self_s = PyBytes_AS_STRING(self);
2701 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 count = countchar(self_s, self_len, from_c, maxcount);
2704 if (count == 0) {
2705 /* no matches, return unchanged */
2706 return return_self(self);
2707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 /* use the difference between current and new, hence the "-1" */
2710 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002711 assert(count > 0);
2712 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 PyErr_SetString(PyExc_OverflowError,
2714 "replacement bytes are too long");
2715 return NULL;
2716 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002717 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 if ( (result = (PyBytesObject *)
2720 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2721 return NULL;
2722 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 start = self_s;
2725 end = self_s + self_len;
2726 while (count-- > 0) {
2727 next = findchar(start, end-start, from_c);
2728 if (next == NULL)
2729 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 if (next == start) {
2732 /* replace with the 'to' */
2733 Py_MEMCPY(result_s, to_s, to_len);
2734 result_s += to_len;
2735 start += 1;
2736 } else {
2737 /* copy the unchanged old then the 'to' */
2738 Py_MEMCPY(result_s, start, next-start);
2739 result_s += (next-start);
2740 Py_MEMCPY(result_s, to_s, to_len);
2741 result_s += to_len;
2742 start = next+1;
2743 }
2744 }
2745 /* Copy the remainder of the remaining string */
2746 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749}
2750
2751/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2752Py_LOCAL(PyBytesObject *)
2753replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 const char *from_s, Py_ssize_t from_len,
2755 const char *to_s, Py_ssize_t to_len,
2756 Py_ssize_t maxcount) {
2757 char *self_s, *result_s;
2758 char *start, *next, *end;
2759 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002760 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002763 self_s = PyBytes_AS_STRING(self);
2764 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 count = stringlib_count(self_s, self_len,
2767 from_s, from_len,
2768 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 if (count == 0) {
2771 /* no matches, return unchanged */
2772 return return_self(self);
2773 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 /* Check for overflow */
2776 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002777 assert(count > 0);
2778 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 PyErr_SetString(PyExc_OverflowError,
2780 "replacement bytes are too long");
2781 return NULL;
2782 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002783 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 if ( (result = (PyBytesObject *)
2786 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2787 return NULL;
2788 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 start = self_s;
2791 end = self_s + self_len;
2792 while (count-- > 0) {
2793 offset = stringlib_find(start, end-start,
2794 from_s, from_len,
2795 0);
2796 if (offset == -1)
2797 break;
2798 next = start+offset;
2799 if (next == start) {
2800 /* replace with the 'to' */
2801 Py_MEMCPY(result_s, to_s, to_len);
2802 result_s += to_len;
2803 start += from_len;
2804 } else {
2805 /* copy the unchanged old then the 'to' */
2806 Py_MEMCPY(result_s, start, next-start);
2807 result_s += (next-start);
2808 Py_MEMCPY(result_s, to_s, to_len);
2809 result_s += to_len;
2810 start = next+from_len;
2811 }
2812 }
2813 /* Copy the remainder of the remaining string */
2814 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817}
2818
2819
2820Py_LOCAL(PyBytesObject *)
2821replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002822 const char *from_s, Py_ssize_t from_len,
2823 const char *to_s, Py_ssize_t to_len,
2824 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 if (maxcount < 0) {
2827 maxcount = PY_SSIZE_T_MAX;
2828 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2829 /* nothing to do; return the original string */
2830 return return_self(self);
2831 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 if (maxcount == 0 ||
2834 (from_len == 0 && to_len == 0)) {
2835 /* nothing to do; return the original string */
2836 return return_self(self);
2837 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 if (from_len == 0) {
2842 /* insert the 'to' string everywhere. */
2843 /* >>> "Python".replace("", ".") */
2844 /* '.P.y.t.h.o.n.' */
2845 return replace_interleave(self, to_s, to_len, maxcount);
2846 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2849 /* point for an empty self string to generate a non-empty string */
2850 /* Special case so the remaining code always gets a non-empty string */
2851 if (PyBytes_GET_SIZE(self) == 0) {
2852 return return_self(self);
2853 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 if (to_len == 0) {
2856 /* delete all occurrences of 'from' string */
2857 if (from_len == 1) {
2858 return replace_delete_single_character(
2859 self, from_s[0], maxcount);
2860 } else {
2861 return replace_delete_substring(self, from_s,
2862 from_len, maxcount);
2863 }
2864 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 if (from_len == to_len) {
2869 if (from_len == 1) {
2870 return replace_single_character_in_place(
2871 self,
2872 from_s[0],
2873 to_s[0],
2874 maxcount);
2875 } else {
2876 return replace_substring_in_place(
2877 self, from_s, from_len, to_s, to_len,
2878 maxcount);
2879 }
2880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 /* Otherwise use the more generic algorithms */
2883 if (from_len == 1) {
2884 return replace_single_character(self, from_s[0],
2885 to_s, to_len, maxcount);
2886 } else {
2887 /* len('from')>=2, len('to')>=1 */
2888 return replace_substring(self, from_s, from_len, to_s, to_len,
2889 maxcount);
2890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891}
2892
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002893
2894/*[clinic input]
2895bytes.replace
2896
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002897 old: Py_buffer
2898 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002899 count: Py_ssize_t = -1
2900 Maximum number of occurrences to replace.
2901 -1 (the default value) means replace all occurrences.
2902 /
2903
2904Return a copy with all occurrences of substring old replaced by new.
2905
2906If the optional argument count is given, only the first count occurrences are
2907replaced.
2908[clinic start generated code]*/
2909
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002910static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002911bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2912 Py_ssize_t count)
2913/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002916 (const char *)old->buf, old->len,
2917 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002918}
2919
2920/** End DALKE **/
2921
2922/* Matches the end (direction >= 0) or start (direction < 0) of self
2923 * against substr, using the start and end arguments. Returns
2924 * -1 on error, 0 if not found and 1 if found.
2925 */
2926Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002927_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 Py_ssize_t len = PyBytes_GET_SIZE(self);
2931 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002932 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 const char* sub;
2934 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 if (PyBytes_Check(substr)) {
2937 sub = PyBytes_AS_STRING(substr);
2938 slen = PyBytes_GET_SIZE(substr);
2939 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002940 else {
2941 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2942 return -1;
2943 sub = sub_view.buf;
2944 slen = sub_view.len;
2945 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 if (direction < 0) {
2951 /* startswith */
2952 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002953 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 } else {
2955 /* endswith */
2956 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002957 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 if (end-slen > start)
2960 start = end - slen;
2961 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002962 if (end-start < slen)
2963 goto notfound;
2964 if (memcmp(str+start, sub, slen) != 0)
2965 goto notfound;
2966
2967 PyBuffer_Release(&sub_view);
2968 return 1;
2969
2970notfound:
2971 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973}
2974
2975
2976PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002977"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978\n\
2979Return True if B starts with the specified prefix, False otherwise.\n\
2980With optional start, test B beginning at that position.\n\
2981With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002982prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983
2984static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002985bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 Py_ssize_t start = 0;
2988 Py_ssize_t end = PY_SSIZE_T_MAX;
2989 PyObject *subobj;
2990 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991
Jesus Ceaac451502011-04-20 17:09:23 +02002992 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 return NULL;
2994 if (PyTuple_Check(subobj)) {
2995 Py_ssize_t i;
2996 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2997 result = _bytes_tailmatch(self,
2998 PyTuple_GET_ITEM(subobj, i),
2999 start, end, -1);
3000 if (result == -1)
3001 return NULL;
3002 else if (result) {
3003 Py_RETURN_TRUE;
3004 }
3005 }
3006 Py_RETURN_FALSE;
3007 }
3008 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003009 if (result == -1) {
3010 if (PyErr_ExceptionMatches(PyExc_TypeError))
3011 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3012 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003014 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 else
3016 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017}
3018
3019
3020PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003021"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022\n\
3023Return True if B ends with the specified suffix, False otherwise.\n\
3024With optional start, test B beginning at that position.\n\
3025With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003026suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027
3028static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003029bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003030{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 Py_ssize_t start = 0;
3032 Py_ssize_t end = PY_SSIZE_T_MAX;
3033 PyObject *subobj;
3034 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035
Jesus Ceaac451502011-04-20 17:09:23 +02003036 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 return NULL;
3038 if (PyTuple_Check(subobj)) {
3039 Py_ssize_t i;
3040 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3041 result = _bytes_tailmatch(self,
3042 PyTuple_GET_ITEM(subobj, i),
3043 start, end, +1);
3044 if (result == -1)
3045 return NULL;
3046 else if (result) {
3047 Py_RETURN_TRUE;
3048 }
3049 }
3050 Py_RETURN_FALSE;
3051 }
3052 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003053 if (result == -1) {
3054 if (PyErr_ExceptionMatches(PyExc_TypeError))
3055 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3056 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003058 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 else
3060 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061}
3062
3063
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003064/*[clinic input]
3065bytes.decode
3066
3067 encoding: str(c_default="NULL") = 'utf-8'
3068 The encoding with which to decode the bytes.
3069 errors: str(c_default="NULL") = 'strict'
3070 The error handling scheme to use for the handling of decoding errors.
3071 The default is 'strict' meaning that decoding errors raise a
3072 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3073 as well as any other name registered with codecs.register_error that
3074 can handle UnicodeDecodeErrors.
3075
3076Decode the bytes using the codec registered for encoding.
3077[clinic start generated code]*/
3078
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003080bytes_decode_impl(PyBytesObject*self, const char *encoding,
3081 const char *errors)
3082/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003083{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003084 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003085}
3086
Guido van Rossum20188312006-05-05 15:15:40 +00003087
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003088/*[clinic input]
3089bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003090
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003091 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003092
3093Return a list of the lines in the bytes, breaking at line boundaries.
3094
3095Line breaks are not included in the resulting list unless keepends is given and
3096true.
3097[clinic start generated code]*/
3098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003099static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003100bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003101/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003102{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003103 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003104 (PyObject*) self, PyBytes_AS_STRING(self),
3105 PyBytes_GET_SIZE(self), keepends
3106 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003107}
3108
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003109/*[clinic input]
3110@classmethod
3111bytes.fromhex
3112
3113 string: unicode
3114 /
3115
3116Create a bytes object from a string of hexadecimal numbers.
3117
3118Spaces between two numbers are accepted.
3119Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3120[clinic start generated code]*/
3121
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003122static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003123bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003124/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003125{
Victor Stinner2bf89932015-10-14 11:25:33 +02003126 return _PyBytes_FromHex(string, 0);
3127}
3128
3129PyObject*
3130_PyBytes_FromHex(PyObject *string, int use_bytearray)
3131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003132 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02003133 Py_ssize_t hexlen, invalid_char;
3134 unsigned int top, bot;
3135 Py_UCS1 *str, *end;
3136 _PyBytesWriter writer;
3137
3138 _PyBytesWriter_Init(&writer);
3139 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003140
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003141 assert(PyUnicode_Check(string));
3142 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003143 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003144 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003145
Victor Stinner2bf89932015-10-14 11:25:33 +02003146 if (!PyUnicode_IS_ASCII(string)) {
3147 void *data = PyUnicode_DATA(string);
3148 unsigned int kind = PyUnicode_KIND(string);
3149 Py_ssize_t i;
3150
3151 /* search for the first non-ASCII character */
3152 for (i = 0; i < hexlen; i++) {
3153 if (PyUnicode_READ(kind, data, i) >= 128)
3154 break;
3155 }
3156 invalid_char = i;
3157 goto error;
3158 }
3159
3160 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
3161 str = PyUnicode_1BYTE_DATA(string);
3162
3163 /* This overestimates if there are spaces */
3164 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
3165 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02003167
3168 end = str + hexlen;
3169 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003170 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02003171 if (*str == ' ') {
3172 do {
3173 str++;
3174 } while (*str == ' ');
3175 if (str >= end)
3176 break;
3177 }
3178
3179 top = _PyLong_DigitValue[*str];
3180 if (top >= 16) {
3181 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 goto error;
3183 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003184 str++;
3185
3186 bot = _PyLong_DigitValue[*str];
3187 if (bot >= 16) {
3188 invalid_char = str - PyUnicode_1BYTE_DATA(string);
3189 goto error;
3190 }
3191 str++;
3192
3193 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003195
3196 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003197
3198 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02003199 PyErr_Format(PyExc_ValueError,
3200 "non-hexadecimal number found in "
3201 "fromhex() arg at position %zd", invalid_char);
3202 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003203 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003204}
3205
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003206PyDoc_STRVAR(hex__doc__,
3207"B.hex() -> string\n\
3208\n\
3209Create a string of hexadecimal numbers from a bytes object.\n\
3210Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3211
3212static PyObject *
3213bytes_hex(PyBytesObject *self)
3214{
3215 char* argbuf = PyBytes_AS_STRING(self);
3216 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3217 return _Py_strhex(argbuf, arglen);
3218}
3219
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003220static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003221bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003224}
3225
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003226
3227static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003228bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003229 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3230 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3231 _Py_capitalize__doc__},
3232 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3233 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003234 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3236 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003237 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 expandtabs__doc__},
3239 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003240 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003241 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003242 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3243 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3244 _Py_isalnum__doc__},
3245 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3246 _Py_isalpha__doc__},
3247 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3248 _Py_isdigit__doc__},
3249 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3250 _Py_islower__doc__},
3251 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3252 _Py_isspace__doc__},
3253 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3254 _Py_istitle__doc__},
3255 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3256 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003257 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003258 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3259 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003260 BYTES_LSTRIP_METHODDEF
3261 BYTES_MAKETRANS_METHODDEF
3262 BYTES_PARTITION_METHODDEF
3263 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003264 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3265 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3266 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003267 BYTES_RPARTITION_METHODDEF
3268 BYTES_RSPLIT_METHODDEF
3269 BYTES_RSTRIP_METHODDEF
3270 BYTES_SPLIT_METHODDEF
3271 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3273 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003274 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003275 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3276 _Py_swapcase__doc__},
3277 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003278 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003279 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3280 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003281 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003282};
3283
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003284static PyObject *
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03003285bytes_mod(PyObject *self, PyObject *arg)
Ethan Furmanb95b5612015-01-23 20:05:18 -08003286{
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03003287 if (!PyBytes_Check(self)) {
3288 Py_RETURN_NOTIMPLEMENTED;
Victor Stinner772b2b02015-10-14 09:56:53 +02003289 }
Victor Stinner772b2b02015-10-14 09:56:53 +02003290 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchakac9a59e62016-04-15 14:11:10 +03003291 arg, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08003292}
3293
3294static PyNumberMethods bytes_as_number = {
3295 0, /*nb_add*/
3296 0, /*nb_subtract*/
3297 0, /*nb_multiply*/
3298 bytes_mod, /*nb_remainder*/
3299};
3300
3301static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003302bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003303
3304static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003305bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003306{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003307 PyObject *x = NULL;
3308 const char *encoding = NULL;
3309 const char *errors = NULL;
3310 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003311 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003312 Py_ssize_t size;
3313 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003314 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003316 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003317 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003318 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3319 &encoding, &errors))
3320 return NULL;
3321 if (x == NULL) {
3322 if (encoding != NULL || errors != NULL) {
3323 PyErr_SetString(PyExc_TypeError,
3324 "encoding or errors without sequence "
3325 "argument");
3326 return NULL;
3327 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003328 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003329 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003330
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003331 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003332 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003333 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003334 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003335 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003336 return NULL;
3337 }
3338 new = PyUnicode_AsEncodedString(x, encoding, errors);
3339 if (new == NULL)
3340 return NULL;
3341 assert(PyBytes_Check(new));
3342 return new;
3343 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003344
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003345 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003346 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003347 PyUnicode_Check(x) ?
3348 "string argument without an encoding" :
3349 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003350 return NULL;
3351 }
3352
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003353 /* We'd like to call PyObject_Bytes here, but we need to check for an
3354 integer argument before deferring to PyBytes_FromObject, something
3355 PyObject_Bytes doesn't do. */
3356 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3357 if (func != NULL) {
3358 new = PyObject_CallFunctionObjArgs(func, NULL);
3359 Py_DECREF(func);
3360 if (new == NULL)
3361 return NULL;
3362 if (!PyBytes_Check(new)) {
3363 PyErr_Format(PyExc_TypeError,
3364 "__bytes__ returned non-bytes (type %.200s)",
3365 Py_TYPE(new)->tp_name);
3366 Py_DECREF(new);
3367 return NULL;
3368 }
3369 return new;
3370 }
3371 else if (PyErr_Occurred())
3372 return NULL;
3373
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003374 if (PyUnicode_Check(x)) {
3375 PyErr_SetString(PyExc_TypeError,
3376 "string argument without an encoding");
3377 return NULL;
3378 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003379 /* Is it an integer? */
3380 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3381 if (size == -1 && PyErr_Occurred()) {
3382 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3383 return NULL;
3384 PyErr_Clear();
3385 }
3386 else if (size < 0) {
3387 PyErr_SetString(PyExc_ValueError, "negative count");
3388 return NULL;
3389 }
3390 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003391 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003392 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003393 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003394 return new;
3395 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003396
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003397 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003398}
3399
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003400static PyObject*
3401_PyBytes_FromBuffer(PyObject *x)
3402{
3403 PyObject *new;
3404 Py_buffer view;
3405
3406 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3407 return NULL;
3408
3409 new = PyBytes_FromStringAndSize(NULL, view.len);
3410 if (!new)
3411 goto fail;
3412 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3413 &view, view.len, 'C') < 0)
3414 goto fail;
3415 PyBuffer_Release(&view);
3416 return new;
3417
3418fail:
3419 Py_XDECREF(new);
3420 PyBuffer_Release(&view);
3421 return NULL;
3422}
3423
/* Function body shared by the list and tuple converters.

   `x` is the sequence and GET_ITEM the matching item accessor macro.  The
   expansion returns from the enclosing function: a new bytes object on
   success, NULL with an exception set on failure.  Each item is converted
   with PyNumber_AsSsize_t() and must lie in range(0, 256).

   NOTE(review): the output buffer is sized once from Py_SIZE(x) while the
   loop bound re-reads Py_SIZE(x) on every iteration, so this is only safe
   if the sequence cannot change length during the conversion. */
#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
    do {                                                                    \
        PyObject *result, *elem;                                            \
        Py_ssize_t pos, value;                                              \
        char *dest;                                                         \
                                                                            \
        result = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));               \
        if (result == NULL)                                                 \
            return NULL;                                                    \
        dest = ((PyBytesObject *)result)->ob_sval;                          \
                                                                            \
        pos = 0;                                                            \
        while (pos < Py_SIZE(x)) {                                          \
            elem = GET_ITEM((x), pos);                                      \
            value = PyNumber_AsSsize_t(elem, PyExc_ValueError);             \
            if (value == -1 && PyErr_Occurred())                            \
                goto error;                                                 \
                                                                            \
            if (value < 0 || value >= 256) {                                \
                PyErr_SetString(PyExc_ValueError,                           \
                                "bytes must be in range(0, 256)");          \
                goto error;                                                 \
            }                                                               \
            *dest++ = (char) value;                                         \
            pos++;                                                          \
        }                                                                   \
        return result;                                                      \
                                                                            \
    error:                                                                  \
        Py_DECREF(result);                                                  \
        return NULL;                                                        \
    } while (0)
3456
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003457static PyObject*
3458_PyBytes_FromList(PyObject *x)
3459{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003460 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003461}
3462
3463static PyObject*
3464_PyBytes_FromTuple(PyObject *x)
3465{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003466 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003467}
3468
3469static PyObject *
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003470_PyBytes_FromIterator(PyObject *it, PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003471{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003472 char *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003473 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003474 _PyBytesWriter writer;
3475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003476 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003477 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003478 if (size == -1 && PyErr_Occurred())
3479 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003480
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003481 _PyBytesWriter_Init(&writer);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003482 str = _PyBytesWriter_Alloc(&writer, size);
3483 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003484 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003485 writer.overallocate = 1;
3486 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003488 /* Run the iterator to exhaustion */
3489 for (i = 0; ; i++) {
3490 PyObject *item;
3491 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003493 /* Get the next item */
3494 item = PyIter_Next(it);
3495 if (item == NULL) {
3496 if (PyErr_Occurred())
3497 goto error;
3498 break;
3499 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003501 /* Interpret it as an int (__index__) */
3502 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3503 Py_DECREF(item);
3504 if (value == -1 && PyErr_Occurred())
3505 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003507 /* Range check */
3508 if (value < 0 || value >= 256) {
3509 PyErr_SetString(PyExc_ValueError,
3510 "bytes must be in range(0, 256)");
3511 goto error;
3512 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003514 /* Append the byte */
3515 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003516 str = _PyBytesWriter_Resize(&writer, str, size+1);
3517 if (str == NULL)
3518 return NULL;
3519 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003520 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003521 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003522 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003523
3524 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003525
3526 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003527 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003528 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003529}
3530
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003531PyObject *
3532PyBytes_FromObject(PyObject *x)
3533{
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003534 PyObject *it, *result;
3535
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003536 if (x == NULL) {
3537 PyErr_BadInternalCall();
3538 return NULL;
3539 }
3540
3541 if (PyBytes_CheckExact(x)) {
3542 Py_INCREF(x);
3543 return x;
3544 }
3545
3546 /* Use the modern buffer interface */
3547 if (PyObject_CheckBuffer(x))
3548 return _PyBytes_FromBuffer(x);
3549
3550 if (PyList_CheckExact(x))
3551 return _PyBytes_FromList(x);
3552
3553 if (PyTuple_CheckExact(x))
3554 return _PyBytes_FromTuple(x);
3555
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003556 if (!PyUnicode_Check(x)) {
3557 it = PyObject_GetIter(x);
3558 if (it != NULL) {
3559 result = _PyBytes_FromIterator(it, x);
3560 Py_DECREF(it);
3561 return result;
3562 }
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003563 }
3564
Serhiy Storchaka03f17f82016-04-10 14:44:59 +03003565 PyErr_Format(PyExc_TypeError,
3566 "cannot convert '%.200s' object to bytes",
3567 x->ob_type->tp_name);
3568 return NULL;
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003569}
3570
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003571static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003572bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003574 PyObject *tmp, *pnew;
3575 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 assert(PyType_IsSubtype(type, &PyBytes_Type));
3578 tmp = bytes_new(&PyBytes_Type, args, kwds);
3579 if (tmp == NULL)
3580 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003581 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 n = PyBytes_GET_SIZE(tmp);
3583 pnew = type->tp_alloc(type, n);
3584 if (pnew != NULL) {
3585 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3586 PyBytes_AS_STRING(tmp), n+1);
3587 ((PyBytesObject *)pnew)->ob_shash =
3588 ((PyBytesObject *)tmp)->ob_shash;
3589 }
3590 Py_DECREF(tmp);
3591 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003592}
3593
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003594PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003595"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003596bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003597bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003598bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3599bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003600\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003601Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003602 - an iterable yielding integers in range(256)\n\
3603 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003604 - any object implementing the buffer API.\n\
3605 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003606
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003607static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003608
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003609PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003610 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3611 "bytes",
3612 PyBytesObject_SIZE,
3613 sizeof(char),
3614 bytes_dealloc, /* tp_dealloc */
3615 0, /* tp_print */
3616 0, /* tp_getattr */
3617 0, /* tp_setattr */
3618 0, /* tp_reserved */
3619 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003620 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003621 &bytes_as_sequence, /* tp_as_sequence */
3622 &bytes_as_mapping, /* tp_as_mapping */
3623 (hashfunc)bytes_hash, /* tp_hash */
3624 0, /* tp_call */
3625 bytes_str, /* tp_str */
3626 PyObject_GenericGetAttr, /* tp_getattro */
3627 0, /* tp_setattro */
3628 &bytes_as_buffer, /* tp_as_buffer */
3629 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3630 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3631 bytes_doc, /* tp_doc */
3632 0, /* tp_traverse */
3633 0, /* tp_clear */
3634 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3635 0, /* tp_weaklistoffset */
3636 bytes_iter, /* tp_iter */
3637 0, /* tp_iternext */
3638 bytes_methods, /* tp_methods */
3639 0, /* tp_members */
3640 0, /* tp_getset */
3641 &PyBaseObject_Type, /* tp_base */
3642 0, /* tp_dict */
3643 0, /* tp_descr_get */
3644 0, /* tp_descr_set */
3645 0, /* tp_dictoffset */
3646 0, /* tp_init */
3647 0, /* tp_alloc */
3648 bytes_new, /* tp_new */
3649 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003650};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003651
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003652void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003653PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003654{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003655 assert(pv != NULL);
3656 if (*pv == NULL)
3657 return;
3658 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003659 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003660 return;
3661 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003662
3663 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3664 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003665 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003666 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003667
Antoine Pitrou161d6952014-05-01 14:36:20 +02003668 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003669 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003670 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3671 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3672 Py_CLEAR(*pv);
3673 return;
3674 }
3675
3676 oldsize = PyBytes_GET_SIZE(*pv);
3677 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3678 PyErr_NoMemory();
3679 goto error;
3680 }
3681 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3682 goto error;
3683
3684 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3685 PyBuffer_Release(&wb);
3686 return;
3687
3688 error:
3689 PyBuffer_Release(&wb);
3690 Py_CLEAR(*pv);
3691 return;
3692 }
3693
3694 else {
3695 /* Multiple references, need to create new object */
3696 PyObject *v;
3697 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003698 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003700}
3701
3702void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003703PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003704{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003705 PyBytes_Concat(pv, w);
3706 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003707}
3708
3709
Ethan Furmanb95b5612015-01-23 20:05:18 -08003710/* The following function breaks the notion that bytes are immutable:
3711 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003712 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003713 as creating a new bytes object and destroying the old one, only
3714 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003715 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003716 Note that if there's not enough memory to resize the bytes object, the
3717 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003718 memory" exception is set, and -1 is returned. Else (on success) 0 is
3719 returned, and the value in *pv may or may not be the same as on input.
3720 As always, an extra byte is allocated for a trailing \0 byte (newsize
3721 does *not* include that), and a trailing \0 byte is stored.
3722*/
3723
3724int
3725_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3726{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003727 PyObject *v;
3728 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003729 v = *pv;
3730 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3731 *pv = 0;
3732 Py_DECREF(v);
3733 PyErr_BadInternalCall();
3734 return -1;
3735 }
3736 /* XXX UNREF/NEWREF interface should be more symmetrical */
3737 _Py_DEC_REFTOTAL;
3738 _Py_ForgetReference(v);
3739 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003740 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003741 if (*pv == NULL) {
3742 PyObject_Del(v);
3743 PyErr_NoMemory();
3744 return -1;
3745 }
3746 _Py_NewReference(*pv);
3747 sv = (PyBytesObject *) *pv;
3748 Py_SIZE(sv) = newsize;
3749 sv->ob_sval[newsize] = '\0';
3750 sv->ob_shash = -1; /* invalidate cached hash value */
3751 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003752}
3753
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003754void
3755PyBytes_Fini(void)
3756{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003757 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003758 for (i = 0; i < UCHAR_MAX + 1; i++)
3759 Py_CLEAR(characters[i]);
3760 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003761}
3762
Benjamin Peterson4116f362008-05-27 00:36:20 +00003763/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003764
3765typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003766 PyObject_HEAD
3767 Py_ssize_t it_index;
3768 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003769} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003770
3771static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003772striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003773{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003774 _PyObject_GC_UNTRACK(it);
3775 Py_XDECREF(it->it_seq);
3776 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003777}
3778
3779static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003780striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003782 Py_VISIT(it->it_seq);
3783 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003784}
3785
3786static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003787striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003789 PyBytesObject *seq;
3790 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 assert(it != NULL);
3793 seq = it->it_seq;
3794 if (seq == NULL)
3795 return NULL;
3796 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003798 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3799 item = PyLong_FromLong(
3800 (unsigned char)seq->ob_sval[it->it_index]);
3801 if (item != NULL)
3802 ++it->it_index;
3803 return item;
3804 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003806 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003807 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003808 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003809}
3810
3811static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003812striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003813{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003814 Py_ssize_t len = 0;
3815 if (it->it_seq)
3816 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3817 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003818}
3819
3820PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003821 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003822
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003823static PyObject *
3824striter_reduce(striterobject *it)
3825{
3826 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003827 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003828 it->it_seq, it->it_index);
3829 } else {
3830 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3831 if (u == NULL)
3832 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003833 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003834 }
3835}
3836
3837PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3838
3839static PyObject *
3840striter_setstate(striterobject *it, PyObject *state)
3841{
3842 Py_ssize_t index = PyLong_AsSsize_t(state);
3843 if (index == -1 && PyErr_Occurred())
3844 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003845 if (it->it_seq != NULL) {
3846 if (index < 0)
3847 index = 0;
3848 else if (index > PyBytes_GET_SIZE(it->it_seq))
3849 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3850 it->it_index = index;
3851 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003852 Py_RETURN_NONE;
3853}
3854
3855PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3856
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003857static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003858 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3859 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003860 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3861 reduce_doc},
3862 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3863 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003864 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003865};
3866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003867PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003868 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3869 "bytes_iterator", /* tp_name */
3870 sizeof(striterobject), /* tp_basicsize */
3871 0, /* tp_itemsize */
3872 /* methods */
3873 (destructor)striter_dealloc, /* tp_dealloc */
3874 0, /* tp_print */
3875 0, /* tp_getattr */
3876 0, /* tp_setattr */
3877 0, /* tp_reserved */
3878 0, /* tp_repr */
3879 0, /* tp_as_number */
3880 0, /* tp_as_sequence */
3881 0, /* tp_as_mapping */
3882 0, /* tp_hash */
3883 0, /* tp_call */
3884 0, /* tp_str */
3885 PyObject_GenericGetAttr, /* tp_getattro */
3886 0, /* tp_setattro */
3887 0, /* tp_as_buffer */
3888 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3889 0, /* tp_doc */
3890 (traverseproc)striter_traverse, /* tp_traverse */
3891 0, /* tp_clear */
3892 0, /* tp_richcompare */
3893 0, /* tp_weaklistoffset */
3894 PyObject_SelfIter, /* tp_iter */
3895 (iternextfunc)striter_next, /* tp_iternext */
3896 striter_methods, /* tp_methods */
3897 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003898};
3899
3900static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003901bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003903 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003905 if (!PyBytes_Check(seq)) {
3906 PyErr_BadInternalCall();
3907 return NULL;
3908 }
3909 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3910 if (it == NULL)
3911 return NULL;
3912 it->it_index = 0;
3913 Py_INCREF(seq);
3914 it->it_seq = (PyBytesObject *)seq;
3915 _PyObject_GC_TRACK(it);
3916 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003917}
Victor Stinner00165072015-10-09 01:53:21 +02003918
3919
3920/* _PyBytesWriter API */
3921
/* Divisor used by _PyBytesWriter_Resize(): when overallocation is enabled
   the requested size grows by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3929
3930void
3931_PyBytesWriter_Init(_PyBytesWriter *writer)
3932{
Victor Stinner661aacc2015-10-14 09:41:48 +02003933 /* Set all attributes before small_buffer to 0 */
3934 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003935#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003936 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003937#endif
3938}
3939
3940void
3941_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3942{
3943 Py_CLEAR(writer->buffer);
3944}
3945
3946Py_LOCAL_INLINE(char*)
3947_PyBytesWriter_AsString(_PyBytesWriter *writer)
3948{
Victor Stinner661aacc2015-10-14 09:41:48 +02003949 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003950 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003951 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003952 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003953 else if (writer->use_bytearray) {
3954 assert(writer->buffer != NULL);
3955 return PyByteArray_AS_STRING(writer->buffer);
3956 }
3957 else {
3958 assert(writer->buffer != NULL);
3959 return PyBytes_AS_STRING(writer->buffer);
3960 }
Victor Stinner00165072015-10-09 01:53:21 +02003961}
3962
3963Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003964_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003965{
3966 char *start = _PyBytesWriter_AsString(writer);
3967 assert(str != NULL);
3968 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003969 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003970 return str - start;
3971}
3972
3973Py_LOCAL_INLINE(void)
3974_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3975{
3976#ifdef Py_DEBUG
3977 char *start, *end;
3978
Victor Stinner661aacc2015-10-14 09:41:48 +02003979 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003980 assert(writer->buffer == NULL);
3981 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003982 else {
3983 assert(writer->buffer != NULL);
3984 if (writer->use_bytearray)
3985 assert(PyByteArray_CheckExact(writer->buffer));
3986 else
3987 assert(PyBytes_CheckExact(writer->buffer));
3988 assert(Py_REFCNT(writer->buffer) == 1);
3989 }
Victor Stinner00165072015-10-09 01:53:21 +02003990
Victor Stinner661aacc2015-10-14 09:41:48 +02003991 if (writer->use_bytearray) {
3992 /* bytearray has its own overallocation algorithm,
3993 writer overallocation must be disabled */
3994 assert(!writer->overallocate);
3995 }
3996
3997 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003998 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003999 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02004000 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02004001 assert(start[writer->allocated] == 0);
4002
4003 end = start + writer->allocated;
4004 assert(str != NULL);
4005 assert(start <= str && str <= end);
4006#endif
4007}
4008
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004009void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004010_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02004011{
4012 Py_ssize_t allocated, pos;
4013
4014 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004015 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02004016
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004017 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02004018 if (writer->overallocate
4019 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
4020 /* overallocate to limit the number of realloc() */
4021 allocated += allocated / OVERALLOCATE_FACTOR;
4022 }
4023
Victor Stinner2bf89932015-10-14 11:25:33 +02004024 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02004025 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004026 if (writer->use_bytearray) {
4027 if (PyByteArray_Resize(writer->buffer, allocated))
4028 goto error;
4029 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
4030 but we cannot use ob_alloc because bytes may need to be moved
4031 to use the whole buffer. bytearray uses an internal optimization
4032 to avoid moving or copying bytes when bytes are removed at the
4033 beginning (ex: del bytearray[:1]). */
4034 }
4035 else {
4036 if (_PyBytes_Resize(&writer->buffer, allocated))
4037 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004038 }
4039 }
4040 else {
4041 /* convert from stack buffer to bytes object buffer */
4042 assert(writer->buffer == NULL);
4043
Victor Stinner661aacc2015-10-14 09:41:48 +02004044 if (writer->use_bytearray)
4045 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
4046 else
4047 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02004048 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02004049 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004050
4051 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004052 char *dest;
4053 if (writer->use_bytearray)
4054 dest = PyByteArray_AS_STRING(writer->buffer);
4055 else
4056 dest = PyBytes_AS_STRING(writer->buffer);
4057 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02004058 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02004059 pos);
4060 }
4061
Victor Stinnerb3653a32015-10-09 03:38:24 +02004062 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004063#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004064 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02004065#endif
Victor Stinner00165072015-10-09 01:53:21 +02004066 }
4067 writer->allocated = allocated;
4068
4069 str = _PyBytesWriter_AsString(writer) + pos;
4070 _PyBytesWriter_CheckConsistency(writer, str);
4071 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02004072
4073error:
4074 _PyBytesWriter_Dealloc(writer);
4075 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02004076}
4077
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004078void*
4079_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
4080{
4081 Py_ssize_t new_min_size;
4082
4083 _PyBytesWriter_CheckConsistency(writer, str);
4084 assert(size >= 0);
4085
4086 if (size == 0) {
4087 /* nothing to do */
4088 return str;
4089 }
4090
4091 if (writer->min_size > PY_SSIZE_T_MAX - size) {
4092 PyErr_NoMemory();
4093 _PyBytesWriter_Dealloc(writer);
4094 return NULL;
4095 }
4096 new_min_size = writer->min_size + size;
4097
4098 if (new_min_size > writer->allocated)
4099 str = _PyBytesWriter_Resize(writer, str, new_min_size);
4100
4101 writer->min_size = new_min_size;
4102 return str;
4103}
4104
Victor Stinner00165072015-10-09 01:53:21 +02004105/* Allocate the buffer to write size bytes.
4106 Return the pointer to the beginning of buffer data.
4107 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004108void*
Victor Stinner00165072015-10-09 01:53:21 +02004109_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
4110{
4111 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02004112 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004113 assert(size >= 0);
4114
Victor Stinnerb3653a32015-10-09 03:38:24 +02004115 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004116#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004117 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02004118 /* In debug mode, don't use the full small buffer because it is less
4119 efficient than bytes and bytearray objects to detect buffer underflow
4120 and buffer overflow. Use 10 bytes of the small buffer to test also
4121 code using the smaller buffer in debug mode.
4122
4123 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
4124 in debug mode to also be able to detect stack overflow when running
4125 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
4126 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
4127 stack overflow. */
4128 writer->allocated = Py_MIN(writer->allocated, 10);
4129 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
4130 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004131 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004132#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004133 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004134#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004135 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004136}
4137
4138PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004139_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004140{
Victor Stinner2bf89932015-10-14 11:25:33 +02004141 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02004142 PyObject *result;
4143
4144 _PyBytesWriter_CheckConsistency(writer, str);
4145
Victor Stinner2bf89932015-10-14 11:25:33 +02004146 size = _PyBytesWriter_GetSize(writer, str);
4147 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004148 Py_CLEAR(writer->buffer);
4149 /* Get the empty byte string singleton */
4150 result = PyBytes_FromStringAndSize(NULL, 0);
4151 }
4152 else if (writer->use_small_buffer) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004153 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004154 }
4155 else {
4156 result = writer->buffer;
4157 writer->buffer = NULL;
4158
Victor Stinner2bf89932015-10-14 11:25:33 +02004159 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004160 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004161 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004162 Py_DECREF(result);
4163 return NULL;
4164 }
4165 }
4166 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02004167 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004168 assert(result == NULL);
4169 return NULL;
4170 }
Victor Stinner00165072015-10-09 01:53:21 +02004171 }
4172 }
Victor Stinner00165072015-10-09 01:53:21 +02004173 }
Victor Stinner00165072015-10-09 01:53:21 +02004174 return result;
4175}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004176
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004177void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02004178_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004179 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004180{
Victor Stinnere9aa5952015-10-12 13:57:47 +02004181 char *str = (char *)ptr;
4182
Victor Stinnerce179bf2015-10-09 12:57:22 +02004183 str = _PyBytesWriter_Prepare(writer, str, size);
4184 if (str == NULL)
4185 return NULL;
4186
4187 Py_MEMCPY(str, bytes, size);
4188 str += size;
4189
4190 return str;
4191}