blob: 20b11fb375f075d8fc085d3107a7aceed5537c6b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* Basic size of a bytes object: the offset of the `ob_sval' member plus one
   byte.  Every allocation for an object holding n bytes of data must ask
   for PyBytesObject_SIZE + n bytes.

   Measuring up to `ob_sval' rather than using sizeof(PyBytesObject) saves
   3 bytes per allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200178 const char *f;
179 const char *p;
180 Py_ssize_t prec;
181 int longflag;
182 int size_tflag;
183 /* Longest 64-bit formatted numbers:
184 - "18446744073709551615\0" (21 bytes)
185 - "-9223372036854775808\0" (21 bytes)
186 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Victor Stinner03dab782015-10-14 00:21:35 +0200188 Longest 64-bit pointer representation:
189 "0xffffffffffffffff\0" (19 bytes). */
190 char buffer[21];
191 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000192
Victor Stinner03dab782015-10-14 00:21:35 +0200193 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Victor Stinner03dab782015-10-14 00:21:35 +0200195 s = _PyBytesWriter_Alloc(&writer, strlen(format));
196 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200198 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199
Victor Stinner03dab782015-10-14 00:21:35 +0200200#define WRITE_BYTES(str) \
201 do { \
202 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
203 if (s == NULL) \
204 goto error; \
205 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200208 if (*f != '%') {
209 *s++ = *f;
210 continue;
211 }
212
213 p = f++;
214
215 /* ignore the width (ex: 10 in "%10s") */
216 while (Py_ISDIGIT(*f))
217 f++;
218
219 /* parse the precision (ex: 10 in "%.10s") */
220 prec = 0;
221 if (*f == '.') {
222 f++;
223 for (; Py_ISDIGIT(*f); f++) {
224 prec = (prec * 10) + (*f - '0');
225 }
226 }
227
228 while (*f && *f != '%' && !Py_ISALPHA(*f))
229 f++;
230
231 /* handle the long flag ('l'), but only for %ld and %lu.
232 others can be added when necessary. */
233 longflag = 0;
234 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
235 longflag = 1;
236 ++f;
237 }
238
239 /* handle the size_t flag ('z'). */
240 size_tflag = 0;
241 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
242 size_tflag = 1;
243 ++f;
244 }
245
246 /* substract bytes preallocated for the format string
247 (ex: 2 for "%s") */
248 writer.min_size -= (f - p + 1);
249
250 switch (*f) {
251 case 'c':
252 {
253 int c = va_arg(vargs, int);
254 if (c < 0 || c > 255) {
255 PyErr_SetString(PyExc_OverflowError,
256 "PyBytes_FromFormatV(): %c format "
257 "expects an integer in range [0; 255]");
258 goto error;
259 }
260 writer.min_size++;
261 *s++ = (unsigned char)c;
262 break;
263 }
264
265 case 'd':
266 if (longflag)
267 sprintf(buffer, "%ld", va_arg(vargs, long));
268 else if (size_tflag)
269 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
270 va_arg(vargs, Py_ssize_t));
271 else
272 sprintf(buffer, "%d", va_arg(vargs, int));
273 assert(strlen(buffer) < sizeof(buffer));
274 WRITE_BYTES(buffer);
275 break;
276
277 case 'u':
278 if (longflag)
279 sprintf(buffer, "%lu",
280 va_arg(vargs, unsigned long));
281 else if (size_tflag)
282 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
283 va_arg(vargs, size_t));
284 else
285 sprintf(buffer, "%u",
286 va_arg(vargs, unsigned int));
287 assert(strlen(buffer) < sizeof(buffer));
288 WRITE_BYTES(buffer);
289 break;
290
291 case 'i':
292 sprintf(buffer, "%i", va_arg(vargs, int));
293 assert(strlen(buffer) < sizeof(buffer));
294 WRITE_BYTES(buffer);
295 break;
296
297 case 'x':
298 sprintf(buffer, "%x", va_arg(vargs, int));
299 assert(strlen(buffer) < sizeof(buffer));
300 WRITE_BYTES(buffer);
301 break;
302
303 case 's':
304 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200306
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (prec > 0 && i > prec)
310 i = prec;
311 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
312 if (s == NULL)
313 goto error;
314 break;
315 }
316
317 case 'p':
318 sprintf(buffer, "%p", va_arg(vargs, void*));
319 assert(strlen(buffer) < sizeof(buffer));
320 /* %p is ill-defined: ensure leading 0x. */
321 if (buffer[1] == 'X')
322 buffer[1] = 'x';
323 else if (buffer[1] != 'x') {
324 memmove(buffer+2, buffer, strlen(buffer)+1);
325 buffer[0] = '0';
326 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 }
Victor Stinner03dab782015-10-14 00:21:35 +0200328 WRITE_BYTES(buffer);
329 break;
330
331 case '%':
332 writer.min_size++;
333 *s++ = '%';
334 break;
335
336 default:
337 if (*f == 0) {
338 /* fix min_size if we reached the end of the format string */
339 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000341
Victor Stinner03dab782015-10-14 00:21:35 +0200342 /* invalid format string: copy unformatted string and exit */
343 WRITE_BYTES(p);
344 return _PyBytesWriter_Finish(&writer, s);
345 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347
Victor Stinner03dab782015-10-14 00:21:35 +0200348#undef WRITE_BYTES
349
350 return _PyBytesWriter_Finish(&writer, s);
351
352 error:
353 _PyBytesWriter_Dealloc(&writer);
354 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355}
356
357PyObject *
358PyBytes_FromFormat(const char *format, ...)
359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 PyObject* ret;
361 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362
363#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 ret = PyBytes_FromFormatV(format, vargs);
369 va_end(vargs);
370 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Ethan Furmanb95b5612015-01-23 20:05:18 -0800373/* Helpers for formatstring */
374
375Py_LOCAL_INLINE(PyObject *)
376getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
377{
378 Py_ssize_t argidx = *p_argidx;
379 if (argidx < arglen) {
380 (*p_argidx)++;
381 if (arglen < 0)
382 return args;
383 else
384 return PyTuple_GetItem(args, argidx);
385 }
386 PyErr_SetString(PyExc_TypeError,
387 "not enough arguments for format string");
388 return NULL;
389}
390
/* Flag bits collected while parsing a format specifier; each one records
 * that the corresponding flag character was seen:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
403
404/* Returns a new reference to a PyBytes object, or NULL on failure. */
405
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200406static char*
407formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200408 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800409{
410 char *p;
411 PyObject *result;
412 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200413 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800414
415 x = PyFloat_AsDouble(v);
416 if (x == -1.0 && PyErr_Occurred()) {
417 PyErr_Format(PyExc_TypeError, "float argument required, "
418 "not %.200s", Py_TYPE(v)->tp_name);
419 return NULL;
420 }
421
422 if (prec < 0)
423 prec = 6;
424
425 p = PyOS_double_to_string(x, type, prec,
426 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
427
428 if (p == NULL)
429 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200430
431 len = strlen(p);
432 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200433 str = _PyBytesWriter_Prepare(writer, str, len);
434 if (str == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436 Py_MEMCPY(str, p, len);
437 str += len;
438 return str;
439 }
440
441 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200443 *p_result = result;
444 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800445}
446
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300447static PyObject *
448formatlong(PyObject *v, int flags, int prec, int type)
449{
450 PyObject *result, *iobj;
451 if (type == 'i')
452 type = 'd';
453 if (PyLong_Check(v))
454 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
455 if (PyNumber_Check(v)) {
456 /* make sure number is a type of integer for o, x, and X */
457 if (type == 'o' || type == 'x' || type == 'X')
458 iobj = PyNumber_Index(v);
459 else
460 iobj = PyNumber_Long(v);
461 if (iobj == NULL) {
462 if (!PyErr_ExceptionMatches(PyExc_TypeError))
463 return NULL;
464 }
465 else if (!PyLong_Check(iobj))
466 Py_CLEAR(iobj);
467 if (iobj != NULL) {
468 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
469 Py_DECREF(iobj);
470 return result;
471 }
472 }
473 PyErr_Format(PyExc_TypeError,
474 "%%%c format: %s is required, not %.200s", type,
475 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
476 : "a number",
477 Py_TYPE(v)->tp_name);
478 return NULL;
479}
480
481static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200482byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200484 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
485 *p = PyBytes_AS_STRING(arg)[0];
486 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
489 *p = PyByteArray_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
492 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300493 PyObject *iobj;
494 long ival;
495 int overflow;
496 /* make sure number is a type of integer */
497 if (PyLong_Check(arg)) {
498 ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 }
500 else {
501 iobj = PyNumber_Index(arg);
502 if (iobj == NULL) {
503 if (!PyErr_ExceptionMatches(PyExc_TypeError))
504 return 0;
505 goto onError;
506 }
507 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
508 Py_DECREF(iobj);
509 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300510 if (!overflow && ival == -1 && PyErr_Occurred())
511 goto onError;
512 if (overflow || !(0 <= ival && ival <= 255)) {
513 PyErr_SetString(PyExc_OverflowError,
514 "%c arg not in range(256)");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300517 *p = (char)ival;
518 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300520 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyErr_SetString(PyExc_TypeError,
522 "%c requires an integer in range(256) or a single byte");
523 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800524}
525
526static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200529 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800531 /* is it a bytes object? */
532 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 *pbuf = PyBytes_AS_STRING(v);
534 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536 return v;
537 }
538 if (PyByteArray_Check(v)) {
539 *pbuf = PyByteArray_AS_STRING(v);
540 *plen = PyByteArray_GET_SIZE(v);
541 Py_INCREF(v);
542 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800543 }
544 /* does it support __bytes__? */
545 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
546 if (func != NULL) {
547 result = PyObject_CallFunctionObjArgs(func, NULL);
548 Py_DECREF(func);
549 if (result == NULL)
550 return NULL;
551 if (!PyBytes_Check(result)) {
552 PyErr_Format(PyExc_TypeError,
553 "__bytes__ returned non-bytes (type %.200s)",
554 Py_TYPE(result)->tp_name);
555 Py_DECREF(result);
556 return NULL;
557 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200558 *pbuf = PyBytes_AS_STRING(result);
559 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800560 return result;
561 }
562 PyErr_Format(PyExc_TypeError,
563 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
564 Py_TYPE(v)->tp_name);
565 return NULL;
566}
567
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200568/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569
570PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200571_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
572 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573{
Victor Stinner772b2b02015-10-14 09:56:53 +0200574 const char *fmt;
575 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800576 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200577 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800579 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200580 _PyBytesWriter writer;
581
Victor Stinner772b2b02015-10-14 09:56:53 +0200582 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyErr_BadInternalCall();
584 return NULL;
585 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 fmt = format;
587 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200588
589 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200591
592 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
593 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800594 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200595 if (!use_bytearray)
596 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200597
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 if (PyTuple_Check(args)) {
599 arglen = PyTuple_GET_SIZE(args);
600 argidx = 0;
601 }
602 else {
603 arglen = -1;
604 argidx = -2;
605 }
606 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
607 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
608 !PyByteArray_Check(args)) {
609 dict = args;
610 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200611
Ethan Furmanb95b5612015-01-23 20:05:18 -0800612 while (--fmtcnt >= 0) {
613 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200614 Py_ssize_t len;
615 char *pos;
616
617 pos = strchr(fmt + 1, '%');
618 if (pos != NULL)
619 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200620 else
621 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200622 assert(len != 0);
623
624 Py_MEMCPY(res, fmt, len);
625 res += len;
626 fmt += len;
627 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628 }
629 else {
630 /* Got a format specifier */
631 int flags = 0;
632 Py_ssize_t width = -1;
633 int prec = -1;
634 int c = '\0';
635 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800636 PyObject *v = NULL;
637 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200638 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800639 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200640 Py_ssize_t len = 0;
641 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200642 Py_ssize_t alloc;
643#ifdef Py_DEBUG
644 char *before;
645#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800646
Ethan Furmanb95b5612015-01-23 20:05:18 -0800647 fmt++;
648 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200649 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650 Py_ssize_t keylen;
651 PyObject *key;
652 int pcount = 1;
653
654 if (dict == NULL) {
655 PyErr_SetString(PyExc_TypeError,
656 "format requires a mapping");
657 goto error;
658 }
659 ++fmt;
660 --fmtcnt;
661 keystart = fmt;
662 /* Skip over balanced parentheses */
663 while (pcount > 0 && --fmtcnt >= 0) {
664 if (*fmt == ')')
665 --pcount;
666 else if (*fmt == '(')
667 ++pcount;
668 fmt++;
669 }
670 keylen = fmt - keystart - 1;
671 if (fmtcnt < 0 || pcount > 0) {
672 PyErr_SetString(PyExc_ValueError,
673 "incomplete format key");
674 goto error;
675 }
676 key = PyBytes_FromStringAndSize(keystart,
677 keylen);
678 if (key == NULL)
679 goto error;
680 if (args_owned) {
681 Py_DECREF(args);
682 args_owned = 0;
683 }
684 args = PyObject_GetItem(dict, key);
685 Py_DECREF(key);
686 if (args == NULL) {
687 goto error;
688 }
689 args_owned = 1;
690 arglen = -1;
691 argidx = -2;
692 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200693
694 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800695 while (--fmtcnt >= 0) {
696 switch (c = *fmt++) {
697 case '-': flags |= F_LJUST; continue;
698 case '+': flags |= F_SIGN; continue;
699 case ' ': flags |= F_BLANK; continue;
700 case '#': flags |= F_ALT; continue;
701 case '0': flags |= F_ZERO; continue;
702 }
703 break;
704 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200705
706 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800707 if (c == '*') {
708 v = getnextarg(args, arglen, &argidx);
709 if (v == NULL)
710 goto error;
711 if (!PyLong_Check(v)) {
712 PyErr_SetString(PyExc_TypeError,
713 "* wants int");
714 goto error;
715 }
716 width = PyLong_AsSsize_t(v);
717 if (width == -1 && PyErr_Occurred())
718 goto error;
719 if (width < 0) {
720 flags |= F_LJUST;
721 width = -width;
722 }
723 if (--fmtcnt >= 0)
724 c = *fmt++;
725 }
726 else if (c >= 0 && isdigit(c)) {
727 width = c - '0';
728 while (--fmtcnt >= 0) {
729 c = Py_CHARMASK(*fmt++);
730 if (!isdigit(c))
731 break;
732 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
733 PyErr_SetString(
734 PyExc_ValueError,
735 "width too big");
736 goto error;
737 }
738 width = width*10 + (c - '0');
739 }
740 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200741
742 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800743 if (c == '.') {
744 prec = 0;
745 if (--fmtcnt >= 0)
746 c = *fmt++;
747 if (c == '*') {
748 v = getnextarg(args, arglen, &argidx);
749 if (v == NULL)
750 goto error;
751 if (!PyLong_Check(v)) {
752 PyErr_SetString(
753 PyExc_TypeError,
754 "* wants int");
755 goto error;
756 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200757 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800758 if (prec == -1 && PyErr_Occurred())
759 goto error;
760 if (prec < 0)
761 prec = 0;
762 if (--fmtcnt >= 0)
763 c = *fmt++;
764 }
765 else if (c >= 0 && isdigit(c)) {
766 prec = c - '0';
767 while (--fmtcnt >= 0) {
768 c = Py_CHARMASK(*fmt++);
769 if (!isdigit(c))
770 break;
771 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
772 PyErr_SetString(
773 PyExc_ValueError,
774 "prec too big");
775 goto error;
776 }
777 prec = prec*10 + (c - '0');
778 }
779 }
780 } /* prec */
781 if (fmtcnt >= 0) {
782 if (c == 'h' || c == 'l' || c == 'L') {
783 if (--fmtcnt >= 0)
784 c = *fmt++;
785 }
786 }
787 if (fmtcnt < 0) {
788 PyErr_SetString(PyExc_ValueError,
789 "incomplete format");
790 goto error;
791 }
792 if (c != '%') {
793 v = getnextarg(args, arglen, &argidx);
794 if (v == NULL)
795 goto error;
796 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200797
798 if (fmtcnt < 0) {
799 /* last writer: disable writer overallocation */
800 writer.overallocate = 0;
801 }
802
Ethan Furmanb95b5612015-01-23 20:05:18 -0800803 sign = 0;
804 fill = ' ';
805 switch (c) {
806 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200807 *res++ = '%';
808 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200809
Ethan Furman62e977f2015-03-11 08:17:00 -0700810 case 'r':
811 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800812 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200813 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800814 if (temp == NULL)
815 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200816 assert(PyUnicode_IS_ASCII(temp));
817 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
818 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (prec >= 0 && len > prec)
820 len = prec;
821 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200822
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 case 's':
824 // %s is only for 2/3 code; 3 only code should use %b
825 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 if (temp == NULL)
828 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 if (prec >= 0 && len > prec)
830 len = prec;
831 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200832
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 case 'i':
834 case 'd':
835 case 'u':
836 case 'o':
837 case 'x':
838 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200839 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200840 && width == -1 && prec == -1
841 && !(flags & (F_SIGN | F_BLANK))
842 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 {
844 /* Fast path */
845 int alternate = flags & F_ALT;
846 int base;
847
848 switch(c)
849 {
850 default:
851 assert(0 && "'type' not in [diuoxX]");
852 case 'd':
853 case 'i':
854 case 'u':
855 base = 10;
856 break;
857 case 'o':
858 base = 8;
859 break;
860 case 'x':
861 case 'X':
862 base = 16;
863 break;
864 }
865
866 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200867 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200868 res = _PyLong_FormatBytesWriter(&writer, res,
869 v, base, alternate);
870 if (res == NULL)
871 goto error;
872 continue;
873 }
874
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300875 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200876 if (!temp)
877 goto error;
878 assert(PyUnicode_IS_ASCII(temp));
879 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
880 len = PyUnicode_GET_LENGTH(temp);
881 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800882 if (flags & F_ZERO)
883 fill = '0';
884 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200885
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 case 'e':
887 case 'E':
888 case 'f':
889 case 'F':
890 case 'g':
891 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200892 if (width == -1 && prec == -1
893 && !(flags & (F_SIGN | F_BLANK)))
894 {
895 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200896 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200897 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200898 if (res == NULL)
899 goto error;
900 continue;
901 }
902
Victor Stinnerad771582015-10-09 12:38:53 +0200903 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800904 goto error;
905 pbuf = PyBytes_AS_STRING(temp);
906 len = PyBytes_GET_SIZE(temp);
907 sign = 1;
908 if (flags & F_ZERO)
909 fill = '0';
910 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200911
Ethan Furmanb95b5612015-01-23 20:05:18 -0800912 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200913 pbuf = &onechar;
914 len = byte_converter(v, &onechar);
915 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200917 if (width == -1) {
918 /* Fast path */
919 *res++ = onechar;
920 continue;
921 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200923
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 default:
925 PyErr_Format(PyExc_ValueError,
926 "unsupported format character '%c' (0x%x) "
927 "at index %zd",
928 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200929 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800930 goto error;
931 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
Ethan Furmanb95b5612015-01-23 20:05:18 -0800933 if (sign) {
934 if (*pbuf == '-' || *pbuf == '+') {
935 sign = *pbuf++;
936 len--;
937 }
938 else if (flags & F_SIGN)
939 sign = '+';
940 else if (flags & F_BLANK)
941 sign = ' ';
942 else
943 sign = 0;
944 }
945 if (width < len)
946 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200947
948 alloc = width;
949 if (sign != 0 && len == width)
950 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200951 /* 2: size preallocated for %s */
952 if (alloc > 2) {
953 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200954 if (res == NULL)
955 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800956 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957#ifdef Py_DEBUG
958 before = res;
959#endif
960
961 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800962 if (sign) {
963 if (fill != ' ')
964 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 if (width > len)
966 width--;
967 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968
969 /* Write the numeric prefix for "x", "X" and "o" formats
970 if the alternate form is used.
971 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
973 assert(pbuf[0] == '0');
974 assert(pbuf[1] == c);
975 if (fill != ' ') {
976 *res++ = *pbuf++;
977 *res++ = *pbuf++;
978 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800979 width -= 2;
980 if (width < 0)
981 width = 0;
982 len -= 2;
983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984
985 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200987 memset(res, fill, width - len);
988 res += (width - len);
989 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991
992 /* If padding with spaces: write sign if needed and/or numeric
993 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 if (fill == ' ') {
995 if (sign)
996 *res++ = sign;
997 if ((flags & F_ALT) &&
998 (c == 'x' || c == 'X')) {
999 assert(pbuf[0] == '0');
1000 assert(pbuf[1] == c);
1001 *res++ = *pbuf++;
1002 *res++ = *pbuf++;
1003 }
1004 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001005
1006 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 Py_MEMCPY(res, pbuf, len);
1008 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Pad right with the fill character if needed */
1011 if (width > len) {
1012 memset(res, ' ', width - len);
1013 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 if (dict && (argidx < arglen) && c != '%') {
1017 PyErr_SetString(PyExc_TypeError,
1018 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 Py_XDECREF(temp);
1020 goto error;
1021 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023
1024#ifdef Py_DEBUG
1025 /* check that we computed the exact size for this write */
1026 assert((res - before) == alloc);
1027#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001029
1030 /* If overallocation was disabled, ensure that it was the last
1031 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001032 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
Ethan Furmanb95b5612015-01-23 20:05:18 -08001035 if (argidx < arglen && !dict) {
1036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
1038 goto error;
1039 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001040
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 if (args_owned) {
1042 Py_DECREF(args);
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045
1046 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001047 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 if (args_owned) {
1049 Py_DECREF(args);
1050 }
1051 return NULL;
1052}
1053
1054/* =-= */
1055
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001056static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001057bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060}
1061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062/* Unescape a backslash-escaped string. If unicode is non-zero,
1063 the string is a u-literal. If recode_encoding is non-zero,
1064 the string is UTF-8 encoded and should be re-encoded in the
1065 specified encoding. */
1066
1067PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 Py_ssize_t len,
1069 const char *errors,
1070 Py_ssize_t unicode,
1071 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 int c;
1074 char *p, *buf;
1075 const char *end;
1076 PyObject *v;
1077 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1078 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1079 if (v == NULL)
1080 return NULL;
1081 p = buf = PyBytes_AsString(v);
1082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
1085 non_esc:
1086 if (recode_encoding && (*s & 0x80)) {
1087 PyObject *u, *w;
1088 char *r;
1089 const char* t;
1090 Py_ssize_t rn;
1091 t = s;
1092 /* Decode non-ASCII bytes as UTF-8. */
1093 while (t < end && (*t & 0x80)) t++;
1094 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1095 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 /* Recode them in target encoding. */
1098 w = PyUnicode_AsEncodedString(
1099 u, recode_encoding, errors);
1100 Py_DECREF(u);
1101 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 /* Append bytes to output buffer. */
1104 assert(PyBytes_Check(w));
1105 r = PyBytes_AS_STRING(w);
1106 rn = PyBytes_GET_SIZE(w);
1107 Py_MEMCPY(p, r, rn);
1108 p += rn;
1109 Py_DECREF(w);
1110 s = t;
1111 } else {
1112 *p++ = *s++;
1113 }
1114 continue;
1115 }
1116 s++;
1117 if (s==end) {
1118 PyErr_SetString(PyExc_ValueError,
1119 "Trailing \\ in string");
1120 goto failed;
1121 }
1122 switch (*s++) {
1123 /* XXX This assumes ASCII! */
1124 case '\n': break;
1125 case '\\': *p++ = '\\'; break;
1126 case '\'': *p++ = '\''; break;
1127 case '\"': *p++ = '\"'; break;
1128 case 'b': *p++ = '\b'; break;
1129 case 'f': *p++ = '\014'; break; /* FF */
1130 case 't': *p++ = '\t'; break;
1131 case 'n': *p++ = '\n'; break;
1132 case 'r': *p++ = '\r'; break;
1133 case 'v': *p++ = '\013'; break; /* VT */
1134 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1135 case '0': case '1': case '2': case '3':
1136 case '4': case '5': case '6': case '7':
1137 c = s[-1] - '0';
1138 if (s < end && '0' <= *s && *s <= '7') {
1139 c = (c<<3) + *s++ - '0';
1140 if (s < end && '0' <= *s && *s <= '7')
1141 c = (c<<3) + *s++ - '0';
1142 }
1143 *p++ = c;
1144 break;
1145 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001146 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 unsigned int x = 0;
1148 c = Py_CHARMASK(*s);
1149 s++;
David Malcolm96960882010-11-05 17:23:41 +00001150 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001152 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 x = 10 + c - 'a';
1154 else
1155 x = 10 + c - 'A';
1156 x = x << 4;
1157 c = Py_CHARMASK(*s);
1158 s++;
David Malcolm96960882010-11-05 17:23:41 +00001159 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001161 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 x += 10 + c - 'a';
1163 else
1164 x += 10 + c - 'A';
1165 *p++ = x;
1166 break;
1167 }
1168 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001169 PyErr_Format(PyExc_ValueError,
1170 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001171 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 goto failed;
1173 }
1174 if (strcmp(errors, "replace") == 0) {
1175 *p++ = '?';
1176 } else if (strcmp(errors, "ignore") == 0)
1177 /* do nothing */;
1178 else {
1179 PyErr_Format(PyExc_ValueError,
1180 "decoding error; unknown "
1181 "error handling code: %.400s",
1182 errors);
1183 goto failed;
1184 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001185 /* skip \x */
1186 if (s < end && Py_ISXDIGIT(s[0]))
1187 s++; /* and a hexdigit */
1188 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 default:
1190 *p++ = '\\';
1191 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001192 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 UTF-8 bytes may follow. */
1194 }
1195 }
1196 if (p-buf < newlen)
1197 _PyBytes_Resize(&v, p - buf);
1198 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 Py_DECREF(v);
1201 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202}
1203
1204/* -------------------------------------------------------------------- */
1205/* object api */
1206
1207Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001208PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return -1;
1214 }
1215 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216}
1217
1218char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001219PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 if (!PyBytes_Check(op)) {
1222 PyErr_Format(PyExc_TypeError,
1223 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1224 return NULL;
1225 }
1226 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227}
1228
1229int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001230PyBytes_AsStringAndSize(PyObject *obj,
1231 char **s,
1232 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 if (s == NULL) {
1235 PyErr_BadInternalCall();
1236 return -1;
1237 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 if (!PyBytes_Check(obj)) {
1240 PyErr_Format(PyExc_TypeError,
1241 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1242 return -1;
1243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 *s = PyBytes_AS_STRING(obj);
1246 if (len != NULL)
1247 *len = PyBytes_GET_SIZE(obj);
1248 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001249 PyErr_SetString(PyExc_ValueError,
1250 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 return -1;
1252 }
1253 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254}
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
1256/* -------------------------------------------------------------------- */
1257/* Methods */
1258
Eric Smith0923d1d2009-04-16 20:16:10 +00001259#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
1261#include "stringlib/fastsearch.h"
1262#include "stringlib/count.h"
1263#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001264#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001265#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001266#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001267#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001268
Eric Smith0f78bff2009-11-30 01:01:42 +00001269#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271PyObject *
1272PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001273{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001274 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001276 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 unsigned char quote, *s, *p;
1279
1280 /* Compute size of output string */
1281 squotes = dquotes = 0;
1282 newsize = 3; /* b'' */
1283 s = (unsigned char*)op->ob_sval;
1284 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001287 case '\'': squotes++; break;
1288 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001290 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 default:
1292 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001293 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001295 if (newsize > PY_SSIZE_T_MAX - incr)
1296 goto overflow;
1297 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 }
1299 quote = '\'';
1300 if (smartquotes && squotes && !dquotes)
1301 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001302 if (squotes && quote == '\'') {
1303 if (newsize > PY_SSIZE_T_MAX - squotes)
1304 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307
1308 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 if (v == NULL) {
1310 return NULL;
1311 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001314 *p++ = 'b', *p++ = quote;
1315 for (i = 0; i < length; i++) {
1316 unsigned char c = op->ob_sval[i];
1317 if (c == quote || c == '\\')
1318 *p++ = '\\', *p++ = c;
1319 else if (c == '\t')
1320 *p++ = '\\', *p++ = 't';
1321 else if (c == '\n')
1322 *p++ = '\\', *p++ = 'n';
1323 else if (c == '\r')
1324 *p++ = '\\', *p++ = 'r';
1325 else if (c < ' ' || c >= 0x7f) {
1326 *p++ = '\\';
1327 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001328 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1329 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 else
1332 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001335 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001337
1338 overflow:
1339 PyErr_SetString(PyExc_OverflowError,
1340 "bytes object is too large to make repr");
1341 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001342}
1343
Neal Norwitz6968b052007-02-27 19:02:19 +00001344static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001345bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001348}
1349
Neal Norwitz6968b052007-02-27 19:02:19 +00001350static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001351bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 if (Py_BytesWarningFlag) {
1354 if (PyErr_WarnEx(PyExc_BytesWarning,
1355 "str() on a bytes instance", 1))
1356 return NULL;
1357 }
1358 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001359}
1360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001362bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365}
Neal Norwitz6968b052007-02-27 19:02:19 +00001366
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367/* This is also used by PyBytes_Concat() */
1368static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001369bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_ssize_t size;
1372 Py_buffer va, vb;
1373 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 va.len = -1;
1376 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001377 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1378 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1380 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1381 goto done;
1382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 /* Optimize end cases */
1385 if (va.len == 0 && PyBytes_CheckExact(b)) {
1386 result = b;
1387 Py_INCREF(result);
1388 goto done;
1389 }
1390 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1391 result = a;
1392 Py_INCREF(result);
1393 goto done;
1394 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 size = va.len + vb.len;
1397 if (size < 0) {
1398 PyErr_NoMemory();
1399 goto done;
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 result = PyBytes_FromStringAndSize(NULL, size);
1403 if (result != NULL) {
1404 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1405 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1406 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
1408 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (va.len != -1)
1410 PyBuffer_Release(&va);
1411 if (vb.len != -1)
1412 PyBuffer_Release(&vb);
1413 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414}
Neal Norwitz6968b052007-02-27 19:02:19 +00001415
1416static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001417bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001418{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001419 Py_ssize_t i;
1420 Py_ssize_t j;
1421 Py_ssize_t size;
1422 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 size_t nbytes;
1424 if (n < 0)
1425 n = 0;
1426 /* watch out for overflows: the size can overflow int,
1427 * and the # of bytes needed can overflow size_t
1428 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001429 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 PyErr_SetString(PyExc_OverflowError,
1431 "repeated bytes are too long");
1432 return NULL;
1433 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001434 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1436 Py_INCREF(a);
1437 return (PyObject *)a;
1438 }
1439 nbytes = (size_t)size;
1440 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1441 PyErr_SetString(PyExc_OverflowError,
1442 "repeated bytes are too long");
1443 return NULL;
1444 }
1445 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1446 if (op == NULL)
1447 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001448 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 op->ob_shash = -1;
1450 op->ob_sval[size] = '\0';
1451 if (Py_SIZE(a) == 1 && n > 0) {
1452 memset(op->ob_sval, a->ob_sval[0] , n);
1453 return (PyObject *) op;
1454 }
1455 i = 0;
1456 if (i < size) {
1457 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1458 i = Py_SIZE(a);
1459 }
1460 while (i < size) {
1461 j = (i <= size-i) ? i : size-i;
1462 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1463 i += j;
1464 }
1465 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001466}
1467
Guido van Rossum98297ee2007-11-06 21:34:58 +00001468static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001469bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001470{
1471 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1472 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001473 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001474 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001475 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001476 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001477 return -1;
1478 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1479 varg.buf, varg.len, 0);
1480 PyBuffer_Release(&varg);
1481 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001482 }
1483 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001484 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1485 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001486 }
1487
Antoine Pitrou0010d372010-08-15 17:12:55 +00001488 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489}
1490
Neal Norwitz6968b052007-02-27 19:02:19 +00001491static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001492bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001493{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 if (i < 0 || i >= Py_SIZE(a)) {
1495 PyErr_SetString(PyExc_IndexError, "index out of range");
1496 return NULL;
1497 }
1498 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001499}
1500
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001501Py_LOCAL(int)
1502bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1503{
1504 int cmp;
1505 Py_ssize_t len;
1506
1507 len = Py_SIZE(a);
1508 if (Py_SIZE(b) != len)
1509 return 0;
1510
1511 if (a->ob_sval[0] != b->ob_sval[0])
1512 return 0;
1513
1514 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1515 return (cmp == 0);
1516}
1517
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 int c;
1522 Py_ssize_t len_a, len_b;
1523 Py_ssize_t min_len;
1524 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001525 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 /* Make sure both arguments are strings. */
1528 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001529 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001530 rc = PyObject_IsInstance((PyObject*)a,
1531 (PyObject*)&PyUnicode_Type);
1532 if (!rc)
1533 rc = PyObject_IsInstance((PyObject*)b,
1534 (PyObject*)&PyUnicode_Type);
1535 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001537 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001538 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001539 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001540 return NULL;
1541 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001542 else {
1543 rc = PyObject_IsInstance((PyObject*)a,
1544 (PyObject*)&PyLong_Type);
1545 if (!rc)
1546 rc = PyObject_IsInstance((PyObject*)b,
1547 (PyObject*)&PyLong_Type);
1548 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001549 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001550 if (rc) {
1551 if (PyErr_WarnEx(PyExc_BytesWarning,
1552 "Comparison between bytes and int", 1))
1553 return NULL;
1554 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001555 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 }
1557 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001561 case Py_EQ:
1562 case Py_LE:
1563 case Py_GE:
1564 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001566 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001567 case Py_NE:
1568 case Py_LT:
1569 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001571 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001572 default:
1573 PyErr_BadArgument();
1574 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001577 else if (op == Py_EQ || op == Py_NE) {
1578 int eq = bytes_compare_eq(a, b);
1579 eq ^= (op == Py_NE);
1580 result = eq ? Py_True : Py_False;
1581 }
1582 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001583 len_a = Py_SIZE(a);
1584 len_b = Py_SIZE(b);
1585 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001586 if (min_len > 0) {
1587 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001588 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001589 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 else
1592 c = 0;
1593 if (c == 0)
1594 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1595 switch (op) {
1596 case Py_LT: c = c < 0; break;
1597 case Py_LE: c = c <= 0; break;
1598 case Py_GT: c = c > 0; break;
1599 case Py_GE: c = c >= 0; break;
1600 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001601 PyErr_BadArgument();
1602 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001603 }
1604 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 Py_INCREF(result);
1608 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001609}
1610
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001611static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001612bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001613{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001614 if (a->ob_shash == -1) {
1615 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001616 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001617 }
1618 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001619}
1620
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001621static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001622bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 if (PyIndex_Check(item)) {
1625 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1626 if (i == -1 && PyErr_Occurred())
1627 return NULL;
1628 if (i < 0)
1629 i += PyBytes_GET_SIZE(self);
1630 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1631 PyErr_SetString(PyExc_IndexError,
1632 "index out of range");
1633 return NULL;
1634 }
1635 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1636 }
1637 else if (PySlice_Check(item)) {
1638 Py_ssize_t start, stop, step, slicelength, cur, i;
1639 char* source_buf;
1640 char* result_buf;
1641 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001642
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001643 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyBytes_GET_SIZE(self),
1645 &start, &stop, &step, &slicelength) < 0) {
1646 return NULL;
1647 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 if (slicelength <= 0) {
1650 return PyBytes_FromStringAndSize("", 0);
1651 }
1652 else if (start == 0 && step == 1 &&
1653 slicelength == PyBytes_GET_SIZE(self) &&
1654 PyBytes_CheckExact(self)) {
1655 Py_INCREF(self);
1656 return (PyObject *)self;
1657 }
1658 else if (step == 1) {
1659 return PyBytes_FromStringAndSize(
1660 PyBytes_AS_STRING(self) + start,
1661 slicelength);
1662 }
1663 else {
1664 source_buf = PyBytes_AS_STRING(self);
1665 result = PyBytes_FromStringAndSize(NULL, slicelength);
1666 if (result == NULL)
1667 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 result_buf = PyBytes_AS_STRING(result);
1670 for (cur = start, i = 0; i < slicelength;
1671 cur += step, i++) {
1672 result_buf[i] = source_buf[cur];
1673 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 return result;
1676 }
1677 }
1678 else {
1679 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001680 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 Py_TYPE(item)->tp_name);
1682 return NULL;
1683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684}
1685
1686static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001687bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1690 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691}
1692
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001693static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 (lenfunc)bytes_length, /*sq_length*/
1695 (binaryfunc)bytes_concat, /*sq_concat*/
1696 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1697 (ssizeargfunc)bytes_item, /*sq_item*/
1698 0, /*sq_slice*/
1699 0, /*sq_ass_item*/
1700 0, /*sq_ass_slice*/
1701 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702};
1703
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001704static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 (lenfunc)bytes_length,
1706 (binaryfunc)bytes_subscript,
1707 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708};
1709
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001710static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 (getbufferproc)bytes_buffer_getbuffer,
1712 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713};
1714
1715
/* Strip modes understood by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP  0    /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP  2    /* strip both ends */
1719
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720/*[clinic input]
1721bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723 sep: object = None
        The delimiter according to which to split the bytes.
1725 None (the default value) means split on ASCII whitespace characters
1726 (space, tab, return, newline, formfeed, vertical tab).
1727 maxsplit: Py_ssize_t = -1
1728 Maximum number of splits to do.
1729 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001731Return a list of the sections in the bytes, using sep as the delimiter.
1732[clinic start generated code]*/
1733
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001735bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001736/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001737{
1738 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 const char *s = PyBytes_AS_STRING(self), *sub;
1740 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001741 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 if (maxsplit < 0)
1744 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001745 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001747 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 return NULL;
1749 sub = vsub.buf;
1750 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1753 PyBuffer_Release(&vsub);
1754 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001755}
1756
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001757/*[clinic input]
1758bytes.partition
1759
1760 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762 /
1763
1764Partition the bytes into three parts using the given separator.
1765
1766This will search for the separator sep in the bytes. If the separator is found,
1767returns a 3-tuple containing the part before the separator, the separator
1768itself, and the part after it.
1769
1770If the separator is not found, returns a 3-tuple containing the original bytes
1771object and two empty bytes objects.
1772[clinic start generated code]*/
1773
Neal Norwitz6968b052007-02-27 19:02:19 +00001774static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001775bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001776/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001777{
Neal Norwitz6968b052007-02-27 19:02:19 +00001778 return stringlib_partition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001782 );
1783}
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785/*[clinic input]
1786bytes.rpartition
1787
1788 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001789 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 /
1791
1792Partition the bytes into three parts using the given separator.
1793
This will search for the separator sep in the bytes, starting at the end. If
1795the separator is found, returns a 3-tuple containing the part before the
1796separator, the separator itself, and the part after it.
1797
1798If the separator is not found, returns a 3-tuple containing two empty bytes
1799objects and the original bytes object.
1800[clinic start generated code]*/
1801
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001802static PyObject *
1803bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001804/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001805{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 return stringlib_rpartition(
1807 (PyObject*) self,
1808 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001809 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001811}
1812
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001813/*[clinic input]
1814bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001815
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001816Return a list of the sections in the bytes, using sep as the delimiter.
1817
1818Splitting is done starting at the end of the bytes and working to the front.
1819[clinic start generated code]*/
1820
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001822bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001824{
1825 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 const char *s = PyBytes_AS_STRING(self), *sub;
1827 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 if (maxsplit < 0)
1831 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001834 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 return NULL;
1836 sub = vsub.buf;
1837 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1840 PyBuffer_Release(&vsub);
1841 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001842}
1843
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001845/*[clinic input]
1846bytes.join
1847
1848 iterable_of_bytes: object
1849 /
1850
1851Concatenate any number of bytes objects.
1852
1853The bytes whose method is called is inserted in between each pair.
1854
1855The result is returned as a new bytes object.
1856
1857Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1858[clinic start generated code]*/
1859
Neal Norwitz6968b052007-02-27 19:02:19 +00001860static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001861bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001862/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001863{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001864 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001865}
1866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867PyObject *
1868_PyBytes_Join(PyObject *sep, PyObject *x)
1869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 assert(sep != NULL && PyBytes_Check(sep));
1871 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001872 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873}
1874
/* Helper macro to fix up start/end slice values.

   - end is clamped to len; a negative end is taken relative to len and
     then clamped at 0.
   - a negative start is taken relative to len and then clamped at 0.
     start is NOT clamped to len, so start > end is possible afterwards.

   start and end must be modifiable lvalues; every argument may be
   evaluated more than once, so pass side-effect-free expressions.

   The body is wrapped in do { } while (0) so that the expansion is a single
   statement: without it, "if (cond) ADJUST_INDICES(...);" would guard only
   the first inner "if" and always run the start fix-up.  Invoke it with a
   trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
1890Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001891bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001894 char byte;
1895 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001897 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001899 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
Antoine Pitrouac65d962011-10-20 23:54:17 +02001901 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1902 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouac65d962011-10-20 23:54:17 +02001905 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001906 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001907 return -2;
1908
1909 sub = subbuf.buf;
1910 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001912 else {
1913 sub = &byte;
1914 sub_len = 1;
1915 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001918 ADJUST_INDICES(start, end, len);
1919 if (end - start < sub_len)
1920 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001921 else if (sub_len == 1
1922#ifndef HAVE_MEMRCHR
1923 && dir > 0
1924#endif
1925 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001926 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001927 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001928 res = stringlib_fastsearch_memchr_1char(
1929 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001930 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001931 if (res >= 0)
1932 res += start;
1933 }
1934 else {
1935 if (dir > 0)
1936 res = stringlib_find_slice(
1937 PyBytes_AS_STRING(self), len,
1938 sub, sub_len, start, end);
1939 else
1940 res = stringlib_rfind_slice(
1941 PyBytes_AS_STRING(self), len,
1942 sub, sub_len, start, end);
1943 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001944
1945 if (subobj)
1946 PyBuffer_Release(&subbuf);
1947
1948 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949}
1950
1951
1952PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001953"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001954\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001955Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001956such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001958\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959Return -1 on failure.");
1960
Neal Norwitz6968b052007-02-27 19:02:19 +00001961static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001962bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001963{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 Py_ssize_t result = bytes_find_internal(self, args, +1);
1965 if (result == -2)
1966 return NULL;
1967 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001968}
1969
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
1971PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001972"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001973\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974Like B.find() but raise ValueError when the substring is not found.");
1975
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001976static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001977bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001978{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 Py_ssize_t result = bytes_find_internal(self, args, +1);
1980 if (result == -2)
1981 return NULL;
1982 if (result == -1) {
1983 PyErr_SetString(PyExc_ValueError,
1984 "substring not found");
1985 return NULL;
1986 }
1987 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001988}
1989
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
1991PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001992"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001993\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001995such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001997\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998Return -1 on failure.");
1999
Neal Norwitz6968b052007-02-27 19:02:19 +00002000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002001bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 Py_ssize_t result = bytes_find_internal(self, args, -1);
2004 if (result == -2)
2005 return NULL;
2006 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002007}
2008
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002011"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012\n\
2013Like B.rfind() but raise ValueError when the substring is not found.");
2014
2015static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002016bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 Py_ssize_t result = bytes_find_internal(self, args, -1);
2019 if (result == -2)
2020 return NULL;
2021 if (result == -1) {
2022 PyErr_SetString(PyExc_ValueError,
2023 "substring not found");
2024 return NULL;
2025 }
2026 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027}
2028
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
2030Py_LOCAL_INLINE(PyObject *)
2031do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002032{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002033 Py_buffer vsep;
2034 char *s = PyBytes_AS_STRING(self);
2035 Py_ssize_t len = PyBytes_GET_SIZE(self);
2036 char *sep;
2037 Py_ssize_t seplen;
2038 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002040 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 return NULL;
2042 sep = vsep.buf;
2043 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 i = 0;
2046 if (striptype != RIGHTSTRIP) {
2047 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2048 i++;
2049 }
2050 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 j = len;
2053 if (striptype != LEFTSTRIP) {
2054 do {
2055 j--;
2056 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2057 j++;
2058 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2063 Py_INCREF(self);
2064 return (PyObject*)self;
2065 }
2066 else
2067 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002068}
2069
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
2071Py_LOCAL_INLINE(PyObject *)
2072do_strip(PyBytesObject *self, int striptype)
2073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 char *s = PyBytes_AS_STRING(self);
2075 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 i = 0;
2078 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002079 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 i++;
2081 }
2082 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 j = len;
2085 if (striptype != LEFTSTRIP) {
2086 do {
2087 j--;
David Malcolm96960882010-11-05 17:23:41 +00002088 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 j++;
2090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2093 Py_INCREF(self);
2094 return (PyObject*)self;
2095 }
2096 else
2097 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098}
2099
2100
2101Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 if (bytes != NULL && bytes != Py_None) {
2105 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 }
2107 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108}
2109
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110/*[clinic input]
2111bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002113 self: self(type="PyBytesObject *")
2114 bytes: object = None
2115 /
2116
2117Strip leading and trailing bytes contained in the argument.
2118
2119If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2120[clinic start generated code]*/
2121
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002122static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002123bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002124/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002125{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002127}
2128
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129/*[clinic input]
2130bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132 self: self(type="PyBytesObject *")
2133 bytes: object = None
2134 /
2135
2136Strip leading bytes contained in the argument.
2137
2138If the argument is omitted or None, strip leading ASCII whitespace.
2139[clinic start generated code]*/
2140
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141static PyObject *
2142bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002143/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144{
2145 return do_argstrip(self, LEFTSTRIP, bytes);
2146}
2147
2148/*[clinic input]
2149bytes.rstrip
2150
2151 self: self(type="PyBytesObject *")
2152 bytes: object = None
2153 /
2154
2155Strip trailing bytes contained in the argument.
2156
2157If the argument is omitted or None, strip trailing ASCII whitespace.
2158[clinic start generated code]*/
2159
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002160static PyObject *
2161bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002162/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002163{
2164 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002165}
Neal Norwitz6968b052007-02-27 19:02:19 +00002166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
2168PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002169"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002170\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002172string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173as in slice notation.");
2174
2175static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002176bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 PyObject *sub_obj;
2179 const char *str = PyBytes_AS_STRING(self), *sub;
2180 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002181 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
Antoine Pitrouac65d962011-10-20 23:54:17 +02002184 Py_buffer vsub;
2185 PyObject *count_obj;
2186
2187 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2188 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouac65d962011-10-20 23:54:17 +02002191 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002192 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002193 return NULL;
2194
2195 sub = vsub.buf;
2196 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002198 else {
2199 sub = &byte;
2200 sub_len = 1;
2201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002203 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrouac65d962011-10-20 23:54:17 +02002205 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2207 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002208
2209 if (sub_obj)
2210 PyBuffer_Release(&vsub);
2211
2212 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213}
2214
2215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216/*[clinic input]
2217bytes.translate
2218
2219 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002220 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221 Translation table, which must be a bytes object of length 256.
2222 [
2223 deletechars: object
2224 ]
2225 /
2226
2227Return a copy with each character mapped by the given translation table.
2228
2229All characters occurring in the optional argument deletechars are removed.
2230The remaining characters are mapped through the given translation table.
2231[clinic start generated code]*/
2232
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002233static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002234bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2235 PyObject *deletechars)
2236/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002237{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002238 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002239 Py_buffer table_view = {NULL, NULL};
2240 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002242 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002243 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 Py_ssize_t inlen, tablen, dellen = 0;
2246 PyObject *result;
2247 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249 if (PyBytes_Check(table)) {
2250 table_chars = PyBytes_AS_STRING(table);
2251 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002252 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002253 else if (table == Py_None) {
2254 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 tablen = 256;
2256 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002257 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002258 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002259 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002260 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002261 tablen = table_view.len;
2262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 if (tablen != 256) {
2265 PyErr_SetString(PyExc_ValueError,
2266 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002267 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 return NULL;
2269 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271 if (deletechars != NULL) {
2272 if (PyBytes_Check(deletechars)) {
2273 del_table_chars = PyBytes_AS_STRING(deletechars);
2274 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002276 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002277 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002278 PyBuffer_Release(&table_view);
2279 return NULL;
2280 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002281 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002282 dellen = del_table_view.len;
2283 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 }
2285 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002287 dellen = 0;
2288 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 inlen = PyBytes_GET_SIZE(input_obj);
2291 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002292 if (result == NULL) {
2293 PyBuffer_Release(&del_table_view);
2294 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002296 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 output_start = output = PyBytes_AsString(result);
2298 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002299
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002301 /* If no deletions are required, use faster code */
2302 for (i = inlen; --i >= 0; ) {
2303 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 changed = 1;
2306 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002307 if (!changed && PyBytes_CheckExact(input_obj)) {
2308 Py_INCREF(input_obj);
2309 Py_DECREF(result);
2310 result = input_obj;
2311 }
2312 PyBuffer_Release(&del_table_view);
2313 PyBuffer_Release(&table_view);
2314 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002316
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 for (i = 0; i < 256; i++)
2319 trans_table[i] = Py_CHARMASK(i);
2320 } else {
2321 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002324 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002328 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 for (i = inlen; --i >= 0; ) {
2331 c = Py_CHARMASK(*input++);
2332 if (trans_table[c] != -1)
2333 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2334 continue;
2335 changed = 1;
2336 }
2337 if (!changed && PyBytes_CheckExact(input_obj)) {
2338 Py_DECREF(result);
2339 Py_INCREF(input_obj);
2340 return input_obj;
2341 }
2342 /* Fix the size of the resulting string */
2343 if (inlen > 0)
2344 _PyBytes_Resize(&result, output - output_start);
2345 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346}
2347
2348
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002349/*[clinic input]
2350
2351@staticmethod
2352bytes.maketrans
2353
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002354 frm: Py_buffer
2355 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356 /
2357
2358Return a translation table useable for the bytes or bytearray translate method.
2359
2360The returned table will be one where each byte in frm is mapped to the byte at
2361the same position in to.
2362
2363The bytes objects frm and to must be of the same length.
2364[clinic start generated code]*/
2365
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002366static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002367bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002368/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002369{
2370 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002371}
2372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002373/* find and count characters and substrings */
2374
/* Locate the first byte equal to `c` within the first `target_len` bytes
   starting at `target`.  Evaluates to a `char *` pointing at the match, or
   NULL when memchr() finds none.  Every argument is parenthesised in the
   expansion so that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2377
2378/* String ops must return a string. */
2379/* If the object is subclass of string, create a copy */
2380Py_LOCAL(PyBytesObject *)
2381return_self(PyBytesObject *self)
2382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 if (PyBytes_CheckExact(self)) {
2384 Py_INCREF(self);
2385 return self;
2386 }
2387 return (PyBytesObject *)PyBytes_FromStringAndSize(
2388 PyBytes_AS_STRING(self),
2389 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002390}
2391
2392Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002393countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002394{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 Py_ssize_t count=0;
2396 const char *start=target;
2397 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 while ( (start=findchar(start, end-start, c)) != NULL ) {
2400 count++;
2401 if (count >= maxcount)
2402 break;
2403 start += 1;
2404 }
2405 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406}
2407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002408
2409/* Algorithms for different cases of string replacement */
2410
2411/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2412Py_LOCAL(PyBytesObject *)
2413replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 const char *to_s, Py_ssize_t to_len,
2415 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 char *self_s, *result_s;
2418 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002419 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002424 /* 1 at the end plus 1 after every character;
2425 count = min(maxcount, self_len + 1) */
2426 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002428 else
2429 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2430 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 /* Check for overflow */
2433 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002434 assert(count > 0);
2435 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 PyErr_SetString(PyExc_OverflowError,
2437 "replacement bytes are too long");
2438 return NULL;
2439 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002440 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002442 if (! (result = (PyBytesObject *)
2443 PyBytes_FromStringAndSize(NULL, result_len)) )
2444 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 self_s = PyBytes_AS_STRING(self);
2447 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 /* Lay the first one down (guaranteed this will occur) */
2452 Py_MEMCPY(result_s, to_s, to_len);
2453 result_s += to_len;
2454 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 for (i=0; i<count; i++) {
2457 *result_s++ = *self_s++;
2458 Py_MEMCPY(result_s, to_s, to_len);
2459 result_s += to_len;
2460 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 /* Copy the rest of the original string */
2463 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466}
2467
2468/* Special case for deleting a single character */
2469/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2470Py_LOCAL(PyBytesObject *)
2471replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 char *self_s, *result_s;
2475 char *start, *next, *end;
2476 Py_ssize_t self_len, result_len;
2477 Py_ssize_t count;
2478 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 self_len = PyBytes_GET_SIZE(self);
2481 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 count = countchar(self_s, self_len, from_c, maxcount);
2484 if (count == 0) {
2485 return return_self(self);
2486 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 result_len = self_len - count; /* from_len == 1 */
2489 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 if ( (result = (PyBytesObject *)
2492 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2493 return NULL;
2494 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 start = self_s;
2497 end = self_s + self_len;
2498 while (count-- > 0) {
2499 next = findchar(start, end-start, from_c);
2500 if (next == NULL)
2501 break;
2502 Py_MEMCPY(result_s, start, next-start);
2503 result_s += (next-start);
2504 start = next+1;
2505 }
2506 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509}
2510
2511/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2512
2513Py_LOCAL(PyBytesObject *)
2514replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 const char *from_s, Py_ssize_t from_len,
2516 Py_ssize_t maxcount) {
2517 char *self_s, *result_s;
2518 char *start, *next, *end;
2519 Py_ssize_t self_len, result_len;
2520 Py_ssize_t count, offset;
2521 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 self_len = PyBytes_GET_SIZE(self);
2524 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 count = stringlib_count(self_s, self_len,
2527 from_s, from_len,
2528 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 if (count == 0) {
2531 /* no matches */
2532 return return_self(self);
2533 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 result_len = self_len - (count * from_len);
2536 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 if ( (result = (PyBytesObject *)
2539 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2540 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 start = self_s;
2545 end = self_s + self_len;
2546 while (count-- > 0) {
2547 offset = stringlib_find(start, end-start,
2548 from_s, from_len,
2549 0);
2550 if (offset == -1)
2551 break;
2552 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 result_s += (next-start);
2557 start = next+from_len;
2558 }
2559 Py_MEMCPY(result_s, start, end-start);
2560 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561}
2562
2563/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2564Py_LOCAL(PyBytesObject *)
2565replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 char from_c, char to_c,
2567 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 char *self_s, *result_s, *start, *end, *next;
2570 Py_ssize_t self_len;
2571 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 /* The result string will be the same size */
2574 self_s = PyBytes_AS_STRING(self);
2575 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 if (next == NULL) {
2580 /* No matches; return the original string */
2581 return return_self(self);
2582 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 /* Need to make a new string */
2585 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2586 if (result == NULL)
2587 return NULL;
2588 result_s = PyBytes_AS_STRING(result);
2589 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 /* change everything in-place, starting with this one */
2592 start = result_s + (next-self_s);
2593 *start = to_c;
2594 start++;
2595 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 while (--maxcount > 0) {
2598 next = findchar(start, end-start, from_c);
2599 if (next == NULL)
2600 break;
2601 *next = to_c;
2602 start = next+1;
2603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606}
2607
2608/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2609Py_LOCAL(PyBytesObject *)
2610replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 const char *from_s, Py_ssize_t from_len,
2612 const char *to_s, Py_ssize_t to_len,
2613 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 char *result_s, *start, *end;
2616 char *self_s;
2617 Py_ssize_t self_len, offset;
2618 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 self_s = PyBytes_AS_STRING(self);
2623 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 offset = stringlib_find(self_s, self_len,
2626 from_s, from_len,
2627 0);
2628 if (offset == -1) {
2629 /* No matches; return the original string */
2630 return return_self(self);
2631 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 /* Need to make a new string */
2634 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2635 if (result == NULL)
2636 return NULL;
2637 result_s = PyBytes_AS_STRING(result);
2638 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* change everything in-place, starting with this one */
2641 start = result_s + offset;
2642 Py_MEMCPY(start, to_s, from_len);
2643 start += from_len;
2644 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 while ( --maxcount > 0) {
2647 offset = stringlib_find(start, end-start,
2648 from_s, from_len,
2649 0);
2650 if (offset==-1)
2651 break;
2652 Py_MEMCPY(start+offset, to_s, from_len);
2653 start += offset+from_len;
2654 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657}
2658
2659/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2660Py_LOCAL(PyBytesObject *)
2661replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 char from_c,
2663 const char *to_s, Py_ssize_t to_len,
2664 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 char *self_s, *result_s;
2667 char *start, *next, *end;
2668 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002669 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 self_s = PyBytes_AS_STRING(self);
2673 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 count = countchar(self_s, self_len, from_c, maxcount);
2676 if (count == 0) {
2677 /* no matches, return unchanged */
2678 return return_self(self);
2679 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 /* use the difference between current and new, hence the "-1" */
2682 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002683 assert(count > 0);
2684 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 PyErr_SetString(PyExc_OverflowError,
2686 "replacement bytes are too long");
2687 return NULL;
2688 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002689 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 if ( (result = (PyBytesObject *)
2692 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2693 return NULL;
2694 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 start = self_s;
2697 end = self_s + self_len;
2698 while (count-- > 0) {
2699 next = findchar(start, end-start, from_c);
2700 if (next == NULL)
2701 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 if (next == start) {
2704 /* replace with the 'to' */
2705 Py_MEMCPY(result_s, to_s, to_len);
2706 result_s += to_len;
2707 start += 1;
2708 } else {
2709 /* copy the unchanged old then the 'to' */
2710 Py_MEMCPY(result_s, start, next-start);
2711 result_s += (next-start);
2712 Py_MEMCPY(result_s, to_s, to_len);
2713 result_s += to_len;
2714 start = next+1;
2715 }
2716 }
2717 /* Copy the remainder of the remaining string */
2718 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721}
2722
2723/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2724Py_LOCAL(PyBytesObject *)
2725replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 const char *from_s, Py_ssize_t from_len,
2727 const char *to_s, Py_ssize_t to_len,
2728 Py_ssize_t maxcount) {
2729 char *self_s, *result_s;
2730 char *start, *next, *end;
2731 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002732 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 self_s = PyBytes_AS_STRING(self);
2736 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 count = stringlib_count(self_s, self_len,
2739 from_s, from_len,
2740 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 if (count == 0) {
2743 /* no matches, return unchanged */
2744 return return_self(self);
2745 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 /* Check for overflow */
2748 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002749 assert(count > 0);
2750 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 PyErr_SetString(PyExc_OverflowError,
2752 "replacement bytes are too long");
2753 return NULL;
2754 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002755 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 if ( (result = (PyBytesObject *)
2758 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2759 return NULL;
2760 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 start = self_s;
2763 end = self_s + self_len;
2764 while (count-- > 0) {
2765 offset = stringlib_find(start, end-start,
2766 from_s, from_len,
2767 0);
2768 if (offset == -1)
2769 break;
2770 next = start+offset;
2771 if (next == start) {
2772 /* replace with the 'to' */
2773 Py_MEMCPY(result_s, to_s, to_len);
2774 result_s += to_len;
2775 start += from_len;
2776 } else {
2777 /* copy the unchanged old then the 'to' */
2778 Py_MEMCPY(result_s, start, next-start);
2779 result_s += (next-start);
2780 Py_MEMCPY(result_s, to_s, to_len);
2781 result_s += to_len;
2782 start = next+from_len;
2783 }
2784 }
2785 /* Copy the remainder of the remaining string */
2786 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789}
2790
2791
2792Py_LOCAL(PyBytesObject *)
2793replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 const char *from_s, Py_ssize_t from_len,
2795 const char *to_s, Py_ssize_t to_len,
2796 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 if (maxcount < 0) {
2799 maxcount = PY_SSIZE_T_MAX;
2800 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2801 /* nothing to do; return the original string */
2802 return return_self(self);
2803 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 if (maxcount == 0 ||
2806 (from_len == 0 && to_len == 0)) {
2807 /* nothing to do; return the original string */
2808 return return_self(self);
2809 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 if (from_len == 0) {
2814 /* insert the 'to' string everywhere. */
2815 /* >>> "Python".replace("", ".") */
2816 /* '.P.y.t.h.o.n.' */
2817 return replace_interleave(self, to_s, to_len, maxcount);
2818 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2821 /* point for an empty self string to generate a non-empty string */
2822 /* Special case so the remaining code always gets a non-empty string */
2823 if (PyBytes_GET_SIZE(self) == 0) {
2824 return return_self(self);
2825 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002827 if (to_len == 0) {
2828 /* delete all occurrences of 'from' string */
2829 if (from_len == 1) {
2830 return replace_delete_single_character(
2831 self, from_s[0], maxcount);
2832 } else {
2833 return replace_delete_substring(self, from_s,
2834 from_len, maxcount);
2835 }
2836 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 if (from_len == to_len) {
2841 if (from_len == 1) {
2842 return replace_single_character_in_place(
2843 self,
2844 from_s[0],
2845 to_s[0],
2846 maxcount);
2847 } else {
2848 return replace_substring_in_place(
2849 self, from_s, from_len, to_s, to_len,
2850 maxcount);
2851 }
2852 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002854 /* Otherwise use the more generic algorithms */
2855 if (from_len == 1) {
2856 return replace_single_character(self, from_s[0],
2857 to_s, to_len, maxcount);
2858 } else {
2859 /* len('from')>=2, len('to')>=1 */
2860 return replace_substring(self, from_s, from_len, to_s, to_len,
2861 maxcount);
2862 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002863}
2864
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002865
2866/*[clinic input]
2867bytes.replace
2868
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002869 old: Py_buffer
2870 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002871 count: Py_ssize_t = -1
2872 Maximum number of occurrences to replace.
2873 -1 (the default value) means replace all occurrences.
2874 /
2875
2876Return a copy with all occurrences of substring old replaced by new.
2877
2878If the optional argument count is given, only the first count occurrences are
2879replaced.
2880[clinic start generated code]*/
2881
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002882static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002883bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2884 Py_ssize_t count)
2885/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002886{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002888 (const char *)old->buf, old->len,
2889 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890}
2891
2892/** End DALKE **/
2893
2894/* Matches the end (direction >= 0) or start (direction < 0) of self
2895 * against substr, using the start and end arguments. Returns
2896 * -1 on error, 0 if not found and 1 if found.
2897 */
2898Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002899_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 Py_ssize_t len = PyBytes_GET_SIZE(self);
2903 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002904 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 const char* sub;
2906 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002908 if (PyBytes_Check(substr)) {
2909 sub = PyBytes_AS_STRING(substr);
2910 slen = PyBytes_GET_SIZE(substr);
2911 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002912 else {
2913 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2914 return -1;
2915 sub = sub_view.buf;
2916 slen = sub_view.len;
2917 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 if (direction < 0) {
2923 /* startswith */
2924 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002925 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 } else {
2927 /* endswith */
2928 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002929 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 if (end-slen > start)
2932 start = end - slen;
2933 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002934 if (end-start < slen)
2935 goto notfound;
2936 if (memcmp(str+start, sub, slen) != 0)
2937 goto notfound;
2938
2939 PyBuffer_Release(&sub_view);
2940 return 1;
2941
2942notfound:
2943 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945}
2946
2947
2948PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002949"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002950\n\
2951Return True if B starts with the specified prefix, False otherwise.\n\
2952With optional start, test B beginning at that position.\n\
2953With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002954prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955
2956static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002957bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 Py_ssize_t start = 0;
2960 Py_ssize_t end = PY_SSIZE_T_MAX;
2961 PyObject *subobj;
2962 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963
Jesus Ceaac451502011-04-20 17:09:23 +02002964 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002965 return NULL;
2966 if (PyTuple_Check(subobj)) {
2967 Py_ssize_t i;
2968 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2969 result = _bytes_tailmatch(self,
2970 PyTuple_GET_ITEM(subobj, i),
2971 start, end, -1);
2972 if (result == -1)
2973 return NULL;
2974 else if (result) {
2975 Py_RETURN_TRUE;
2976 }
2977 }
2978 Py_RETURN_FALSE;
2979 }
2980 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002981 if (result == -1) {
2982 if (PyErr_ExceptionMatches(PyExc_TypeError))
2983 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2984 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002986 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 else
2988 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989}
2990
2991
2992PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002993"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994\n\
2995Return True if B ends with the specified suffix, False otherwise.\n\
2996With optional start, test B beginning at that position.\n\
2997With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002998suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999
3000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003001bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 Py_ssize_t start = 0;
3004 Py_ssize_t end = PY_SSIZE_T_MAX;
3005 PyObject *subobj;
3006 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007
Jesus Ceaac451502011-04-20 17:09:23 +02003008 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 return NULL;
3010 if (PyTuple_Check(subobj)) {
3011 Py_ssize_t i;
3012 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3013 result = _bytes_tailmatch(self,
3014 PyTuple_GET_ITEM(subobj, i),
3015 start, end, +1);
3016 if (result == -1)
3017 return NULL;
3018 else if (result) {
3019 Py_RETURN_TRUE;
3020 }
3021 }
3022 Py_RETURN_FALSE;
3023 }
3024 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003025 if (result == -1) {
3026 if (PyErr_ExceptionMatches(PyExc_TypeError))
3027 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3028 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003030 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 else
3032 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003033}
3034
3035
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003036/*[clinic input]
3037bytes.decode
3038
3039 encoding: str(c_default="NULL") = 'utf-8'
3040 The encoding with which to decode the bytes.
3041 errors: str(c_default="NULL") = 'strict'
3042 The error handling scheme to use for the handling of decoding errors.
3043 The default is 'strict' meaning that decoding errors raise a
3044 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3045 as well as any other name registered with codecs.register_error that
3046 can handle UnicodeDecodeErrors.
3047
3048Decode the bytes using the codec registered for encoding.
3049[clinic start generated code]*/
3050
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003051static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003052bytes_decode_impl(PyBytesObject*self, const char *encoding,
3053 const char *errors)
3054/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003055{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003056 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003057}
3058
Guido van Rossum20188312006-05-05 15:15:40 +00003059
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003060/*[clinic input]
3061bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003062
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003063 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003064
3065Return a list of the lines in the bytes, breaking at line boundaries.
3066
3067Line breaks are not included in the resulting list unless keepends is given and
3068true.
3069[clinic start generated code]*/
3070
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003071static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003072bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003073/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003074{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003075 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003076 (PyObject*) self, PyBytes_AS_STRING(self),
3077 PyBytes_GET_SIZE(self), keepends
3078 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003079}
3080
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003081static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003082hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 if (c >= 128)
3085 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003086 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 return c - '0';
3088 else {
David Malcolm96960882010-11-05 17:23:41 +00003089 if (Py_ISUPPER(c))
3090 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 if (c >= 'a' && c <= 'f')
3092 return c - 'a' + 10;
3093 }
3094 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003095}
3096
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003097/*[clinic input]
3098@classmethod
3099bytes.fromhex
3100
3101 string: unicode
3102 /
3103
3104Create a bytes object from a string of hexadecimal numbers.
3105
3106Spaces between two numbers are accepted.
3107Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3108[clinic start generated code]*/
3109
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003110static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003111bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003112/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003113{
3114 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 Py_ssize_t hexlen, byteslen, i, j;
3117 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003118 void *data;
3119 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003120
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003121 assert(PyUnicode_Check(string));
3122 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003123 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003124 kind = PyUnicode_KIND(string);
3125 data = PyUnicode_DATA(string);
3126 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 byteslen = hexlen/2; /* This overestimates if there are spaces */
3129 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3130 if (!newstring)
3131 return NULL;
3132 buf = PyBytes_AS_STRING(newstring);
3133 for (i = j = 0; i < hexlen; i += 2) {
3134 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003135 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 i++;
3137 if (i >= hexlen)
3138 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003139 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3140 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 if (top == -1 || bot == -1) {
3142 PyErr_Format(PyExc_ValueError,
3143 "non-hexadecimal number found in "
3144 "fromhex() arg at position %zd", i);
3145 goto error;
3146 }
3147 buf[j++] = (top << 4) + bot;
3148 }
3149 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3150 goto error;
3151 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003152
3153 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 Py_XDECREF(newstring);
3155 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003156}
3157
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003158PyDoc_STRVAR(hex__doc__,
3159"B.hex() -> string\n\
3160\n\
3161Create a string of hexadecimal numbers from a bytes object.\n\
3162Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3163
3164static PyObject *
3165bytes_hex(PyBytesObject *self)
3166{
3167 char* argbuf = PyBytes_AS_STRING(self);
3168 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3169 return _Py_strhex(argbuf, arglen);
3170}
3171
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003172static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003173bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003175 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003176}
3177
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003178
3179static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003180bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003181 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3182 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3183 _Py_capitalize__doc__},
3184 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3185 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003186 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003187 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3188 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003189 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 expandtabs__doc__},
3191 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003192 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003193 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3195 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3196 _Py_isalnum__doc__},
3197 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3198 _Py_isalpha__doc__},
3199 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3200 _Py_isdigit__doc__},
3201 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3202 _Py_islower__doc__},
3203 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3204 _Py_isspace__doc__},
3205 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3206 _Py_istitle__doc__},
3207 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3208 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003209 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003210 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3211 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003212 BYTES_LSTRIP_METHODDEF
3213 BYTES_MAKETRANS_METHODDEF
3214 BYTES_PARTITION_METHODDEF
3215 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003216 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3217 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3218 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003219 BYTES_RPARTITION_METHODDEF
3220 BYTES_RSPLIT_METHODDEF
3221 BYTES_RSTRIP_METHODDEF
3222 BYTES_SPLIT_METHODDEF
3223 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003224 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3225 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003226 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003227 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3228 _Py_swapcase__doc__},
3229 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003230 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003231 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3232 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003233 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003234};
3235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003236static PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +02003237bytes_mod(PyObject *self, PyObject *args)
Ethan Furmanb95b5612015-01-23 20:05:18 -08003238{
Victor Stinner772b2b02015-10-14 09:56:53 +02003239 if (self == NULL || !PyBytes_Check(self)) {
3240 PyErr_BadInternalCall();
3241 return NULL;
3242 }
3243
3244 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
3245 args, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08003246}
3247
3248static PyNumberMethods bytes_as_number = {
3249 0, /*nb_add*/
3250 0, /*nb_subtract*/
3251 0, /*nb_multiply*/
3252 bytes_mod, /*nb_remainder*/
3253};
3254
3255static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003256str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3257
3258static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003259bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003261 PyObject *x = NULL;
3262 const char *encoding = NULL;
3263 const char *errors = NULL;
3264 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003265 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 Py_ssize_t size;
3267 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003268 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003270 if (type != &PyBytes_Type)
3271 return str_subtype_new(type, args, kwds);
3272 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3273 &encoding, &errors))
3274 return NULL;
3275 if (x == NULL) {
3276 if (encoding != NULL || errors != NULL) {
3277 PyErr_SetString(PyExc_TypeError,
3278 "encoding or errors without sequence "
3279 "argument");
3280 return NULL;
3281 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003282 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003283 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003285 if (PyUnicode_Check(x)) {
3286 /* Encode via the codec registry */
3287 if (encoding == NULL) {
3288 PyErr_SetString(PyExc_TypeError,
3289 "string argument without an encoding");
3290 return NULL;
3291 }
3292 new = PyUnicode_AsEncodedString(x, encoding, errors);
3293 if (new == NULL)
3294 return NULL;
3295 assert(PyBytes_Check(new));
3296 return new;
3297 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003298
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003299 /* If it's not unicode, there can't be encoding or errors */
3300 if (encoding != NULL || errors != NULL) {
3301 PyErr_SetString(PyExc_TypeError,
3302 "encoding or errors without a string argument");
3303 return NULL;
3304 }
3305
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003306 /* We'd like to call PyObject_Bytes here, but we need to check for an
3307 integer argument before deferring to PyBytes_FromObject, something
3308 PyObject_Bytes doesn't do. */
3309 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3310 if (func != NULL) {
3311 new = PyObject_CallFunctionObjArgs(func, NULL);
3312 Py_DECREF(func);
3313 if (new == NULL)
3314 return NULL;
3315 if (!PyBytes_Check(new)) {
3316 PyErr_Format(PyExc_TypeError,
3317 "__bytes__ returned non-bytes (type %.200s)",
3318 Py_TYPE(new)->tp_name);
3319 Py_DECREF(new);
3320 return NULL;
3321 }
3322 return new;
3323 }
3324 else if (PyErr_Occurred())
3325 return NULL;
3326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003327 /* Is it an integer? */
3328 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3329 if (size == -1 && PyErr_Occurred()) {
3330 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3331 return NULL;
3332 PyErr_Clear();
3333 }
3334 else if (size < 0) {
3335 PyErr_SetString(PyExc_ValueError, "negative count");
3336 return NULL;
3337 }
3338 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003339 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003340 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003341 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003342 return new;
3343 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003344
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003345 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003346}
3347
3348PyObject *
3349PyBytes_FromObject(PyObject *x)
3350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003351 PyObject *new, *it;
3352 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003354 if (x == NULL) {
3355 PyErr_BadInternalCall();
3356 return NULL;
3357 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003358
3359 if (PyBytes_CheckExact(x)) {
3360 Py_INCREF(x);
3361 return x;
3362 }
3363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003364 /* Use the modern buffer interface */
3365 if (PyObject_CheckBuffer(x)) {
3366 Py_buffer view;
3367 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3368 return NULL;
3369 new = PyBytes_FromStringAndSize(NULL, view.len);
3370 if (!new)
3371 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003372 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3373 &view, view.len, 'C') < 0)
3374 goto fail;
3375 PyBuffer_Release(&view);
3376 return new;
3377 fail:
3378 Py_XDECREF(new);
3379 PyBuffer_Release(&view);
3380 return NULL;
3381 }
3382 if (PyUnicode_Check(x)) {
3383 PyErr_SetString(PyExc_TypeError,
3384 "cannot convert unicode object to bytes");
3385 return NULL;
3386 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003388 if (PyList_CheckExact(x)) {
3389 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3390 if (new == NULL)
3391 return NULL;
3392 for (i = 0; i < Py_SIZE(x); i++) {
3393 Py_ssize_t value = PyNumber_AsSsize_t(
3394 PyList_GET_ITEM(x, i), PyExc_ValueError);
3395 if (value == -1 && PyErr_Occurred()) {
3396 Py_DECREF(new);
3397 return NULL;
3398 }
3399 if (value < 0 || value >= 256) {
3400 PyErr_SetString(PyExc_ValueError,
3401 "bytes must be in range(0, 256)");
3402 Py_DECREF(new);
3403 return NULL;
3404 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003405 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003406 }
3407 return new;
3408 }
3409 if (PyTuple_CheckExact(x)) {
3410 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3411 if (new == NULL)
3412 return NULL;
3413 for (i = 0; i < Py_SIZE(x); i++) {
3414 Py_ssize_t value = PyNumber_AsSsize_t(
3415 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3416 if (value == -1 && PyErr_Occurred()) {
3417 Py_DECREF(new);
3418 return NULL;
3419 }
3420 if (value < 0 || value >= 256) {
3421 PyErr_SetString(PyExc_ValueError,
3422 "bytes must be in range(0, 256)");
3423 Py_DECREF(new);
3424 return NULL;
3425 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003426 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003427 }
3428 return new;
3429 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003431 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003432 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003433 if (size == -1 && PyErr_Occurred())
3434 return NULL;
3435 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3436 returning a shared empty bytes string. This required because we
3437 want to call _PyBytes_Resize() the returned object, which we can
3438 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003439 if (size == 0)
3440 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003441 new = PyBytes_FromStringAndSize(NULL, size);
3442 if (new == NULL)
3443 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003444 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003446 /* Get the iterator */
3447 it = PyObject_GetIter(x);
3448 if (it == NULL)
3449 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003451 /* Run the iterator to exhaustion */
3452 for (i = 0; ; i++) {
3453 PyObject *item;
3454 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003456 /* Get the next item */
3457 item = PyIter_Next(it);
3458 if (item == NULL) {
3459 if (PyErr_Occurred())
3460 goto error;
3461 break;
3462 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003464 /* Interpret it as an int (__index__) */
3465 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3466 Py_DECREF(item);
3467 if (value == -1 && PyErr_Occurred())
3468 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003470 /* Range check */
3471 if (value < 0 || value >= 256) {
3472 PyErr_SetString(PyExc_ValueError,
3473 "bytes must be in range(0, 256)");
3474 goto error;
3475 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003477 /* Append the byte */
3478 if (i >= size) {
3479 size = 2 * size + 1;
3480 if (_PyBytes_Resize(&new, size) < 0)
3481 goto error;
3482 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003483 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003484 }
3485 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003487 /* Clean up and return success */
3488 Py_DECREF(it);
3489 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003490
3491 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003493 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003494 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003495}
3496
3497static PyObject *
3498str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3499{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003500 PyObject *tmp, *pnew;
3501 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003502
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003503 assert(PyType_IsSubtype(type, &PyBytes_Type));
3504 tmp = bytes_new(&PyBytes_Type, args, kwds);
3505 if (tmp == NULL)
3506 return NULL;
3507 assert(PyBytes_CheckExact(tmp));
3508 n = PyBytes_GET_SIZE(tmp);
3509 pnew = type->tp_alloc(type, n);
3510 if (pnew != NULL) {
3511 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3512 PyBytes_AS_STRING(tmp), n+1);
3513 ((PyBytesObject *)pnew)->ob_shash =
3514 ((PyBytesObject *)tmp)->ob_shash;
3515 }
3516 Py_DECREF(tmp);
3517 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003518}
3519
/* Docstring of the bytes type; it is installed as the tp_doc slot of
   PyBytes_Type.  It lists the supported constructor call forms followed by
   the kinds of source object accepted. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003520PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003521"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003522bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003523bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003524bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3525bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003526\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003527Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003528 - an iterable yielding integers in range(256)\n\
3529 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003530 - any object implementing the buffer API.\n\
3531 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003532
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003533static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003534
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003535PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003536 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3537 "bytes",
3538 PyBytesObject_SIZE,
3539 sizeof(char),
3540 bytes_dealloc, /* tp_dealloc */
3541 0, /* tp_print */
3542 0, /* tp_getattr */
3543 0, /* tp_setattr */
3544 0, /* tp_reserved */
3545 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003546 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003547 &bytes_as_sequence, /* tp_as_sequence */
3548 &bytes_as_mapping, /* tp_as_mapping */
3549 (hashfunc)bytes_hash, /* tp_hash */
3550 0, /* tp_call */
3551 bytes_str, /* tp_str */
3552 PyObject_GenericGetAttr, /* tp_getattro */
3553 0, /* tp_setattro */
3554 &bytes_as_buffer, /* tp_as_buffer */
3555 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3556 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3557 bytes_doc, /* tp_doc */
3558 0, /* tp_traverse */
3559 0, /* tp_clear */
3560 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3561 0, /* tp_weaklistoffset */
3562 bytes_iter, /* tp_iter */
3563 0, /* tp_iternext */
3564 bytes_methods, /* tp_methods */
3565 0, /* tp_members */
3566 0, /* tp_getset */
3567 &PyBaseObject_Type, /* tp_base */
3568 0, /* tp_dict */
3569 0, /* tp_descr_get */
3570 0, /* tp_descr_set */
3571 0, /* tp_dictoffset */
3572 0, /* tp_init */
3573 0, /* tp_alloc */
3574 bytes_new, /* tp_new */
3575 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003576};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003577
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003578void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003579PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003581 assert(pv != NULL);
3582 if (*pv == NULL)
3583 return;
3584 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003585 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003586 return;
3587 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003588
3589 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3590 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003591 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003592 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003593
Antoine Pitrou161d6952014-05-01 14:36:20 +02003594 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003595 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003596 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3597 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3598 Py_CLEAR(*pv);
3599 return;
3600 }
3601
3602 oldsize = PyBytes_GET_SIZE(*pv);
3603 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3604 PyErr_NoMemory();
3605 goto error;
3606 }
3607 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3608 goto error;
3609
3610 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3611 PyBuffer_Release(&wb);
3612 return;
3613
3614 error:
3615 PyBuffer_Release(&wb);
3616 Py_CLEAR(*pv);
3617 return;
3618 }
3619
3620 else {
3621 /* Multiple references, need to create new object */
3622 PyObject *v;
3623 v = bytes_concat(*pv, w);
3624 Py_DECREF(*pv);
3625 *pv = v;
3626 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003627}
3628
3629void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003630PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003632 PyBytes_Concat(pv, w);
3633 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003634}
3635
3636
Ethan Furmanb95b5612015-01-23 20:05:18 -08003637/* The following function breaks the notion that bytes are immutable:
3638 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003639 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003640 as creating a new bytes object and destroying the old one, only
3641 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003642 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003643 Note that if there's not enough memory to resize the bytes object, the
3644 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003645 memory" exception is set, and -1 is returned. Else (on success) 0 is
3646 returned, and the value in *pv may or may not be the same as on input.
3647 As always, an extra byte is allocated for a trailing \0 byte (newsize
3648 does *not* include that), and a trailing \0 byte is stored.
3649*/
3650
3651int
3652_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3653{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003654 PyObject *v;
3655 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003656 v = *pv;
3657 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3658 *pv = 0;
3659 Py_DECREF(v);
3660 PyErr_BadInternalCall();
3661 return -1;
3662 }
3663 /* XXX UNREF/NEWREF interface should be more symmetrical */
3664 _Py_DEC_REFTOTAL;
3665 _Py_ForgetReference(v);
3666 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003667 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003668 if (*pv == NULL) {
3669 PyObject_Del(v);
3670 PyErr_NoMemory();
3671 return -1;
3672 }
3673 _Py_NewReference(*pv);
3674 sv = (PyBytesObject *) *pv;
3675 Py_SIZE(sv) = newsize;
3676 sv->ob_sval[newsize] = '\0';
3677 sv->ob_shash = -1; /* invalidate cached hash value */
3678 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003679}
3680
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003681void
3682PyBytes_Fini(void)
3683{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003684 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003685 for (i = 0; i < UCHAR_MAX + 1; i++)
3686 Py_CLEAR(characters[i]);
3687 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003688}
3689
Benjamin Peterson4116f362008-05-27 00:36:20 +00003690/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003691
3692typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003693 PyObject_HEAD
3694 Py_ssize_t it_index;
3695 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003696} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003697
3698static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003699striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003701 _PyObject_GC_UNTRACK(it);
3702 Py_XDECREF(it->it_seq);
3703 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003704}
3705
3706static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003707striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003708{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003709 Py_VISIT(it->it_seq);
3710 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003711}
3712
3713static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003714striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003715{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003716 PyBytesObject *seq;
3717 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003719 assert(it != NULL);
3720 seq = it->it_seq;
3721 if (seq == NULL)
3722 return NULL;
3723 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003725 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3726 item = PyLong_FromLong(
3727 (unsigned char)seq->ob_sval[it->it_index]);
3728 if (item != NULL)
3729 ++it->it_index;
3730 return item;
3731 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003733 Py_DECREF(seq);
3734 it->it_seq = NULL;
3735 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003736}
3737
3738static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003739striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003740{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003741 Py_ssize_t len = 0;
3742 if (it->it_seq)
3743 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3744 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003745}
3746
3747PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003748 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003749
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003750static PyObject *
3751striter_reduce(striterobject *it)
3752{
3753 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003754 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003755 it->it_seq, it->it_index);
3756 } else {
3757 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3758 if (u == NULL)
3759 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003760 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003761 }
3762}
3763
3764PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3765
3766static PyObject *
3767striter_setstate(striterobject *it, PyObject *state)
3768{
3769 Py_ssize_t index = PyLong_AsSsize_t(state);
3770 if (index == -1 && PyErr_Occurred())
3771 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003772 if (it->it_seq != NULL) {
3773 if (index < 0)
3774 index = 0;
3775 else if (index > PyBytes_GET_SIZE(it->it_seq))
3776 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3777 it->it_index = index;
3778 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003779 Py_RETURN_NONE;
3780}
3781
3782PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3783
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003784static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003785 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3786 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003787 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3788 reduce_doc},
3789 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3790 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003791 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003792};
3793
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003794PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003795 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3796 "bytes_iterator", /* tp_name */
3797 sizeof(striterobject), /* tp_basicsize */
3798 0, /* tp_itemsize */
3799 /* methods */
3800 (destructor)striter_dealloc, /* tp_dealloc */
3801 0, /* tp_print */
3802 0, /* tp_getattr */
3803 0, /* tp_setattr */
3804 0, /* tp_reserved */
3805 0, /* tp_repr */
3806 0, /* tp_as_number */
3807 0, /* tp_as_sequence */
3808 0, /* tp_as_mapping */
3809 0, /* tp_hash */
3810 0, /* tp_call */
3811 0, /* tp_str */
3812 PyObject_GenericGetAttr, /* tp_getattro */
3813 0, /* tp_setattro */
3814 0, /* tp_as_buffer */
3815 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3816 0, /* tp_doc */
3817 (traverseproc)striter_traverse, /* tp_traverse */
3818 0, /* tp_clear */
3819 0, /* tp_richcompare */
3820 0, /* tp_weaklistoffset */
3821 PyObject_SelfIter, /* tp_iter */
3822 (iternextfunc)striter_next, /* tp_iternext */
3823 striter_methods, /* tp_methods */
3824 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003825};
3826
3827static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003828bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003830 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003832 if (!PyBytes_Check(seq)) {
3833 PyErr_BadInternalCall();
3834 return NULL;
3835 }
3836 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3837 if (it == NULL)
3838 return NULL;
3839 it->it_index = 0;
3840 Py_INCREF(seq);
3841 it->it_seq = (PyBytesObject *)seq;
3842 _PyObject_GC_TRACK(it);
3843 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003844}
Victor Stinner00165072015-10-09 01:53:21 +02003845
3846
3847/* _PyBytesWriter API */
3848
/* Divisor used by the writer when overallocating: a request of n bytes is
   grown by an extra n / OVERALLOCATE_FACTOR bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3856
3857void
3858_PyBytesWriter_Init(_PyBytesWriter *writer)
3859{
Victor Stinner661aacc2015-10-14 09:41:48 +02003860 /* Set all attributes before small_buffer to 0 */
3861 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003862#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003863 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003864#endif
3865}
3866
3867void
3868_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3869{
3870 Py_CLEAR(writer->buffer);
3871}
3872
3873Py_LOCAL_INLINE(char*)
3874_PyBytesWriter_AsString(_PyBytesWriter *writer)
3875{
Victor Stinner661aacc2015-10-14 09:41:48 +02003876 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003877 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003878 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003879 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003880 else if (writer->use_bytearray) {
3881 assert(writer->buffer != NULL);
3882 return PyByteArray_AS_STRING(writer->buffer);
3883 }
3884 else {
3885 assert(writer->buffer != NULL);
3886 return PyBytes_AS_STRING(writer->buffer);
3887 }
Victor Stinner00165072015-10-09 01:53:21 +02003888}
3889
3890Py_LOCAL_INLINE(Py_ssize_t)
3891_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3892{
3893 char *start = _PyBytesWriter_AsString(writer);
3894 assert(str != NULL);
3895 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003896 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003897 return str - start;
3898}
3899
3900Py_LOCAL_INLINE(void)
3901_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3902{
3903#ifdef Py_DEBUG
3904 char *start, *end;
3905
Victor Stinner661aacc2015-10-14 09:41:48 +02003906 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003907 assert(writer->buffer == NULL);
3908 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003909 else {
3910 assert(writer->buffer != NULL);
3911 if (writer->use_bytearray)
3912 assert(PyByteArray_CheckExact(writer->buffer));
3913 else
3914 assert(PyBytes_CheckExact(writer->buffer));
3915 assert(Py_REFCNT(writer->buffer) == 1);
3916 }
Victor Stinner00165072015-10-09 01:53:21 +02003917
Victor Stinner661aacc2015-10-14 09:41:48 +02003918 if (writer->use_bytearray) {
3919 /* bytearray has its own overallocation algorithm,
3920 writer overallocation must be disabled */
3921 assert(!writer->overallocate);
3922 }
3923
3924 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003925 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003926 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003927 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003928 assert(start[writer->allocated] == 0);
3929
3930 end = start + writer->allocated;
3931 assert(str != NULL);
3932 assert(start <= str && str <= end);
3933#endif
3934}
3935
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003936void*
3937_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003938{
3939 Py_ssize_t allocated, pos;
3940
3941 _PyBytesWriter_CheckConsistency(writer, str);
3942 assert(size >= 0);
3943
3944 if (size == 0) {
3945 /* nothing to do */
3946 return str;
3947 }
3948
Victor Stinner53926a12015-10-09 12:37:03 +02003949 if (writer->min_size > PY_SSIZE_T_MAX - size) {
Victor Stinner00165072015-10-09 01:53:21 +02003950 PyErr_NoMemory();
Victor Stinner661aacc2015-10-14 09:41:48 +02003951 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003952 }
Victor Stinner53926a12015-10-09 12:37:03 +02003953 writer->min_size += size;
Victor Stinner00165072015-10-09 01:53:21 +02003954
3955 allocated = writer->allocated;
Victor Stinner53926a12015-10-09 12:37:03 +02003956 if (writer->min_size <= allocated)
Victor Stinner00165072015-10-09 01:53:21 +02003957 return str;
3958
Victor Stinner53926a12015-10-09 12:37:03 +02003959 allocated = writer->min_size;
Victor Stinner00165072015-10-09 01:53:21 +02003960 if (writer->overallocate
3961 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3962 /* overallocate to limit the number of realloc() */
3963 allocated += allocated / OVERALLOCATE_FACTOR;
3964 }
3965
3966 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003967 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003968 if (writer->use_bytearray) {
3969 if (PyByteArray_Resize(writer->buffer, allocated))
3970 goto error;
3971 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3972 but we cannot use ob_alloc because bytes may need to be moved
3973 to use the whole buffer. bytearray uses an internal optimization
3974 to avoid moving or copying bytes when bytes are removed at the
3975 beginning (ex: del bytearray[:1]). */
3976 }
3977 else {
3978 if (_PyBytes_Resize(&writer->buffer, allocated))
3979 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003980 }
3981 }
3982 else {
3983 /* convert from stack buffer to bytes object buffer */
3984 assert(writer->buffer == NULL);
3985
Victor Stinner661aacc2015-10-14 09:41:48 +02003986 if (writer->use_bytearray)
3987 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3988 else
3989 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003990 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02003991 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02003992
3993 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02003994 char *dest;
3995 if (writer->use_bytearray)
3996 dest = PyByteArray_AS_STRING(writer->buffer);
3997 else
3998 dest = PyBytes_AS_STRING(writer->buffer);
3999 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02004000 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02004001 pos);
4002 }
4003
Victor Stinnerb3653a32015-10-09 03:38:24 +02004004 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004005#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004006 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02004007#endif
Victor Stinner00165072015-10-09 01:53:21 +02004008 }
4009 writer->allocated = allocated;
4010
4011 str = _PyBytesWriter_AsString(writer) + pos;
4012 _PyBytesWriter_CheckConsistency(writer, str);
4013 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02004014
4015error:
4016 _PyBytesWriter_Dealloc(writer);
4017 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02004018}
4019
4020/* Allocate the buffer to write size bytes.
4021 Return the pointer to the beginning of buffer data.
4022 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004023void*
Victor Stinner00165072015-10-09 01:53:21 +02004024_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
4025{
4026 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02004027 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004028 assert(size >= 0);
4029
Victor Stinnerb3653a32015-10-09 03:38:24 +02004030 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004031#ifdef Py_DEBUG
Victor Stinner00165072015-10-09 01:53:21 +02004032 /* the last byte is reserved, it must be '\0' */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004033 writer->allocated = sizeof(writer->small_buffer) - 1;
4034 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004035#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004036 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004037#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004038 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004039}
4040
4041PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004042_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004043{
4044 Py_ssize_t pos;
4045 PyObject *result;
4046
4047 _PyBytesWriter_CheckConsistency(writer, str);
4048
4049 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinner661aacc2015-10-14 09:41:48 +02004050 if (pos == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004051 Py_CLEAR(writer->buffer);
4052 /* Get the empty byte string singleton */
4053 result = PyBytes_FromStringAndSize(NULL, 0);
4054 }
4055 else if (writer->use_small_buffer) {
4056 result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
4057 }
4058 else {
4059 result = writer->buffer;
4060 writer->buffer = NULL;
4061
Victor Stinner00165072015-10-09 01:53:21 +02004062 if (pos != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004063 if (writer->use_bytearray) {
4064 if (PyByteArray_Resize(result, pos)) {
4065 Py_DECREF(result);
4066 return NULL;
4067 }
4068 }
4069 else {
4070 if (_PyBytes_Resize(&result, pos)) {
4071 assert(result == NULL);
4072 return NULL;
4073 }
Victor Stinner00165072015-10-09 01:53:21 +02004074 }
4075 }
Victor Stinner00165072015-10-09 01:53:21 +02004076 }
Victor Stinner00165072015-10-09 01:53:21 +02004077 return result;
4078}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004079
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004080void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02004081_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004082 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004083{
Victor Stinnere9aa5952015-10-12 13:57:47 +02004084 char *str = (char *)ptr;
4085
Victor Stinnerce179bf2015-10-09 12:57:22 +02004086 str = _PyBytesWriter_Prepare(writer, str, size);
4087 if (str == NULL)
4088 return NULL;
4089
4090 Py_MEMCPY(str, bytes, size);
4091 str += size;
4092
4093 return str;
4094}