blob: bf919b53de15d6f05e92180a3bb31873fbf07e97 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Neal Norwitz2bad9702007-08-27 06:19:22 +000015static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000016_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000017{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020018 PyBufferProcs *bufferprocs;
19 if (PyBytes_CheckExact(obj)) {
20 /* Fast path, e.g. for .join() of many bytes objects */
21 Py_INCREF(obj);
22 view->obj = obj;
23 view->buf = PyBytes_AS_STRING(obj);
24 view->len = PyBytes_GET_SIZE(obj);
25 return view->len;
26 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000027
Antoine Pitroucfc22b42012-10-16 21:07:23 +020028 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
29 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 {
Antoine Pitroud1188562010-06-09 16:38:55 +000031 PyErr_Format(PyExc_TypeError,
R David Murray861470c2014-10-05 11:47:01 -040032 "a bytes-like object is required, not '%.100s'",
Antoine Pitroud1188562010-06-09 16:38:55 +000033 Py_TYPE(obj)->tp_name);
34 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000035 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000036
Antoine Pitroucfc22b42012-10-16 21:07:23 +020037 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000039 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000040}
41
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000043Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000045
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046static PyBytesObject *characters[UCHAR_MAX + 1];
47static PyBytesObject *nullstring;
48
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of the
   character data plus one byte for the trailing null.  Any allocation for a
   string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
56
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 For PyBytes_FromString(), the parameter `str' points to a null-terminated
59 string containing exactly `size' bytes.
60
61 For PyBytes_FromStringAndSize(), the parameter `str' is
62 either NULL or else points to a string containing at least `size' bytes.
63 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
64 not have to be null-terminated. (Therefore it is safe to construct a
65 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
66 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
67 bytes (setting the last byte to the null terminating character) and you can
68 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000069 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070 alter the data yourself, since the strings may be shared.
71
72 The PyObject member `op->ob_size', which denotes the number of "extra
73 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020074 allocated for string data, not counting the null terminating character.
75 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000076 PyBytes_FromStringAndSize()) or the length of the string in the `str'
77 parameter (for PyBytes_FromString()).
78*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020079static PyObject *
80_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000081{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020082 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_INCREF(op);
90 return (PyObject *)op;
91 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000092
Victor Stinner049e5092014-08-17 22:20:00 +020093 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 PyErr_SetString(PyExc_OverflowError,
95 "byte string is too large");
96 return NULL;
97 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200100 if (use_calloc)
101 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 else
103 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (op == NULL)
105 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100106 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 if (!use_calloc)
109 op->ob_sval[size] = '\0';
110 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200114 }
115 return (PyObject *) op;
116}
117
118PyObject *
119PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120{
121 PyBytesObject *op;
122 if (size < 0) {
123 PyErr_SetString(PyExc_SystemError,
124 "Negative size passed to PyBytes_FromStringAndSize");
125 return NULL;
126 }
127 if (size == 1 && str != NULL &&
128 (op = characters[*str & UCHAR_MAX]) != NULL)
129 {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
136
137 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
138 if (op == NULL)
139 return NULL;
140 if (str == NULL)
141 return (PyObject *) op;
142
143 Py_MEMCPY(op->ob_sval, str, size);
144 /* share short strings */
145 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000150}
151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000154{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200155 size_t size;
156 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 assert(str != NULL);
159 size = strlen(str);
160 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
161 PyErr_SetString(PyExc_OverflowError,
162 "byte string is too long");
163 return NULL;
164 }
165 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 Py_INCREF(op);
170 return (PyObject *)op;
171 }
172 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000173#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000175#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 Py_INCREF(op);
177 return (PyObject *)op;
178 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* Inline PyObject_NewVar */
181 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
182 if (op == NULL)
183 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100184 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 op->ob_shash = -1;
186 Py_MEMCPY(op->ob_sval, str, size+1);
187 /* share short strings */
188 if (size == 0) {
189 nullstring = op;
190 Py_INCREF(op);
191 } else if (size == 1) {
192 characters[*str & UCHAR_MAX] = op;
193 Py_INCREF(op);
194 }
195 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000196}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000197
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000198PyObject *
199PyBytes_FromFormatV(const char *format, va_list vargs)
200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 va_list count;
202 Py_ssize_t n = 0;
203 const char* f;
204 char *s;
205 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000206
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000207 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 /* step 1: figure out how large a buffer we need */
209 for (f = format; *f; f++) {
210 if (*f == '%') {
211 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000212 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
216 * they don't affect the amount of space we reserve.
217 */
218 if ((*f == 'l' || *f == 'z') &&
219 (f[1] == 'd' || f[1] == 'u'))
220 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 switch (*f) {
223 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100224 {
225 int c = va_arg(count, int);
226 if (c < 0 || c > 255) {
227 PyErr_SetString(PyExc_OverflowError,
228 "PyBytes_FromFormatV(): %c format "
229 "expects an integer in range [0; 255]");
230 return NULL;
231 }
232 n++;
233 break;
234 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 case '%':
236 n++;
237 break;
238 case 'd': case 'u': case 'i': case 'x':
239 (void) va_arg(count, int);
240 /* 20 bytes is enough to hold a 64-bit
241 integer. Decimal takes the most space.
242 This isn't enough for octal. */
243 n += 20;
244 break;
245 case 's':
246 s = va_arg(count, char*);
247 n += strlen(s);
248 break;
249 case 'p':
250 (void) va_arg(count, int);
251 /* maximum 64-bit pointer representation:
252 * 0xffffffffffffffff
253 * so 19 characters is enough.
254 * XXX I count 18 -- what's the extra for?
255 */
256 n += 19;
257 break;
258 default:
259 /* if we stumble upon an unknown
260 formatting code, copy the rest of
261 the format string to the output
262 string. (we cannot just skip the
263 code, since there's no way to know
264 what's in the argument list) */
265 n += strlen(p);
266 goto expand;
267 }
268 } else
269 n++;
270 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000271 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 /* step 2: fill the buffer */
273 /* Since we've analyzed how much space we need for the worst case,
274 use sprintf directly instead of the slower PyOS_snprintf. */
275 string = PyBytes_FromStringAndSize(NULL, n);
276 if (!string)
277 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for (f = format; *f; f++) {
282 if (*f == '%') {
283 const char* p = f++;
284 Py_ssize_t i;
285 int longflag = 0;
286 int size_tflag = 0;
287 /* parse the width.precision part (we're only
288 interested in the precision value, if any) */
289 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000290 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 n = (n*10) + *f++ - '0';
292 if (*f == '.') {
293 f++;
294 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000295 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 n = (n*10) + *f++ - '0';
297 }
David Malcolm96960882010-11-05 17:23:41 +0000298 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 f++;
300 /* handle the long flag, but only for %ld and %lu.
301 others can be added when necessary. */
302 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
303 longflag = 1;
304 ++f;
305 }
306 /* handle the size_t flag. */
307 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
308 size_tflag = 1;
309 ++f;
310 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 switch (*f) {
313 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100314 {
315 int c = va_arg(vargs, int);
316 /* c has been checked for overflow in the first step */
317 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100319 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 case 'd':
321 if (longflag)
322 sprintf(s, "%ld", va_arg(vargs, long));
323 else if (size_tflag)
324 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
325 va_arg(vargs, Py_ssize_t));
326 else
327 sprintf(s, "%d", va_arg(vargs, int));
328 s += strlen(s);
329 break;
330 case 'u':
331 if (longflag)
332 sprintf(s, "%lu",
333 va_arg(vargs, unsigned long));
334 else if (size_tflag)
335 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
336 va_arg(vargs, size_t));
337 else
338 sprintf(s, "%u",
339 va_arg(vargs, unsigned int));
340 s += strlen(s);
341 break;
342 case 'i':
343 sprintf(s, "%i", va_arg(vargs, int));
344 s += strlen(s);
345 break;
346 case 'x':
347 sprintf(s, "%x", va_arg(vargs, int));
348 s += strlen(s);
349 break;
350 case 's':
351 p = va_arg(vargs, char*);
352 i = strlen(p);
353 if (n > 0 && i > n)
354 i = n;
355 Py_MEMCPY(s, p, i);
356 s += i;
357 break;
358 case 'p':
359 sprintf(s, "%p", va_arg(vargs, void*));
360 /* %p is ill-defined: ensure leading 0x. */
361 if (s[1] == 'X')
362 s[1] = 'x';
363 else if (s[1] != 'x') {
364 memmove(s+2, s, strlen(s)+1);
365 s[0] = '0';
366 s[1] = 'x';
367 }
368 s += strlen(s);
369 break;
370 case '%':
371 *s++ = '%';
372 break;
373 default:
374 strcpy(s, p);
375 s += strlen(s);
376 goto end;
377 }
378 } else
379 *s++ = *f;
380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000381
382 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
384 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000385}
386
387PyObject *
388PyBytes_FromFormat(const char *format, ...)
389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 PyObject* ret;
391 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000392
393#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000397#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 ret = PyBytes_FromFormatV(format, vargs);
399 va_end(vargs);
400 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000401}
402
Ethan Furmanb95b5612015-01-23 20:05:18 -0800403/* Helpers for formatstring */
404
405Py_LOCAL_INLINE(PyObject *)
406getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
407{
408 Py_ssize_t argidx = *p_argidx;
409 if (argidx < arglen) {
410 (*p_argidx)++;
411 if (arglen < 0)
412 return args;
413 else
414 return PyTuple_GetItem(args, argidx);
415 }
416 PyErr_SetString(PyExc_TypeError,
417 "not enough arguments for format string");
418 return NULL;
419}
420
/* Bit flags recording which flag characters appeared in a conversion
 * specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
433
434/* Returns a new reference to a PyBytes object, or NULL on failure. */
435
436static PyObject *
437formatfloat(PyObject *v, int flags, int prec, int type)
438{
439 char *p;
440 PyObject *result;
441 double x;
442
443 x = PyFloat_AsDouble(v);
444 if (x == -1.0 && PyErr_Occurred()) {
445 PyErr_Format(PyExc_TypeError, "float argument required, "
446 "not %.200s", Py_TYPE(v)->tp_name);
447 return NULL;
448 }
449
450 if (prec < 0)
451 prec = 6;
452
453 p = PyOS_double_to_string(x, type, prec,
454 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
455
456 if (p == NULL)
457 return NULL;
458 result = PyBytes_FromStringAndSize(p, strlen(p));
459 PyMem_Free(p);
460 return result;
461}
462
463/* format_long emulates the format codes d, u, o, x and X, and
464 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
465 * Python's regular ints.
466 * Return value: a new PyBytes*, or NULL if error.
467 * . *pbuf is set to point into it,
468 * *plen set to the # of chars following that.
469 * Caller must decref it when done using pbuf.
470 * The string starting at *pbuf is of the form
471 * "-"? ("0x" | "0X")? digit+
472 * "0x"/"0X" are present only for x and X conversions, with F_ALT
473 * set in flags. The case of hex digits will be correct,
474 * There will be at least prec digits, zero-filled on the left if
475 * necessary to get that many.
476 * val object to be converted
477 * flags bitmask of format flags; only F_ALT is looked at
478 * prec minimum number of digits; 0-fill on left if needed
479 * type a character in [duoxX]; u acts the same as d
480 *
481 * CAUTION: o, x and X conversions on regular ints can never
482 * produce a '-' sign, but can for Python's unbounded ints.
483 */
484
485static PyObject *
486format_long(PyObject *val, int flags, int prec, int type,
487 char **pbuf, int *plen)
488{
489 PyObject *s;
490 PyObject *result = NULL;
491
492 s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
493 if (!s)
494 return NULL;
495 result = _PyUnicode_AsASCIIString(s, "strict");
496 Py_DECREF(s);
497 if (!result)
498 return NULL;
499 *pbuf = PyBytes_AS_STRING(result);
500 *plen = PyBytes_GET_SIZE(result);
501 return result;
502}
503
504Py_LOCAL_INLINE(int)
505formatchar(char *buf, size_t buflen, PyObject *v)
506{
507 PyObject *w = NULL;
508 /* convert bytearray to bytes */
509 if (PyByteArray_Check(v)) {
510 w = PyBytes_FromObject(v);
511 if (w == NULL)
512 goto error;
513 v = w;
514 }
515 /* presume that the buffer is at least 2 characters long */
516 if (PyBytes_Check(v)) {
517 if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
518 goto error;
519 }
520 else {
521 long ival = PyLong_AsLong(v);
522 if (ival == -1 && PyErr_Occurred()) {
523 PyErr_SetString(PyExc_TypeError,
524 "%c requires an integer in range(256) or a single byte");
525 goto error;
526 }
527 if (ival < 0 || ival > 255) {
528 PyErr_SetString(PyExc_TypeError,
529 "%c requires an integer in range(256) or a single byte");
530 goto error;
531 }
532 buf[0] = ival;
533 }
534 Py_XDECREF(w);
535 buf[1] = '\0';
536 return 1;
537
538 error:
539 Py_XDECREF(w);
540 return -1;
541}
542
543static PyObject *
544format_obj(PyObject *v)
545{
546 PyObject *result = NULL, *w = NULL;
547 PyObject *func;
548 _Py_IDENTIFIER(__bytes__);
549 /* convert bytearray to bytes */
550 if (PyByteArray_Check(v)) {
551 w = PyBytes_FromObject(v);
552 if (w == NULL)
553 return NULL;
554 v = w;
555 }
556 /* is it a bytes object? */
557 if (PyBytes_Check(v)) {
558 result = v;
559 Py_INCREF(v);
560 Py_XDECREF(w);
561 return result;
562 }
563 /* does it support __bytes__? */
564 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
565 if (func != NULL) {
566 result = PyObject_CallFunctionObjArgs(func, NULL);
567 Py_DECREF(func);
568 if (result == NULL)
569 return NULL;
570 if (!PyBytes_Check(result)) {
571 PyErr_Format(PyExc_TypeError,
572 "__bytes__ returned non-bytes (type %.200s)",
573 Py_TYPE(result)->tp_name);
574 Py_DECREF(result);
575 return NULL;
576 }
577 return result;
578 }
579 PyErr_Format(PyExc_TypeError,
580 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
581 Py_TYPE(v)->tp_name);
582 return NULL;
583}
584
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the scratch buffer in which ints and
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
594
595PyObject *
596_PyBytes_Format(PyObject *format, PyObject *args)
597{
598 char *fmt, *res;
599 Py_ssize_t arglen, argidx;
600 Py_ssize_t reslen, rescnt, fmtcnt;
601 int args_owned = 0;
602 PyObject *result;
603 PyObject *repr;
604 PyObject *dict = NULL;
605 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
606 PyErr_BadInternalCall();
607 return NULL;
608 }
609 fmt = PyBytes_AS_STRING(format);
610 fmtcnt = PyBytes_GET_SIZE(format);
611 reslen = rescnt = fmtcnt + 100;
612 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
613 if (result == NULL)
614 return NULL;
615 res = PyBytes_AsString(result);
616 if (PyTuple_Check(args)) {
617 arglen = PyTuple_GET_SIZE(args);
618 argidx = 0;
619 }
620 else {
621 arglen = -1;
622 argidx = -2;
623 }
624 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
625 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
626 !PyByteArray_Check(args)) {
627 dict = args;
628 }
629 while (--fmtcnt >= 0) {
630 if (*fmt != '%') {
631 if (--rescnt < 0) {
632 rescnt = fmtcnt + 100;
633 reslen += rescnt;
634 if (_PyBytes_Resize(&result, reslen))
635 return NULL;
636 res = PyBytes_AS_STRING(result)
637 + reslen - rescnt;
638 --rescnt;
639 }
640 *res++ = *fmt++;
641 }
642 else {
643 /* Got a format specifier */
644 int flags = 0;
645 Py_ssize_t width = -1;
646 int prec = -1;
647 int c = '\0';
648 int fill;
649 int isnumok;
650 PyObject *v = NULL;
651 PyObject *temp = NULL;
652 Py_buffer buf;
653 char *pbuf;
654 int sign;
655 Py_ssize_t len;
656 char formatbuf[FORMATBUFLEN];
657 /* For format{int,char}() */
658
659 buf.obj = NULL;
660 fmt++;
661 if (*fmt == '(') {
662 char *keystart;
663 Py_ssize_t keylen;
664 PyObject *key;
665 int pcount = 1;
666
667 if (dict == NULL) {
668 PyErr_SetString(PyExc_TypeError,
669 "format requires a mapping");
670 goto error;
671 }
672 ++fmt;
673 --fmtcnt;
674 keystart = fmt;
675 /* Skip over balanced parentheses */
676 while (pcount > 0 && --fmtcnt >= 0) {
677 if (*fmt == ')')
678 --pcount;
679 else if (*fmt == '(')
680 ++pcount;
681 fmt++;
682 }
683 keylen = fmt - keystart - 1;
684 if (fmtcnt < 0 || pcount > 0) {
685 PyErr_SetString(PyExc_ValueError,
686 "incomplete format key");
687 goto error;
688 }
689 key = PyBytes_FromStringAndSize(keystart,
690 keylen);
691 if (key == NULL)
692 goto error;
693 if (args_owned) {
694 Py_DECREF(args);
695 args_owned = 0;
696 }
697 args = PyObject_GetItem(dict, key);
698 Py_DECREF(key);
699 if (args == NULL) {
700 goto error;
701 }
702 args_owned = 1;
703 arglen = -1;
704 argidx = -2;
705 }
706 while (--fmtcnt >= 0) {
707 switch (c = *fmt++) {
708 case '-': flags |= F_LJUST; continue;
709 case '+': flags |= F_SIGN; continue;
710 case ' ': flags |= F_BLANK; continue;
711 case '#': flags |= F_ALT; continue;
712 case '0': flags |= F_ZERO; continue;
713 }
714 break;
715 }
716 if (c == '*') {
717 v = getnextarg(args, arglen, &argidx);
718 if (v == NULL)
719 goto error;
720 if (!PyLong_Check(v)) {
721 PyErr_SetString(PyExc_TypeError,
722 "* wants int");
723 goto error;
724 }
725 width = PyLong_AsSsize_t(v);
726 if (width == -1 && PyErr_Occurred())
727 goto error;
728 if (width < 0) {
729 flags |= F_LJUST;
730 width = -width;
731 }
732 if (--fmtcnt >= 0)
733 c = *fmt++;
734 }
735 else if (c >= 0 && isdigit(c)) {
736 width = c - '0';
737 while (--fmtcnt >= 0) {
738 c = Py_CHARMASK(*fmt++);
739 if (!isdigit(c))
740 break;
741 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
742 PyErr_SetString(
743 PyExc_ValueError,
744 "width too big");
745 goto error;
746 }
747 width = width*10 + (c - '0');
748 }
749 }
750 if (c == '.') {
751 prec = 0;
752 if (--fmtcnt >= 0)
753 c = *fmt++;
754 if (c == '*') {
755 v = getnextarg(args, arglen, &argidx);
756 if (v == NULL)
757 goto error;
758 if (!PyLong_Check(v)) {
759 PyErr_SetString(
760 PyExc_TypeError,
761 "* wants int");
762 goto error;
763 }
764 prec = PyLong_AsSsize_t(v);
765 if (prec == -1 && PyErr_Occurred())
766 goto error;
767 if (prec < 0)
768 prec = 0;
769 if (--fmtcnt >= 0)
770 c = *fmt++;
771 }
772 else if (c >= 0 && isdigit(c)) {
773 prec = c - '0';
774 while (--fmtcnt >= 0) {
775 c = Py_CHARMASK(*fmt++);
776 if (!isdigit(c))
777 break;
778 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
779 PyErr_SetString(
780 PyExc_ValueError,
781 "prec too big");
782 goto error;
783 }
784 prec = prec*10 + (c - '0');
785 }
786 }
787 } /* prec */
788 if (fmtcnt >= 0) {
789 if (c == 'h' || c == 'l' || c == 'L') {
790 if (--fmtcnt >= 0)
791 c = *fmt++;
792 }
793 }
794 if (fmtcnt < 0) {
795 PyErr_SetString(PyExc_ValueError,
796 "incomplete format");
797 goto error;
798 }
799 if (c != '%') {
800 v = getnextarg(args, arglen, &argidx);
801 if (v == NULL)
802 goto error;
803 }
804 sign = 0;
805 fill = ' ';
806 switch (c) {
807 case '%':
808 pbuf = "%";
809 len = 1;
810 break;
811 case 'a':
812 temp = PyObject_Repr(v);
813 if (temp == NULL)
814 goto error;
815 repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace");
816 if (repr == NULL) {
817 Py_DECREF(temp);
818 goto error;
819 }
820 if (_getbuffer(repr, &buf) < 0) {
821 temp = format_obj(repr);
822 if (temp == NULL) {
823 Py_DECREF(repr);
824 goto error;
825 }
826 Py_DECREF(repr);
827 repr = temp;
828 }
829 pbuf = PyBytes_AS_STRING(repr);
830 len = PyBytes_GET_SIZE(repr);
831 Py_DECREF(repr);
832 if (prec >= 0 && len > prec)
833 len = prec;
834 break;
835 case 's':
836 // %s is only for 2/3 code; 3 only code should use %b
837 case 'b':
838 temp = format_obj(v);
839 if (temp == NULL)
840 goto error;
841 pbuf = PyBytes_AS_STRING(temp);
842 len = PyBytes_GET_SIZE(temp);
843 if (prec >= 0 && len > prec)
844 len = prec;
845 break;
846 case 'i':
847 case 'd':
848 case 'u':
849 case 'o':
850 case 'x':
851 case 'X':
852 if (c == 'i')
853 c = 'd';
854 isnumok = 0;
855 if (PyNumber_Check(v)) {
856 PyObject *iobj=NULL;
857
858 if ((PyLong_Check(v))) {
859 iobj = v;
860 Py_INCREF(iobj);
861 }
862 else {
863 iobj = PyNumber_Long(v);
864 }
865 if (iobj!=NULL) {
866 if (PyLong_Check(iobj)) {
867 int ilen;
868
869 isnumok = 1;
870 temp = format_long(iobj, flags, prec, c,
871 &pbuf, &ilen);
872 Py_DECREF(iobj);
873 len = ilen;
874 if (!temp)
875 goto error;
876 sign = 1;
877 }
878 else {
879 Py_DECREF(iobj);
880 }
881 }
882 }
883 if (!isnumok) {
884 PyErr_Format(PyExc_TypeError,
885 "%%%c format: a number is required, "
886 "not %.200s", c, Py_TYPE(v)->tp_name);
887 goto error;
888 }
889 if (flags & F_ZERO)
890 fill = '0';
891 break;
892 case 'e':
893 case 'E':
894 case 'f':
895 case 'F':
896 case 'g':
897 case 'G':
898 temp = formatfloat(v, flags, prec, c);
899 if (temp == NULL)
900 goto error;
901 pbuf = PyBytes_AS_STRING(temp);
902 len = PyBytes_GET_SIZE(temp);
903 sign = 1;
904 if (flags & F_ZERO)
905 fill = '0';
906 break;
907 case 'c':
908 pbuf = formatbuf;
909 len = formatchar(pbuf, sizeof(formatbuf), v);
910 if (len < 0)
911 goto error;
912 break;
913 default:
914 PyErr_Format(PyExc_ValueError,
915 "unsupported format character '%c' (0x%x) "
916 "at index %zd",
917 c, c,
918 (Py_ssize_t)(fmt - 1 -
919 PyBytes_AsString(format)));
920 goto error;
921 }
922 if (sign) {
923 if (*pbuf == '-' || *pbuf == '+') {
924 sign = *pbuf++;
925 len--;
926 }
927 else if (flags & F_SIGN)
928 sign = '+';
929 else if (flags & F_BLANK)
930 sign = ' ';
931 else
932 sign = 0;
933 }
934 if (width < len)
935 width = len;
936 if (rescnt - (sign != 0) < width) {
937 reslen -= rescnt;
938 rescnt = width + fmtcnt + 100;
939 reslen += rescnt;
940 if (reslen < 0) {
941 Py_DECREF(result);
942 PyBuffer_Release(&buf);
943 Py_XDECREF(temp);
944 return PyErr_NoMemory();
945 }
946 if (_PyBytes_Resize(&result, reslen)) {
947 PyBuffer_Release(&buf);
948 Py_XDECREF(temp);
949 return NULL;
950 }
951 res = PyBytes_AS_STRING(result)
952 + reslen - rescnt;
953 }
954 if (sign) {
955 if (fill != ' ')
956 *res++ = sign;
957 rescnt--;
958 if (width > len)
959 width--;
960 }
961 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
962 assert(pbuf[0] == '0');
963 assert(pbuf[1] == c);
964 if (fill != ' ') {
965 *res++ = *pbuf++;
966 *res++ = *pbuf++;
967 }
968 rescnt -= 2;
969 width -= 2;
970 if (width < 0)
971 width = 0;
972 len -= 2;
973 }
974 if (width > len && !(flags & F_LJUST)) {
975 do {
976 --rescnt;
977 *res++ = fill;
978 } while (--width > len);
979 }
980 if (fill == ' ') {
981 if (sign)
982 *res++ = sign;
983 if ((flags & F_ALT) &&
984 (c == 'x' || c == 'X')) {
985 assert(pbuf[0] == '0');
986 assert(pbuf[1] == c);
987 *res++ = *pbuf++;
988 *res++ = *pbuf++;
989 }
990 }
991 Py_MEMCPY(res, pbuf, len);
992 res += len;
993 rescnt -= len;
994 while (--width >= len) {
995 --rescnt;
996 *res++ = ' ';
997 }
998 if (dict && (argidx < arglen) && c != '%') {
999 PyErr_SetString(PyExc_TypeError,
1000 "not all arguments converted during bytes formatting");
1001 PyBuffer_Release(&buf);
1002 Py_XDECREF(temp);
1003 goto error;
1004 }
1005 PyBuffer_Release(&buf);
1006 Py_XDECREF(temp);
1007 } /* '%' */
1008 } /* until end */
1009 if (argidx < arglen && !dict) {
1010 PyErr_SetString(PyExc_TypeError,
1011 "not all arguments converted during bytes formatting");
1012 goto error;
1013 }
1014 if (args_owned) {
1015 Py_DECREF(args);
1016 }
1017 if (_PyBytes_Resize(&result, reslen - rescnt))
1018 return NULL;
1019 return result;
1020
1021 error:
1022 Py_DECREF(result);
1023 if (args_owned) {
1024 Py_DECREF(args);
1025 }
1026 return NULL;
1027}
1028
1029/* =-= */
1030
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001031static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001032bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001035}
1036
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001037/* Unescape a backslash-escaped string. If unicode is non-zero,
1038 the string is a u-literal. If recode_encoding is non-zero,
1039 the string is UTF-8 encoded and should be re-encoded in the
1040 specified encoding. */
1041
1042PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 Py_ssize_t len,
1044 const char *errors,
1045 Py_ssize_t unicode,
1046 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 int c;
1049 char *p, *buf;
1050 const char *end;
1051 PyObject *v;
1052 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1053 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1054 if (v == NULL)
1055 return NULL;
1056 p = buf = PyBytes_AsString(v);
1057 end = s + len;
1058 while (s < end) {
1059 if (*s != '\\') {
1060 non_esc:
1061 if (recode_encoding && (*s & 0x80)) {
1062 PyObject *u, *w;
1063 char *r;
1064 const char* t;
1065 Py_ssize_t rn;
1066 t = s;
1067 /* Decode non-ASCII bytes as UTF-8. */
1068 while (t < end && (*t & 0x80)) t++;
1069 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1070 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 /* Recode them in target encoding. */
1073 w = PyUnicode_AsEncodedString(
1074 u, recode_encoding, errors);
1075 Py_DECREF(u);
1076 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 /* Append bytes to output buffer. */
1079 assert(PyBytes_Check(w));
1080 r = PyBytes_AS_STRING(w);
1081 rn = PyBytes_GET_SIZE(w);
1082 Py_MEMCPY(p, r, rn);
1083 p += rn;
1084 Py_DECREF(w);
1085 s = t;
1086 } else {
1087 *p++ = *s++;
1088 }
1089 continue;
1090 }
1091 s++;
1092 if (s==end) {
1093 PyErr_SetString(PyExc_ValueError,
1094 "Trailing \\ in string");
1095 goto failed;
1096 }
1097 switch (*s++) {
1098 /* XXX This assumes ASCII! */
1099 case '\n': break;
1100 case '\\': *p++ = '\\'; break;
1101 case '\'': *p++ = '\''; break;
1102 case '\"': *p++ = '\"'; break;
1103 case 'b': *p++ = '\b'; break;
1104 case 'f': *p++ = '\014'; break; /* FF */
1105 case 't': *p++ = '\t'; break;
1106 case 'n': *p++ = '\n'; break;
1107 case 'r': *p++ = '\r'; break;
1108 case 'v': *p++ = '\013'; break; /* VT */
1109 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1110 case '0': case '1': case '2': case '3':
1111 case '4': case '5': case '6': case '7':
1112 c = s[-1] - '0';
1113 if (s < end && '0' <= *s && *s <= '7') {
1114 c = (c<<3) + *s++ - '0';
1115 if (s < end && '0' <= *s && *s <= '7')
1116 c = (c<<3) + *s++ - '0';
1117 }
1118 *p++ = c;
1119 break;
1120 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001121 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 unsigned int x = 0;
1123 c = Py_CHARMASK(*s);
1124 s++;
David Malcolm96960882010-11-05 17:23:41 +00001125 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001127 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 x = 10 + c - 'a';
1129 else
1130 x = 10 + c - 'A';
1131 x = x << 4;
1132 c = Py_CHARMASK(*s);
1133 s++;
David Malcolm96960882010-11-05 17:23:41 +00001134 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001136 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 x += 10 + c - 'a';
1138 else
1139 x += 10 + c - 'A';
1140 *p++ = x;
1141 break;
1142 }
1143 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001144 PyErr_Format(PyExc_ValueError,
1145 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001146 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 goto failed;
1148 }
1149 if (strcmp(errors, "replace") == 0) {
1150 *p++ = '?';
1151 } else if (strcmp(errors, "ignore") == 0)
1152 /* do nothing */;
1153 else {
1154 PyErr_Format(PyExc_ValueError,
1155 "decoding error; unknown "
1156 "error handling code: %.400s",
1157 errors);
1158 goto failed;
1159 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001160 /* skip \x */
1161 if (s < end && Py_ISXDIGIT(s[0]))
1162 s++; /* and a hexdigit */
1163 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 default:
1165 *p++ = '\\';
1166 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001167 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 UTF-8 bytes may follow. */
1169 }
1170 }
1171 if (p-buf < newlen)
1172 _PyBytes_Resize(&v, p - buf);
1173 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 Py_DECREF(v);
1176 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001177}
1178
1179/* -------------------------------------------------------------------- */
1180/* object api */
1181
1182Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001183PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 if (!PyBytes_Check(op)) {
1186 PyErr_Format(PyExc_TypeError,
1187 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1188 return -1;
1189 }
1190 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191}
1192
1193char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001194PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (!PyBytes_Check(op)) {
1197 PyErr_Format(PyExc_TypeError,
1198 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1199 return NULL;
1200 }
1201 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202}
1203
1204int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001205PyBytes_AsStringAndSize(PyObject *obj,
1206 char **s,
1207 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 if (s == NULL) {
1210 PyErr_BadInternalCall();
1211 return -1;
1212 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 if (!PyBytes_Check(obj)) {
1215 PyErr_Format(PyExc_TypeError,
1216 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1217 return -1;
1218 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 *s = PyBytes_AS_STRING(obj);
1221 if (len != NULL)
1222 *len = PyBytes_GET_SIZE(obj);
1223 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001224 PyErr_SetString(PyExc_ValueError,
1225 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 return -1;
1227 }
1228 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229}
Neal Norwitz6968b052007-02-27 19:02:19 +00001230
1231/* -------------------------------------------------------------------- */
1232/* Methods */
1233
Eric Smith0923d1d2009-04-16 20:16:10 +00001234#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001235
1236#include "stringlib/fastsearch.h"
1237#include "stringlib/count.h"
1238#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001239#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001240#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001241#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001242#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001243
Eric Smith0f78bff2009-11-30 01:01:42 +00001244#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246PyObject *
1247PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001250 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001251 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253 unsigned char quote, *s, *p;
1254
1255 /* Compute size of output string */
1256 squotes = dquotes = 0;
1257 newsize = 3; /* b'' */
1258 s = (unsigned char*)op->ob_sval;
1259 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001260 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001261 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001262 case '\'': squotes++; break;
1263 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001265 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001266 default:
1267 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001268 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001269 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001270 if (newsize > PY_SSIZE_T_MAX - incr)
1271 goto overflow;
1272 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273 }
1274 quote = '\'';
1275 if (smartquotes && squotes && !dquotes)
1276 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001277 if (squotes && quote == '\'') {
1278 if (newsize > PY_SSIZE_T_MAX - squotes)
1279 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282
1283 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 if (v == NULL) {
1285 return NULL;
1286 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001287 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 *p++ = 'b', *p++ = quote;
1290 for (i = 0; i < length; i++) {
1291 unsigned char c = op->ob_sval[i];
1292 if (c == quote || c == '\\')
1293 *p++ = '\\', *p++ = c;
1294 else if (c == '\t')
1295 *p++ = '\\', *p++ = 't';
1296 else if (c == '\n')
1297 *p++ = '\\', *p++ = 'n';
1298 else if (c == '\r')
1299 *p++ = '\\', *p++ = 'r';
1300 else if (c < ' ' || c >= 0x7f) {
1301 *p++ = '\\';
1302 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001303 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1304 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 else
1307 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001310 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312
1313 overflow:
1314 PyErr_SetString(PyExc_OverflowError,
1315 "bytes object is too large to make repr");
1316 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001317}
1318
Neal Norwitz6968b052007-02-27 19:02:19 +00001319static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001320bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001321{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001323}
1324
Neal Norwitz6968b052007-02-27 19:02:19 +00001325static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001326bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001327{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 if (Py_BytesWarningFlag) {
1329 if (PyErr_WarnEx(PyExc_BytesWarning,
1330 "str() on a bytes instance", 1))
1331 return NULL;
1332 }
1333 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001334}
1335
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001337bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340}
Neal Norwitz6968b052007-02-27 19:02:19 +00001341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342/* This is also used by PyBytes_Concat() */
1343static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001344bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 Py_ssize_t size;
1347 Py_buffer va, vb;
1348 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 va.len = -1;
1351 vb.len = -1;
1352 if (_getbuffer(a, &va) < 0 ||
1353 _getbuffer(b, &vb) < 0) {
1354 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1355 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1356 goto done;
1357 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 /* Optimize end cases */
1360 if (va.len == 0 && PyBytes_CheckExact(b)) {
1361 result = b;
1362 Py_INCREF(result);
1363 goto done;
1364 }
1365 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1366 result = a;
1367 Py_INCREF(result);
1368 goto done;
1369 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 size = va.len + vb.len;
1372 if (size < 0) {
1373 PyErr_NoMemory();
1374 goto done;
1375 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 result = PyBytes_FromStringAndSize(NULL, size);
1378 if (result != NULL) {
1379 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1380 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1381 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382
1383 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 if (va.len != -1)
1385 PyBuffer_Release(&va);
1386 if (vb.len != -1)
1387 PyBuffer_Release(&vb);
1388 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389}
Neal Norwitz6968b052007-02-27 19:02:19 +00001390
1391static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001392bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001393{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001394 Py_ssize_t i;
1395 Py_ssize_t j;
1396 Py_ssize_t size;
1397 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 size_t nbytes;
1399 if (n < 0)
1400 n = 0;
1401 /* watch out for overflows: the size can overflow int,
1402 * and the # of bytes needed can overflow size_t
1403 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001404 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 PyErr_SetString(PyExc_OverflowError,
1406 "repeated bytes are too long");
1407 return NULL;
1408 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001409 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1411 Py_INCREF(a);
1412 return (PyObject *)a;
1413 }
1414 nbytes = (size_t)size;
1415 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1416 PyErr_SetString(PyExc_OverflowError,
1417 "repeated bytes are too long");
1418 return NULL;
1419 }
1420 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1421 if (op == NULL)
1422 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001423 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 op->ob_shash = -1;
1425 op->ob_sval[size] = '\0';
1426 if (Py_SIZE(a) == 1 && n > 0) {
1427 memset(op->ob_sval, a->ob_sval[0] , n);
1428 return (PyObject *) op;
1429 }
1430 i = 0;
1431 if (i < size) {
1432 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1433 i = Py_SIZE(a);
1434 }
1435 while (i < size) {
1436 j = (i <= size-i) ? i : size-i;
1437 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1438 i += j;
1439 }
1440 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001441}
1442
Guido van Rossum98297ee2007-11-06 21:34:58 +00001443static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001444bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001445{
1446 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1447 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001448 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001449 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001450 PyErr_Clear();
1451 if (_getbuffer(arg, &varg) < 0)
1452 return -1;
1453 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1454 varg.buf, varg.len, 0);
1455 PyBuffer_Release(&varg);
1456 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001457 }
1458 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001459 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1460 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001461 }
1462
Antoine Pitrou0010d372010-08-15 17:12:55 +00001463 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001464}
1465
Neal Norwitz6968b052007-02-27 19:02:19 +00001466static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001467bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (i < 0 || i >= Py_SIZE(a)) {
1470 PyErr_SetString(PyExc_IndexError, "index out of range");
1471 return NULL;
1472 }
1473 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001474}
1475
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001476Py_LOCAL(int)
1477bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1478{
1479 int cmp;
1480 Py_ssize_t len;
1481
1482 len = Py_SIZE(a);
1483 if (Py_SIZE(b) != len)
1484 return 0;
1485
1486 if (a->ob_sval[0] != b->ob_sval[0])
1487 return 0;
1488
1489 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1490 return (cmp == 0);
1491}
1492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001494bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 int c;
1497 Py_ssize_t len_a, len_b;
1498 Py_ssize_t min_len;
1499 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 /* Make sure both arguments are strings. */
1502 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1503 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
1504 (PyObject_IsInstance((PyObject*)a,
1505 (PyObject*)&PyUnicode_Type) ||
1506 PyObject_IsInstance((PyObject*)b,
1507 (PyObject*)&PyUnicode_Type))) {
1508 if (PyErr_WarnEx(PyExc_BytesWarning,
1509 "Comparison between bytes and string", 1))
1510 return NULL;
1511 }
1512 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001514 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001516 case Py_EQ:
1517 case Py_LE:
1518 case Py_GE:
1519 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001521 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001522 case Py_NE:
1523 case Py_LT:
1524 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001526 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001527 default:
1528 PyErr_BadArgument();
1529 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 }
1531 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001532 else if (op == Py_EQ || op == Py_NE) {
1533 int eq = bytes_compare_eq(a, b);
1534 eq ^= (op == Py_NE);
1535 result = eq ? Py_True : Py_False;
1536 }
1537 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 len_a = Py_SIZE(a);
1539 len_b = Py_SIZE(b);
1540 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001541 if (min_len > 0) {
1542 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001543 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001544 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001546 else
1547 c = 0;
1548 if (c == 0)
1549 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1550 switch (op) {
1551 case Py_LT: c = c < 0; break;
1552 case Py_LE: c = c <= 0; break;
1553 case Py_GT: c = c > 0; break;
1554 case Py_GE: c = c >= 0; break;
1555 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001556 PyErr_BadArgument();
1557 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001558 }
1559 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 Py_INCREF(result);
1563 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001564}
1565
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001566static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001567bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001568{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001569 if (a->ob_shash == -1) {
1570 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001571 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001572 }
1573 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001574}
1575
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001577bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 if (PyIndex_Check(item)) {
1580 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1581 if (i == -1 && PyErr_Occurred())
1582 return NULL;
1583 if (i < 0)
1584 i += PyBytes_GET_SIZE(self);
1585 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1586 PyErr_SetString(PyExc_IndexError,
1587 "index out of range");
1588 return NULL;
1589 }
1590 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1591 }
1592 else if (PySlice_Check(item)) {
1593 Py_ssize_t start, stop, step, slicelength, cur, i;
1594 char* source_buf;
1595 char* result_buf;
1596 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001597
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001598 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 PyBytes_GET_SIZE(self),
1600 &start, &stop, &step, &slicelength) < 0) {
1601 return NULL;
1602 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 if (slicelength <= 0) {
1605 return PyBytes_FromStringAndSize("", 0);
1606 }
1607 else if (start == 0 && step == 1 &&
1608 slicelength == PyBytes_GET_SIZE(self) &&
1609 PyBytes_CheckExact(self)) {
1610 Py_INCREF(self);
1611 return (PyObject *)self;
1612 }
1613 else if (step == 1) {
1614 return PyBytes_FromStringAndSize(
1615 PyBytes_AS_STRING(self) + start,
1616 slicelength);
1617 }
1618 else {
1619 source_buf = PyBytes_AS_STRING(self);
1620 result = PyBytes_FromStringAndSize(NULL, slicelength);
1621 if (result == NULL)
1622 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 result_buf = PyBytes_AS_STRING(result);
1625 for (cur = start, i = 0; i < slicelength;
1626 cur += step, i++) {
1627 result_buf[i] = source_buf[cur];
1628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 return result;
1631 }
1632 }
1633 else {
1634 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001635 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 Py_TYPE(item)->tp_name);
1637 return NULL;
1638 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639}
1640
1641static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001642bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1645 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646}
1647
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001648static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 (lenfunc)bytes_length, /*sq_length*/
1650 (binaryfunc)bytes_concat, /*sq_concat*/
1651 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1652 (ssizeargfunc)bytes_item, /*sq_item*/
1653 0, /*sq_slice*/
1654 0, /*sq_ass_item*/
1655 0, /*sq_ass_slice*/
1656 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657};
1658
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001659static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 (lenfunc)bytes_length,
1661 (binaryfunc)bytes_subscript,
1662 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663};
1664
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001665static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 (getbufferproc)bytes_buffer_getbuffer,
1667 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668};
1669
1670
/* Strip-mode selectors.  NOTE(review): presumably consumed by the
   strip/lstrip/rstrip helpers defined later in this file -- confirm. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
1674
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001675/*[clinic input]
1676bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001678 sep: object = None
1679 The delimiter according which to split the bytes.
1680 None (the default value) means split on ASCII whitespace characters
1681 (space, tab, return, newline, formfeed, vertical tab).
1682 maxsplit: Py_ssize_t = -1
1683 Maximum number of splits to do.
1684 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001686Return a list of the sections in the bytes, using sep as the delimiter.
1687[clinic start generated code]*/
1688
1689PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001690"split($self, /, sep=None, maxsplit=-1)\n"
1691"--\n"
1692"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1694"\n"
1695" sep\n"
1696" The delimiter according which to split the bytes.\n"
1697" None (the default value) means split on ASCII whitespace characters\n"
1698" (space, tab, return, newline, formfeed, vertical tab).\n"
1699" maxsplit\n"
1700" Maximum number of splits to do.\n"
1701" -1 (the default value) means no limit.");
1702
1703#define BYTES_SPLIT_METHODDEF \
1704 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001705
1706static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001707bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001708
1709static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001710bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001711{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001712 PyObject *return_value = NULL;
1713 static char *_keywords[] = {"sep", "maxsplit", NULL};
1714 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716
1717 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1718 "|On:split", _keywords,
1719 &sep, &maxsplit))
1720 goto exit;
1721 return_value = bytes_split_impl(self, sep, maxsplit);
1722
1723exit:
1724 return return_value;
1725}
1726
1727static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001728bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1729/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001730{
1731 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 const char *s = PyBytes_AS_STRING(self), *sub;
1733 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 if (maxsplit < 0)
1737 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001738 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001740 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 return NULL;
1742 sub = vsub.buf;
1743 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1746 PyBuffer_Release(&vsub);
1747 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001748}
1749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750/*[clinic input]
1751bytes.partition
1752
1753 self: self(type="PyBytesObject *")
1754 sep: object
1755 /
1756
1757Partition the bytes into three parts using the given separator.
1758
1759This will search for the separator sep in the bytes. If the separator is found,
1760returns a 3-tuple containing the part before the separator, the separator
1761itself, and the part after it.
1762
1763If the separator is not found, returns a 3-tuple containing the original bytes
1764object and two empty bytes objects.
1765[clinic start generated code]*/
1766
1767PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001768"partition($self, sep, /)\n"
1769"--\n"
1770"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001771"Partition the bytes into three parts using the given separator.\n"
1772"\n"
1773"This will search for the separator sep in the bytes. If the separator is found,\n"
1774"returns a 3-tuple containing the part before the separator, the separator\n"
1775"itself, and the part after it.\n"
1776"\n"
1777"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1778"object and two empty bytes objects.");
1779
1780#define BYTES_PARTITION_METHODDEF \
1781 {"partition", (PyCFunction)bytes_partition, METH_O, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001782
1783static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001784bytes_partition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001785/*[clinic end generated code: output=b41e119c873c08bc input=6c5b9dcc5a9fd62e]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001786{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001789
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 if (PyBytes_Check(sep)) {
1791 sep_chars = PyBytes_AS_STRING(sep);
1792 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 return stringlib_partition(
1798 (PyObject*) self,
1799 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001802}
1803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.rpartition
1806
1807 self: self(type="PyBytesObject *")
1808 sep: object
1809 /
1810
1811Partition the bytes into three parts using the given separator.
1812
This will search for the separator sep in the bytes, starting at the end. If
1814the separator is found, returns a 3-tuple containing the part before the
1815separator, the separator itself, and the part after it.
1816
1817If the separator is not found, returns a 3-tuple containing two empty bytes
1818objects and the original bytes object.
1819[clinic start generated code]*/
1820
1821PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001822"rpartition($self, sep, /)\n"
1823"--\n"
1824"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825"Partition the bytes into three parts using the given separator.\n"
1826"\n"
1827"This will search for the separator sep in the bytes, starting and the end. If\n"
1828"the separator is found, returns a 3-tuple containing the part before the\n"
1829"separator, the separator itself, and the part after it.\n"
1830"\n"
1831"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1832"objects and the original bytes object.");
1833
1834#define BYTES_RPARTITION_METHODDEF \
1835 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001836
1837static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001838bytes_rpartition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001839/*[clinic end generated code: output=3a620803657196ee input=79bc2932e78e5ce0]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001840{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001841 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001843
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844 if (PyBytes_Check(sep)) {
1845 sep_chars = PyBytes_AS_STRING(sep);
1846 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001848 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 return stringlib_rpartition(
1852 (PyObject*) self,
1853 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001856}
1857
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001858/*[clinic input]
1859bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001860
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001861Return a list of the sections in the bytes, using sep as the delimiter.
1862
1863Splitting is done starting at the end of the bytes and working to the front.
1864[clinic start generated code]*/
1865
1866PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001867"rsplit($self, /, sep=None, maxsplit=-1)\n"
1868"--\n"
1869"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001870"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1871"\n"
1872" sep\n"
1873" The delimiter according which to split the bytes.\n"
1874" None (the default value) means split on ASCII whitespace characters\n"
1875" (space, tab, return, newline, formfeed, vertical tab).\n"
1876" maxsplit\n"
1877" Maximum number of splits to do.\n"
1878" -1 (the default value) means no limit.\n"
1879"\n"
1880"Splitting is done starting at the end of the bytes and working to the front.");
1881
1882#define BYTES_RSPLIT_METHODDEF \
1883 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
Neal Norwitz6968b052007-02-27 19:02:19 +00001885static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001886bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001887
1888static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001889bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001890{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001891 PyObject *return_value = NULL;
1892 static char *_keywords[] = {"sep", "maxsplit", NULL};
1893 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001895
1896 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1897 "|On:rsplit", _keywords,
1898 &sep, &maxsplit))
1899 goto exit;
1900 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1901
1902exit:
1903 return return_value;
1904}
1905
1906static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001907bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1908/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001909{
1910 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 const char *s = PyBytes_AS_STRING(self), *sub;
1912 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001913 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 if (maxsplit < 0)
1916 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001917 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001919 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 return NULL;
1921 sub = vsub.buf;
1922 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1925 PyBuffer_Release(&vsub);
1926 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001927}
1928
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001930/*[clinic input]
1931bytes.join
1932
1933 iterable_of_bytes: object
1934 /
1935
1936Concatenate any number of bytes objects.
1937
1938The bytes whose method is called is inserted in between each pair.
1939
1940The result is returned as a new bytes object.
1941
1942Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1943[clinic start generated code]*/
1944
1945PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001946"join($self, iterable_of_bytes, /)\n"
1947"--\n"
1948"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001949"Concatenate any number of bytes objects.\n"
1950"\n"
1951"The bytes whose method is called is inserted in between each pair.\n"
1952"\n"
1953"The result is returned as a new bytes object.\n"
1954"\n"
1955"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1956
1957#define BYTES_JOIN_METHODDEF \
1958 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Neal Norwitz6968b052007-02-27 19:02:19 +00001960static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001961bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1962/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001963{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001964 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001965}
1966
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967PyObject *
1968_PyBytes_Join(PyObject *sep, PyObject *x)
1969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 assert(sep != NULL && PyBytes_Check(sep));
1971 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001972 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973}
1974
/* Helper macro to fix up start/end slice values in place.
 *
 *   - end   greater than len is set to len; a negative end has len added
 *           and is then clamped to 0 if still negative.
 *   - start negative has len added and is then clamped to 0 if still
 *           negative.  A start greater than len is left as is.
 *
 * start and end must be modifiable lvalues; every argument may be evaluated
 * more than once, so pass expressions without side effects.
 *
 * The body is wrapped in do { ... } while (0) and the arguments are
 * parenthesized so the macro expands to exactly one statement.  It is then
 * safe inside an unbraced if/else, and call sites terminate it with a
 * semicolon like a function call.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
1990Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001991bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001994 char byte;
1995 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 const char *sub;
1997 Py_ssize_t sub_len;
1998 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001999 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000
Antoine Pitrouac65d962011-10-20 23:54:17 +02002001 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
2002 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouac65d962011-10-20 23:54:17 +02002005 if (subobj) {
2006 if (_getbuffer(subobj, &subbuf) < 0)
2007 return -2;
2008
2009 sub = subbuf.buf;
2010 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002012 else {
2013 sub = &byte;
2014 sub_len = 1;
2015 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002018 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2020 sub, sub_len, start, end);
2021 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02002022 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2024 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02002025
2026 if (subobj)
2027 PyBuffer_Release(&subbuf);
2028
2029 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030}
2031
2032
2033PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002034"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002035\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002036Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002037such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002038arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002039\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040Return -1 on failure.");
2041
Neal Norwitz6968b052007-02-27 19:02:19 +00002042static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002043bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002044{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 Py_ssize_t result = bytes_find_internal(self, args, +1);
2046 if (result == -2)
2047 return NULL;
2048 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002049}
2050
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051
2052PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002053"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002054\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055Like B.find() but raise ValueError when the substring is not found.");
2056
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002057static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002058bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 Py_ssize_t result = bytes_find_internal(self, args, +1);
2061 if (result == -2)
2062 return NULL;
2063 if (result == -1) {
2064 PyErr_SetString(PyExc_ValueError,
2065 "substring not found");
2066 return NULL;
2067 }
2068 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002069}
2070
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
2072PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002073"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002074\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002076such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002078\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079Return -1 on failure.");
2080
Neal Norwitz6968b052007-02-27 19:02:19 +00002081static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002082bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 Py_ssize_t result = bytes_find_internal(self, args, -1);
2085 if (result == -2)
2086 return NULL;
2087 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002088}
2089
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002090
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002092"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093\n\
2094Like B.rfind() but raise ValueError when the substring is not found.");
2095
2096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002097bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 Py_ssize_t result = bytes_find_internal(self, args, -1);
2100 if (result == -2)
2101 return NULL;
2102 if (result == -1) {
2103 PyErr_SetString(PyExc_ValueError,
2104 "substring not found");
2105 return NULL;
2106 }
2107 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002108}
2109
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
2111Py_LOCAL_INLINE(PyObject *)
2112do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 Py_buffer vsep;
2115 char *s = PyBytes_AS_STRING(self);
2116 Py_ssize_t len = PyBytes_GET_SIZE(self);
2117 char *sep;
2118 Py_ssize_t seplen;
2119 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 if (_getbuffer(sepobj, &vsep) < 0)
2122 return NULL;
2123 sep = vsep.buf;
2124 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 i = 0;
2127 if (striptype != RIGHTSTRIP) {
2128 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2129 i++;
2130 }
2131 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 j = len;
2134 if (striptype != LEFTSTRIP) {
2135 do {
2136 j--;
2137 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2138 j++;
2139 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2144 Py_INCREF(self);
2145 return (PyObject*)self;
2146 }
2147 else
2148 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002149}
2150
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
2152Py_LOCAL_INLINE(PyObject *)
2153do_strip(PyBytesObject *self, int striptype)
2154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 char *s = PyBytes_AS_STRING(self);
2156 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 i = 0;
2159 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002160 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 i++;
2162 }
2163 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 j = len;
2166 if (striptype != LEFTSTRIP) {
2167 do {
2168 j--;
David Malcolm96960882010-11-05 17:23:41 +00002169 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 j++;
2171 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2174 Py_INCREF(self);
2175 return (PyObject*)self;
2176 }
2177 else
2178 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179}
2180
2181
2182Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002185 if (bytes != NULL && bytes != Py_None) {
2186 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 }
2188 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189}
2190
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002191/*[clinic input]
2192bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002193
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194 self: self(type="PyBytesObject *")
2195 bytes: object = None
2196 /
2197
2198Strip leading and trailing bytes contained in the argument.
2199
2200If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2201[clinic start generated code]*/
2202
2203PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002204"strip($self, bytes=None, /)\n"
2205"--\n"
2206"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002207"Strip leading and trailing bytes contained in the argument.\n"
2208"\n"
2209"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
2210
2211#define BYTES_STRIP_METHODDEF \
2212 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
2213
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002214static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
2216
2217static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002218bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002219{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002220 PyObject *return_value = NULL;
2221 PyObject *bytes = Py_None;
2222
2223 if (!PyArg_UnpackTuple(args, "strip",
2224 0, 1,
2225 &bytes))
2226 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002227 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002228
2229exit:
2230 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002231}
2232
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002233static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002235/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002236{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002238}
2239
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002240/*[clinic input]
2241bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243 self: self(type="PyBytesObject *")
2244 bytes: object = None
2245 /
2246
2247Strip leading bytes contained in the argument.
2248
2249If the argument is omitted or None, strip leading ASCII whitespace.
2250[clinic start generated code]*/
2251
2252PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002253"lstrip($self, bytes=None, /)\n"
2254"--\n"
2255"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002256"Strip leading bytes contained in the argument.\n"
2257"\n"
2258"If the argument is omitted or None, strip leading ASCII whitespace.");
2259
2260#define BYTES_LSTRIP_METHODDEF \
2261 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
2262
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002263static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002264bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
2265
2266static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002267bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002268{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002269 PyObject *return_value = NULL;
2270 PyObject *bytes = Py_None;
2271
2272 if (!PyArg_UnpackTuple(args, "lstrip",
2273 0, 1,
2274 &bytes))
2275 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002276 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002277
2278exit:
2279 return return_value;
2280}
2281
2282static PyObject *
2283bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002284/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002285{
2286 return do_argstrip(self, LEFTSTRIP, bytes);
2287}
2288
2289/*[clinic input]
2290bytes.rstrip
2291
2292 self: self(type="PyBytesObject *")
2293 bytes: object = None
2294 /
2295
2296Strip trailing bytes contained in the argument.
2297
2298If the argument is omitted or None, strip trailing ASCII whitespace.
2299[clinic start generated code]*/
2300
2301PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002302"rstrip($self, bytes=None, /)\n"
2303"--\n"
2304"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002305"Strip trailing bytes contained in the argument.\n"
2306"\n"
2307"If the argument is omitted or None, strip trailing ASCII whitespace.");
2308
2309#define BYTES_RSTRIP_METHODDEF \
2310 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
2311
2312static PyObject *
2313bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
2314
2315static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002316bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317{
2318 PyObject *return_value = NULL;
2319 PyObject *bytes = Py_None;
2320
2321 if (!PyArg_UnpackTuple(args, "rstrip",
2322 0, 1,
2323 &bytes))
2324 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002325 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002326
2327exit:
2328 return return_value;
2329}
2330
2331static PyObject *
2332bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002333/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002334{
2335 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002336}
Neal Norwitz6968b052007-02-27 19:02:19 +00002337
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338
2339PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002340"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002341\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002342Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002343string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344as in slice notation.");
2345
2346static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002347bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 PyObject *sub_obj;
2350 const char *str = PyBytes_AS_STRING(self), *sub;
2351 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002352 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354
Antoine Pitrouac65d962011-10-20 23:54:17 +02002355 Py_buffer vsub;
2356 PyObject *count_obj;
2357
2358 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2359 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361
Antoine Pitrouac65d962011-10-20 23:54:17 +02002362 if (sub_obj) {
2363 if (_getbuffer(sub_obj, &vsub) < 0)
2364 return NULL;
2365
2366 sub = vsub.buf;
2367 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002369 else {
2370 sub = &byte;
2371 sub_len = 1;
2372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002374 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002375
Antoine Pitrouac65d962011-10-20 23:54:17 +02002376 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2378 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002379
2380 if (sub_obj)
2381 PyBuffer_Release(&vsub);
2382
2383 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002384}
2385
2386
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387/*[clinic input]
2388bytes.translate
2389
2390 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002391 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002392 Translation table, which must be a bytes object of length 256.
2393 [
2394 deletechars: object
2395 ]
2396 /
2397
2398Return a copy with each character mapped by the given translation table.
2399
2400All characters occurring in the optional argument deletechars are removed.
2401The remaining characters are mapped through the given translation table.
2402[clinic start generated code]*/
2403
2404PyDoc_STRVAR(bytes_translate__doc__,
2405"translate(table, [deletechars])\n"
2406"Return a copy with each character mapped by the given translation table.\n"
2407"\n"
2408" table\n"
2409" Translation table, which must be a bytes object of length 256.\n"
2410"\n"
2411"All characters occurring in the optional argument deletechars are removed.\n"
2412"The remaining characters are mapped through the given translation table.");
2413
2414#define BYTES_TRANSLATE_METHODDEF \
2415 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
2417static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002418bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
2419
2420static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002421bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002422{
2423 PyObject *return_value = NULL;
2424 PyObject *table;
2425 int group_right_1 = 0;
2426 PyObject *deletechars = NULL;
2427
2428 switch (PyTuple_GET_SIZE(args)) {
2429 case 1:
2430 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002431 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002432 break;
2433 case 2:
2434 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002435 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002436 group_right_1 = 1;
2437 break;
2438 default:
2439 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02002440 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002441 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02002442 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002443
Martin v. Löwis0efea322014-07-27 17:29:17 +02002444exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002445 return return_value;
2446}
2447
2448static PyObject *
2449bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Larry Hastingsdfbeb162014-10-13 10:39:41 +01002450/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002451{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002452 char *input, *output;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002453 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002454 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002456 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002457 Py_ssize_t inlen, tablen, dellen = 0;
2458 PyObject *result;
2459 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002460
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002461 if (PyBytes_Check(table)) {
2462 table_chars = PyBytes_AS_STRING(table);
2463 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002465 else if (table == Py_None) {
2466 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 tablen = 256;
2468 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002469 else if (PyObject_AsCharBuffer(table, &table_chars, &tablen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 if (tablen != 256) {
2473 PyErr_SetString(PyExc_ValueError,
2474 "translation table must be 256 characters long");
2475 return NULL;
2476 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002478 if (deletechars != NULL) {
2479 if (PyBytes_Check(deletechars)) {
2480 del_table_chars = PyBytes_AS_STRING(deletechars);
2481 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002483 else if (PyObject_AsCharBuffer(deletechars, &del_table_chars, &dellen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 return NULL;
2485 }
2486 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002487 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 dellen = 0;
2489 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 inlen = PyBytes_GET_SIZE(input_obj);
2492 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2493 if (result == NULL)
2494 return NULL;
2495 output_start = output = PyBytes_AsString(result);
2496 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002498 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 /* If no deletions are required, use faster code */
2500 for (i = inlen; --i >= 0; ) {
2501 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002502 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 changed = 1;
2504 }
2505 if (changed || !PyBytes_CheckExact(input_obj))
2506 return result;
2507 Py_DECREF(result);
2508 Py_INCREF(input_obj);
2509 return input_obj;
2510 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002511
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002512 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 for (i = 0; i < 256; i++)
2514 trans_table[i] = Py_CHARMASK(i);
2515 } else {
2516 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002517 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002521 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 for (i = inlen; --i >= 0; ) {
2524 c = Py_CHARMASK(*input++);
2525 if (trans_table[c] != -1)
2526 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2527 continue;
2528 changed = 1;
2529 }
2530 if (!changed && PyBytes_CheckExact(input_obj)) {
2531 Py_DECREF(result);
2532 Py_INCREF(input_obj);
2533 return input_obj;
2534 }
2535 /* Fix the size of the resulting string */
2536 if (inlen > 0)
2537 _PyBytes_Resize(&result, output - output_start);
2538 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539}
2540
2541
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002542/*[clinic input]
2543
2544@staticmethod
2545bytes.maketrans
2546
2547 frm: object
2548 to: object
2549 /
2550
2551Return a translation table useable for the bytes or bytearray translate method.
2552
2553The returned table will be one where each byte in frm is mapped to the byte at
2554the same position in to.
2555
2556The bytes objects frm and to must be of the same length.
2557[clinic start generated code]*/
2558
2559PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002560"maketrans(frm, to, /)\n"
2561"--\n"
2562"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002563"Return a translation table useable for the bytes or bytearray translate method.\n"
2564"\n"
2565"The returned table will be one where each byte in frm is mapped to the byte at\n"
2566"the same position in to.\n"
2567"\n"
2568"The bytes objects frm and to must be of the same length.");
2569
2570#define BYTES_MAKETRANS_METHODDEF \
2571 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
2572
Georg Brandlabc38772009-04-12 15:51:51 +00002573static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002574bytes_maketrans_impl(PyObject *frm, PyObject *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002575
/* Argument-unpacking wrapper for the static method bytes.maketrans():
   requires exactly two positional arguments (frm, to) in args and forwards
   them to bytes_maketrans_impl().  Returns NULL when unpacking fails.
   NOTE(review): this function lies between the Argument Clinic
   "start/end generated code" markers, so it is presumably tool output --
   regenerate it rather than restyling it by hand. */
2576static PyObject *
2577bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00002578{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002579 PyObject *return_value = NULL;
2580 PyObject *frm;
2581 PyObject *to;
2582
2583 if (!PyArg_UnpackTuple(args, "maketrans",
2584 2, 2,
2585 &frm, &to))
2586 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002587 return_value = bytes_maketrans_impl(frm, to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002588
2589exit:
2590 return return_value;
2591}
2592
2593static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002594bytes_maketrans_impl(PyObject *frm, PyObject *to)
2595/*[clinic end generated code: output=89a3c3556975e466 input=d204f680f85da382]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002596{
 /* Delegates entirely to _Py_bytes_maketrans(), which is defined outside
    this section; its return value is passed through unchanged. */
2597 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002598}
2599
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600/* find and count characters and substrings */
2601
/* Find the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when the
   byte does not occur. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2604
2605/* String ops must return a string. */
2606/* If the object is subclass of string, create a copy */
2607Py_LOCAL(PyBytesObject *)
2608return_self(PyBytesObject *self)
2609{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 if (PyBytes_CheckExact(self)) {
2611 Py_INCREF(self);
2612 return self;
2613 }
2614 return (PyBytesObject *)PyBytes_FromStringAndSize(
2615 PyBytes_AS_STRING(self),
2616 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002617}
2618
2619Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002620countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 Py_ssize_t count=0;
2623 const char *start=target;
2624 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 while ( (start=findchar(start, end-start, c)) != NULL ) {
2627 count++;
2628 if (count >= maxcount)
2629 break;
2630 start += 1;
2631 }
2632 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633}
2634
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
2636/* Algorithms for different cases of string replacement */
2637
2638/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2639Py_LOCAL(PyBytesObject *)
2640replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 const char *to_s, Py_ssize_t to_len,
2642 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 char *self_s, *result_s;
2645 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002646 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002651 /* 1 at the end plus 1 after every character;
2652 count = min(maxcount, self_len + 1) */
2653 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002655 else
2656 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2657 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* Check for overflow */
2660 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002661 assert(count > 0);
2662 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 PyErr_SetString(PyExc_OverflowError,
2664 "replacement bytes are too long");
2665 return NULL;
2666 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002667 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 if (! (result = (PyBytesObject *)
2670 PyBytes_FromStringAndSize(NULL, result_len)) )
2671 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 self_s = PyBytes_AS_STRING(self);
2674 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002678 /* Lay the first one down (guaranteed this will occur) */
2679 Py_MEMCPY(result_s, to_s, to_len);
2680 result_s += to_len;
2681 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 for (i=0; i<count; i++) {
2684 *result_s++ = *self_s++;
2685 Py_MEMCPY(result_s, to_s, to_len);
2686 result_s += to_len;
2687 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 /* Copy the rest of the original string */
2690 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693}
2694
2695/* Special case for deleting a single character */
2696/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2697Py_LOCAL(PyBytesObject *)
2698replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 char *self_s, *result_s;
2702 char *start, *next, *end;
2703 Py_ssize_t self_len, result_len;
2704 Py_ssize_t count;
2705 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 self_len = PyBytes_GET_SIZE(self);
2708 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 count = countchar(self_s, self_len, from_c, maxcount);
2711 if (count == 0) {
2712 return return_self(self);
2713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 result_len = self_len - count; /* from_len == 1 */
2716 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 if ( (result = (PyBytesObject *)
2719 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2720 return NULL;
2721 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002723 start = self_s;
2724 end = self_s + self_len;
2725 while (count-- > 0) {
2726 next = findchar(start, end-start, from_c);
2727 if (next == NULL)
2728 break;
2729 Py_MEMCPY(result_s, start, next-start);
2730 result_s += (next-start);
2731 start = next+1;
2732 }
2733 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736}
2737
2738/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2739
2740Py_LOCAL(PyBytesObject *)
2741replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 const char *from_s, Py_ssize_t from_len,
2743 Py_ssize_t maxcount) {
2744 char *self_s, *result_s;
2745 char *start, *next, *end;
2746 Py_ssize_t self_len, result_len;
2747 Py_ssize_t count, offset;
2748 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002750 self_len = PyBytes_GET_SIZE(self);
2751 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002753 count = stringlib_count(self_s, self_len,
2754 from_s, from_len,
2755 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 if (count == 0) {
2758 /* no matches */
2759 return return_self(self);
2760 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 result_len = self_len - (count * from_len);
2763 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002765 if ( (result = (PyBytesObject *)
2766 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2767 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002771 start = self_s;
2772 end = self_s + self_len;
2773 while (count-- > 0) {
2774 offset = stringlib_find(start, end-start,
2775 from_s, from_len,
2776 0);
2777 if (offset == -1)
2778 break;
2779 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002783 result_s += (next-start);
2784 start = next+from_len;
2785 }
2786 Py_MEMCPY(result_s, start, end-start);
2787 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788}
2789
2790/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2791Py_LOCAL(PyBytesObject *)
2792replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 char from_c, char to_c,
2794 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 char *self_s, *result_s, *start, *end, *next;
2797 Py_ssize_t self_len;
2798 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 /* The result string will be the same size */
2801 self_s = PyBytes_AS_STRING(self);
2802 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 if (next == NULL) {
2807 /* No matches; return the original string */
2808 return return_self(self);
2809 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 /* Need to make a new string */
2812 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2813 if (result == NULL)
2814 return NULL;
2815 result_s = PyBytes_AS_STRING(result);
2816 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 /* change everything in-place, starting with this one */
2819 start = result_s + (next-self_s);
2820 *start = to_c;
2821 start++;
2822 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 while (--maxcount > 0) {
2825 next = findchar(start, end-start, from_c);
2826 if (next == NULL)
2827 break;
2828 *next = to_c;
2829 start = next+1;
2830 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002832 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002833}
2834
2835/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2836Py_LOCAL(PyBytesObject *)
2837replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 const char *from_s, Py_ssize_t from_len,
2839 const char *to_s, Py_ssize_t to_len,
2840 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002842 char *result_s, *start, *end;
2843 char *self_s;
2844 Py_ssize_t self_len, offset;
2845 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 self_s = PyBytes_AS_STRING(self);
2850 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 offset = stringlib_find(self_s, self_len,
2853 from_s, from_len,
2854 0);
2855 if (offset == -1) {
2856 /* No matches; return the original string */
2857 return return_self(self);
2858 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 /* Need to make a new string */
2861 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2862 if (result == NULL)
2863 return NULL;
2864 result_s = PyBytes_AS_STRING(result);
2865 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 /* change everything in-place, starting with this one */
2868 start = result_s + offset;
2869 Py_MEMCPY(start, to_s, from_len);
2870 start += from_len;
2871 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 while ( --maxcount > 0) {
2874 offset = stringlib_find(start, end-start,
2875 from_s, from_len,
2876 0);
2877 if (offset==-1)
2878 break;
2879 Py_MEMCPY(start+offset, to_s, from_len);
2880 start += offset+from_len;
2881 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884}
2885
2886/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2887Py_LOCAL(PyBytesObject *)
2888replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 char from_c,
2890 const char *to_s, Py_ssize_t to_len,
2891 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002892{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002893 char *self_s, *result_s;
2894 char *start, *next, *end;
2895 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002896 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002899 self_s = PyBytes_AS_STRING(self);
2900 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 count = countchar(self_s, self_len, from_c, maxcount);
2903 if (count == 0) {
2904 /* no matches, return unchanged */
2905 return return_self(self);
2906 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002908 /* use the difference between current and new, hence the "-1" */
2909 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002910 assert(count > 0);
2911 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002912 PyErr_SetString(PyExc_OverflowError,
2913 "replacement bytes are too long");
2914 return NULL;
2915 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002916 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 if ( (result = (PyBytesObject *)
2919 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2920 return NULL;
2921 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 start = self_s;
2924 end = self_s + self_len;
2925 while (count-- > 0) {
2926 next = findchar(start, end-start, from_c);
2927 if (next == NULL)
2928 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 if (next == start) {
2931 /* replace with the 'to' */
2932 Py_MEMCPY(result_s, to_s, to_len);
2933 result_s += to_len;
2934 start += 1;
2935 } else {
2936 /* copy the unchanged old then the 'to' */
2937 Py_MEMCPY(result_s, start, next-start);
2938 result_s += (next-start);
2939 Py_MEMCPY(result_s, to_s, to_len);
2940 result_s += to_len;
2941 start = next+1;
2942 }
2943 }
2944 /* Copy the remainder of the remaining string */
2945 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002947 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948}
2949
2950/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2951Py_LOCAL(PyBytesObject *)
2952replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 const char *from_s, Py_ssize_t from_len,
2954 const char *to_s, Py_ssize_t to_len,
2955 Py_ssize_t maxcount) {
2956 char *self_s, *result_s;
2957 char *start, *next, *end;
2958 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002959 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 self_s = PyBytes_AS_STRING(self);
2963 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002965 count = stringlib_count(self_s, self_len,
2966 from_s, from_len,
2967 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 if (count == 0) {
2970 /* no matches, return unchanged */
2971 return return_self(self);
2972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 /* Check for overflow */
2975 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002976 assert(count > 0);
2977 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 PyErr_SetString(PyExc_OverflowError,
2979 "replacement bytes are too long");
2980 return NULL;
2981 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002982 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 if ( (result = (PyBytesObject *)
2985 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2986 return NULL;
2987 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 start = self_s;
2990 end = self_s + self_len;
2991 while (count-- > 0) {
2992 offset = stringlib_find(start, end-start,
2993 from_s, from_len,
2994 0);
2995 if (offset == -1)
2996 break;
2997 next = start+offset;
2998 if (next == start) {
2999 /* replace with the 'to' */
3000 Py_MEMCPY(result_s, to_s, to_len);
3001 result_s += to_len;
3002 start += from_len;
3003 } else {
3004 /* copy the unchanged old then the 'to' */
3005 Py_MEMCPY(result_s, start, next-start);
3006 result_s += (next-start);
3007 Py_MEMCPY(result_s, to_s, to_len);
3008 result_s += to_len;
3009 start = next+from_len;
3010 }
3011 }
3012 /* Copy the remainder of the remaining string */
3013 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003016}
3017
3018
3019Py_LOCAL(PyBytesObject *)
3020replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 const char *from_s, Py_ssize_t from_len,
3022 const char *to_s, Py_ssize_t to_len,
3023 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003025 if (maxcount < 0) {
3026 maxcount = PY_SSIZE_T_MAX;
3027 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3028 /* nothing to do; return the original string */
3029 return return_self(self);
3030 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 if (maxcount == 0 ||
3033 (from_len == 0 && to_len == 0)) {
3034 /* nothing to do; return the original string */
3035 return return_self(self);
3036 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 if (from_len == 0) {
3041 /* insert the 'to' string everywhere. */
3042 /* >>> "Python".replace("", ".") */
3043 /* '.P.y.t.h.o.n.' */
3044 return replace_interleave(self, to_s, to_len, maxcount);
3045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3048 /* point for an empty self string to generate a non-empty string */
3049 /* Special case so the remaining code always gets a non-empty string */
3050 if (PyBytes_GET_SIZE(self) == 0) {
3051 return return_self(self);
3052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003054 if (to_len == 0) {
3055 /* delete all occurrences of 'from' string */
3056 if (from_len == 1) {
3057 return replace_delete_single_character(
3058 self, from_s[0], maxcount);
3059 } else {
3060 return replace_delete_substring(self, from_s,
3061 from_len, maxcount);
3062 }
3063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003065 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003067 if (from_len == to_len) {
3068 if (from_len == 1) {
3069 return replace_single_character_in_place(
3070 self,
3071 from_s[0],
3072 to_s[0],
3073 maxcount);
3074 } else {
3075 return replace_substring_in_place(
3076 self, from_s, from_len, to_s, to_len,
3077 maxcount);
3078 }
3079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 /* Otherwise use the more generic algorithms */
3082 if (from_len == 1) {
3083 return replace_single_character(self, from_s[0],
3084 to_s, to_len, maxcount);
3085 } else {
3086 /* len('from')>=2, len('to')>=1 */
3087 return replace_substring(self, from_s, from_len, to_s, to_len,
3088 maxcount);
3089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003090}
3091
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003092
3093/*[clinic input]
3094bytes.replace
3095
3096 old: object
3097 new: object
3098 count: Py_ssize_t = -1
3099 Maximum number of occurrences to replace.
3100 -1 (the default value) means replace all occurrences.
3101 /
3102
3103Return a copy with all occurrences of substring old replaced by new.
3104
3105If the optional argument count is given, only the first count occurrences are
3106replaced.
3107[clinic start generated code]*/
3108
3109PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003110"replace($self, old, new, count=-1, /)\n"
3111"--\n"
3112"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003113"Return a copy with all occurrences of substring old replaced by new.\n"
3114"\n"
3115" count\n"
3116" Maximum number of occurrences to replace.\n"
3117" -1 (the default value) means replace all occurrences.\n"
3118"\n"
3119"If the optional argument count is given, only the first count occurrences are\n"
3120"replaced.");
3121
3122#define BYTES_REPLACE_METHODDEF \
3123 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003124
3125static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003126bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003127
/* Argument-parsing wrapper for bytes.replace(): parses args with the format
   "OO|n:replace" (two required objects plus an optional Py_ssize_t count
   that stays at -1 when omitted) and forwards to bytes_replace_impl().
   Returns NULL when parsing fails.
   NOTE(review): this function lies between the Argument Clinic
   "start/end generated code" markers, so it is presumably tool output --
   regenerate it rather than restyling it by hand. */
3128static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003129bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003130{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003131 PyObject *return_value = NULL;
3132 PyObject *old;
3133 PyObject *new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003134 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003135
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003136 if (!PyArg_ParseTuple(args,
3137 "OO|n:replace",
3138 &old, &new, &count))
3139 goto exit;
3140 return_value = bytes_replace_impl(self, old, new, count);
3141
3142exit:
3143 return return_value;
3144}
3145
3146static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003147bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count)
3148/*[clinic end generated code: output=14ce72f4f9cb91cf input=d3ac254ea50f4ac1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003149{
3150 const char *old_s, *new_s;
3151 Py_ssize_t old_len, new_len;
3152
3153 if (PyBytes_Check(old)) {
3154 old_s = PyBytes_AS_STRING(old);
3155 old_len = PyBytes_GET_SIZE(old);
3156 }
3157 else if (PyObject_AsCharBuffer(old, &old_s, &old_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003159
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003160 if (PyBytes_Check(new)) {
3161 new_s = PyBytes_AS_STRING(new);
3162 new_len = PyBytes_GET_SIZE(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003164 else if (PyObject_AsCharBuffer(new, &new_s, &new_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003167 return (PyObject *)replace((PyBytesObject *) self,
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003168 old_s, old_len,
3169 new_s, new_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003170}
3171
3172/** End DALKE **/
3173
3174/* Matches the end (direction >= 0) or start (direction < 0) of self
3175 * against substr, using the start and end arguments. Returns
3176 * -1 on error, 0 if not found and 1 if found.
3177 */
3178Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003179_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003180 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 Py_ssize_t len = PyBytes_GET_SIZE(self);
3183 Py_ssize_t slen;
3184 const char* sub;
3185 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003187 if (PyBytes_Check(substr)) {
3188 sub = PyBytes_AS_STRING(substr);
3189 slen = PyBytes_GET_SIZE(substr);
3190 }
3191 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3192 return -1;
3193 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003197 if (direction < 0) {
3198 /* startswith */
3199 if (start+slen > len)
3200 return 0;
3201 } else {
3202 /* endswith */
3203 if (end-start < slen || start > len)
3204 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003206 if (end-slen > start)
3207 start = end - slen;
3208 }
3209 if (end-start >= slen)
3210 return ! memcmp(str+start, sub, slen);
3211 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003212}
3213
3214
3215PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003216"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003217\n\
3218Return True if B starts with the specified prefix, False otherwise.\n\
3219With optional start, test B beginning at that position.\n\
3220With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003221prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003222
3223static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003224bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003226 Py_ssize_t start = 0;
3227 Py_ssize_t end = PY_SSIZE_T_MAX;
3228 PyObject *subobj;
3229 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003230
Jesus Ceaac451502011-04-20 17:09:23 +02003231 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 return NULL;
3233 if (PyTuple_Check(subobj)) {
3234 Py_ssize_t i;
3235 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3236 result = _bytes_tailmatch(self,
3237 PyTuple_GET_ITEM(subobj, i),
3238 start, end, -1);
3239 if (result == -1)
3240 return NULL;
3241 else if (result) {
3242 Py_RETURN_TRUE;
3243 }
3244 }
3245 Py_RETURN_FALSE;
3246 }
3247 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003248 if (result == -1) {
3249 if (PyErr_ExceptionMatches(PyExc_TypeError))
3250 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3251 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003252 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003253 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003254 else
3255 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003256}
3257
3258
3259PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003260"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261\n\
3262Return True if B ends with the specified suffix, False otherwise.\n\
3263With optional start, test B beginning at that position.\n\
3264With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003265suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003266
3267static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003268bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003270 Py_ssize_t start = 0;
3271 Py_ssize_t end = PY_SSIZE_T_MAX;
3272 PyObject *subobj;
3273 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003274
Jesus Ceaac451502011-04-20 17:09:23 +02003275 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003276 return NULL;
3277 if (PyTuple_Check(subobj)) {
3278 Py_ssize_t i;
3279 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3280 result = _bytes_tailmatch(self,
3281 PyTuple_GET_ITEM(subobj, i),
3282 start, end, +1);
3283 if (result == -1)
3284 return NULL;
3285 else if (result) {
3286 Py_RETURN_TRUE;
3287 }
3288 }
3289 Py_RETURN_FALSE;
3290 }
3291 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003292 if (result == -1) {
3293 if (PyErr_ExceptionMatches(PyExc_TypeError))
3294 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3295 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003296 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003297 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003298 else
3299 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003300}
3301
3302
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003303/*[clinic input]
3304bytes.decode
3305
3306 encoding: str(c_default="NULL") = 'utf-8'
3307 The encoding with which to decode the bytes.
3308 errors: str(c_default="NULL") = 'strict'
3309 The error handling scheme to use for the handling of decoding errors.
3310 The default is 'strict' meaning that decoding errors raise a
3311 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3312 as well as any other name registered with codecs.register_error that
3313 can handle UnicodeDecodeErrors.
3314
3315Decode the bytes using the codec registered for encoding.
3316[clinic start generated code]*/
3317
3318PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003319"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
3320"--\n"
3321"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003322"Decode the bytes using the codec registered for encoding.\n"
3323"\n"
3324" encoding\n"
3325" The encoding with which to decode the bytes.\n"
3326" errors\n"
3327" The error handling scheme to use for the handling of decoding errors.\n"
3328" The default is \'strict\' meaning that decoding errors raise a\n"
3329" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
3330" as well as any other name registered with codecs.register_error that\n"
3331" can handle UnicodeDecodeErrors.");
3332
3333#define BYTES_DECODE_METHODDEF \
3334 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
3335
3336static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003337bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003338
3339static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003340bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00003341{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003342 PyObject *return_value = NULL;
3343 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003344 const char *encoding = NULL;
3345 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00003346
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003347 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3348 "|ss:decode", _keywords,
3349 &encoding, &errors))
3350 goto exit;
3351 return_value = bytes_decode_impl(self, encoding, errors);
3352
3353exit:
3354 return return_value;
3355}
3356
3357static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003358bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
3359/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003360{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003361 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003362}
3363
Guido van Rossum20188312006-05-05 15:15:40 +00003364
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003365/*[clinic input]
3366bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003367
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003368 keepends: int(py_default="False") = 0
3369
3370Return a list of the lines in the bytes, breaking at line boundaries.
3371
3372Line breaks are not included in the resulting list unless keepends is given and
3373true.
3374[clinic start generated code]*/
3375
3376PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003377"splitlines($self, /, keepends=False)\n"
3378"--\n"
3379"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003380"Return a list of the lines in the bytes, breaking at line boundaries.\n"
3381"\n"
3382"Line breaks are not included in the resulting list unless keepends is given and\n"
3383"true.");
3384
3385#define BYTES_SPLITLINES_METHODDEF \
3386 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
3387
3388static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003389bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003390
3391static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003392bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003393{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003394 PyObject *return_value = NULL;
3395 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003396 int keepends = 0;
3397
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003398 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3399 "|i:splitlines", _keywords,
3400 &keepends))
3401 goto exit;
3402 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003403
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003404exit:
3405 return return_value;
3406}
3407
3408static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003409bytes_splitlines_impl(PyBytesObject*self, int keepends)
3410/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003411{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003412 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003413 (PyObject*) self, PyBytes_AS_STRING(self),
3414 PyBytes_GET_SIZE(self), keepends
3415 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003416}
3417
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003418static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003419hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003420{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003421 if (c >= 128)
3422 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003423 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003424 return c - '0';
3425 else {
David Malcolm96960882010-11-05 17:23:41 +00003426 if (Py_ISUPPER(c))
3427 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003428 if (c >= 'a' && c <= 'f')
3429 return c - 'a' + 10;
3430 }
3431 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003432}
3433
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003434/*[clinic input]
3435@classmethod
3436bytes.fromhex
3437
3438 string: unicode
3439 /
3440
3441Create a bytes object from a string of hexadecimal numbers.
3442
3443Spaces between two numbers are accepted.
3444Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3445[clinic start generated code]*/
3446
3447PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003448"fromhex($type, string, /)\n"
3449"--\n"
3450"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003451"Create a bytes object from a string of hexadecimal numbers.\n"
3452"\n"
3453"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02003454"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003455
3456#define BYTES_FROMHEX_METHODDEF \
3457 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
3458
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003459static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003460bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003461
3462static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003463bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003464{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003465 PyObject *return_value = NULL;
3466 PyObject *string;
3467
3468 if (!PyArg_ParseTuple(args,
3469 "U:fromhex",
3470 &string))
3471 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02003472 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003473
3474exit:
3475 return return_value;
3476}
3477
3478static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003479bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
3480/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003481{
3482 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003483 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003484 Py_ssize_t hexlen, byteslen, i, j;
3485 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003486 void *data;
3487 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003488
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003489 assert(PyUnicode_Check(string));
3490 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003491 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003492 kind = PyUnicode_KIND(string);
3493 data = PyUnicode_DATA(string);
3494 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003496 byteslen = hexlen/2; /* This overestimates if there are spaces */
3497 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3498 if (!newstring)
3499 return NULL;
3500 buf = PyBytes_AS_STRING(newstring);
3501 for (i = j = 0; i < hexlen; i += 2) {
3502 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003503 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003504 i++;
3505 if (i >= hexlen)
3506 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003507 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3508 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003509 if (top == -1 || bot == -1) {
3510 PyErr_Format(PyExc_ValueError,
3511 "non-hexadecimal number found in "
3512 "fromhex() arg at position %zd", i);
3513 goto error;
3514 }
3515 buf[j++] = (top << 4) + bot;
3516 }
3517 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3518 goto error;
3519 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003520
3521 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003522 Py_XDECREF(newstring);
3523 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003524}
3525
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003526/*[clinic input]
3527bytes.__sizeof__ as bytes_sizeof
3528
3529 self: self(type="PyBytesObject *")
3530
3531Returns the size of the bytes object in memory, in bytes.
3532[clinic start generated code]*/
3533
3534PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003535"__sizeof__($self, /)\n"
3536"--\n"
3537"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003538"Returns the size of the bytes object in memory, in bytes.");
3539
3540#define BYTES_SIZEOF_METHODDEF \
3541 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003542
3543static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003544bytes_sizeof_impl(PyBytesObject *self);
3545
3546static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003547bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003548{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003549 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003550}
3551
3552static PyObject *
3553bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02003554/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003556 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003557 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003558 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003559}
3560
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003561
3562static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003563bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003564{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003565 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003566}
3567
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003568
3569static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003570bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003571 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3572 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3573 _Py_capitalize__doc__},
3574 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3575 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003576 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3578 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003579 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003580 expandtabs__doc__},
3581 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003582 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003583 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3584 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3585 _Py_isalnum__doc__},
3586 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3587 _Py_isalpha__doc__},
3588 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3589 _Py_isdigit__doc__},
3590 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3591 _Py_islower__doc__},
3592 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3593 _Py_isspace__doc__},
3594 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3595 _Py_istitle__doc__},
3596 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3597 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003598 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003599 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3600 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003601 BYTES_LSTRIP_METHODDEF
3602 BYTES_MAKETRANS_METHODDEF
3603 BYTES_PARTITION_METHODDEF
3604 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003605 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3606 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3607 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003608 BYTES_RPARTITION_METHODDEF
3609 BYTES_RSPLIT_METHODDEF
3610 BYTES_RSTRIP_METHODDEF
3611 BYTES_SPLIT_METHODDEF
3612 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003613 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3614 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003615 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003616 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3617 _Py_swapcase__doc__},
3618 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003619 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003620 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3621 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003622 BYTES_SIZEOF_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003623 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003624};
3625
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003626static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003627bytes_mod(PyObject *v, PyObject *w)
3628{
3629 if (!PyBytes_Check(v))
3630 Py_RETURN_NOTIMPLEMENTED;
3631 return _PyBytes_Format(v, w);
3632}
3633
3634static PyNumberMethods bytes_as_number = {
3635 0, /*nb_add*/
3636 0, /*nb_subtract*/
3637 0, /*nb_multiply*/
3638 bytes_mod, /*nb_remainder*/
3639};
3640
3641static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003642str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3643
3644static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003645bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003646{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003647 PyObject *x = NULL;
3648 const char *encoding = NULL;
3649 const char *errors = NULL;
3650 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003651 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003652 Py_ssize_t size;
3653 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003654 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003656 if (type != &PyBytes_Type)
3657 return str_subtype_new(type, args, kwds);
3658 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3659 &encoding, &errors))
3660 return NULL;
3661 if (x == NULL) {
3662 if (encoding != NULL || errors != NULL) {
3663 PyErr_SetString(PyExc_TypeError,
3664 "encoding or errors without sequence "
3665 "argument");
3666 return NULL;
3667 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003668 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003669 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003671 if (PyUnicode_Check(x)) {
3672 /* Encode via the codec registry */
3673 if (encoding == NULL) {
3674 PyErr_SetString(PyExc_TypeError,
3675 "string argument without an encoding");
3676 return NULL;
3677 }
3678 new = PyUnicode_AsEncodedString(x, encoding, errors);
3679 if (new == NULL)
3680 return NULL;
3681 assert(PyBytes_Check(new));
3682 return new;
3683 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003684
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003685 /* If it's not unicode, there can't be encoding or errors */
3686 if (encoding != NULL || errors != NULL) {
3687 PyErr_SetString(PyExc_TypeError,
3688 "encoding or errors without a string argument");
3689 return NULL;
3690 }
3691
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003692 /* We'd like to call PyObject_Bytes here, but we need to check for an
3693 integer argument before deferring to PyBytes_FromObject, something
3694 PyObject_Bytes doesn't do. */
3695 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3696 if (func != NULL) {
3697 new = PyObject_CallFunctionObjArgs(func, NULL);
3698 Py_DECREF(func);
3699 if (new == NULL)
3700 return NULL;
3701 if (!PyBytes_Check(new)) {
3702 PyErr_Format(PyExc_TypeError,
3703 "__bytes__ returned non-bytes (type %.200s)",
3704 Py_TYPE(new)->tp_name);
3705 Py_DECREF(new);
3706 return NULL;
3707 }
3708 return new;
3709 }
3710 else if (PyErr_Occurred())
3711 return NULL;
3712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003713 /* Is it an integer? */
3714 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3715 if (size == -1 && PyErr_Occurred()) {
3716 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3717 return NULL;
3718 PyErr_Clear();
3719 }
3720 else if (size < 0) {
3721 PyErr_SetString(PyExc_ValueError, "negative count");
3722 return NULL;
3723 }
3724 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003725 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003726 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003727 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003728 return new;
3729 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003730
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003731 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003732}
3733
3734PyObject *
3735PyBytes_FromObject(PyObject *x)
3736{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003737 PyObject *new, *it;
3738 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003740 if (x == NULL) {
3741 PyErr_BadInternalCall();
3742 return NULL;
3743 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003744
3745 if (PyBytes_CheckExact(x)) {
3746 Py_INCREF(x);
3747 return x;
3748 }
3749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003750 /* Use the modern buffer interface */
3751 if (PyObject_CheckBuffer(x)) {
3752 Py_buffer view;
3753 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3754 return NULL;
3755 new = PyBytes_FromStringAndSize(NULL, view.len);
3756 if (!new)
3757 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003758 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3759 &view, view.len, 'C') < 0)
3760 goto fail;
3761 PyBuffer_Release(&view);
3762 return new;
3763 fail:
3764 Py_XDECREF(new);
3765 PyBuffer_Release(&view);
3766 return NULL;
3767 }
3768 if (PyUnicode_Check(x)) {
3769 PyErr_SetString(PyExc_TypeError,
3770 "cannot convert unicode object to bytes");
3771 return NULL;
3772 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003774 if (PyList_CheckExact(x)) {
3775 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3776 if (new == NULL)
3777 return NULL;
3778 for (i = 0; i < Py_SIZE(x); i++) {
3779 Py_ssize_t value = PyNumber_AsSsize_t(
3780 PyList_GET_ITEM(x, i), PyExc_ValueError);
3781 if (value == -1 && PyErr_Occurred()) {
3782 Py_DECREF(new);
3783 return NULL;
3784 }
3785 if (value < 0 || value >= 256) {
3786 PyErr_SetString(PyExc_ValueError,
3787 "bytes must be in range(0, 256)");
3788 Py_DECREF(new);
3789 return NULL;
3790 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003791 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 }
3793 return new;
3794 }
3795 if (PyTuple_CheckExact(x)) {
3796 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3797 if (new == NULL)
3798 return NULL;
3799 for (i = 0; i < Py_SIZE(x); i++) {
3800 Py_ssize_t value = PyNumber_AsSsize_t(
3801 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3802 if (value == -1 && PyErr_Occurred()) {
3803 Py_DECREF(new);
3804 return NULL;
3805 }
3806 if (value < 0 || value >= 256) {
3807 PyErr_SetString(PyExc_ValueError,
3808 "bytes must be in range(0, 256)");
3809 Py_DECREF(new);
3810 return NULL;
3811 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003812 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003813 }
3814 return new;
3815 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003817 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003818 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003819 if (size == -1 && PyErr_Occurred())
3820 return NULL;
3821 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3822 returning a shared empty bytes string. This required because we
3823 want to call _PyBytes_Resize() the returned object, which we can
3824 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003825 if (size == 0)
3826 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003827 new = PyBytes_FromStringAndSize(NULL, size);
3828 if (new == NULL)
3829 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003830 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003832 /* Get the iterator */
3833 it = PyObject_GetIter(x);
3834 if (it == NULL)
3835 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003837 /* Run the iterator to exhaustion */
3838 for (i = 0; ; i++) {
3839 PyObject *item;
3840 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003842 /* Get the next item */
3843 item = PyIter_Next(it);
3844 if (item == NULL) {
3845 if (PyErr_Occurred())
3846 goto error;
3847 break;
3848 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003850 /* Interpret it as an int (__index__) */
3851 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3852 Py_DECREF(item);
3853 if (value == -1 && PyErr_Occurred())
3854 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003856 /* Range check */
3857 if (value < 0 || value >= 256) {
3858 PyErr_SetString(PyExc_ValueError,
3859 "bytes must be in range(0, 256)");
3860 goto error;
3861 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003863 /* Append the byte */
3864 if (i >= size) {
3865 size = 2 * size + 1;
3866 if (_PyBytes_Resize(&new, size) < 0)
3867 goto error;
3868 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003869 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003870 }
3871 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003873 /* Clean up and return success */
3874 Py_DECREF(it);
3875 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003876
3877 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003878 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003879 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003880 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003881}
3882
3883static PyObject *
3884str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3885{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003886 PyObject *tmp, *pnew;
3887 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003889 assert(PyType_IsSubtype(type, &PyBytes_Type));
3890 tmp = bytes_new(&PyBytes_Type, args, kwds);
3891 if (tmp == NULL)
3892 return NULL;
3893 assert(PyBytes_CheckExact(tmp));
3894 n = PyBytes_GET_SIZE(tmp);
3895 pnew = type->tp_alloc(type, n);
3896 if (pnew != NULL) {
3897 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3898 PyBytes_AS_STRING(tmp), n+1);
3899 ((PyBytesObject *)pnew)->ob_shash =
3900 ((PyBytesObject *)tmp)->ob_shash;
3901 }
3902 Py_DECREF(tmp);
3903 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003904}
3905
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003906PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003907"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003908bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003909bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003910bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3911bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003912\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003913Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003914 - an iterable yielding integers in range(256)\n\
3915 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003916 - any object implementing the buffer API.\n\
3917 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003918
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003919static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003920
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003921PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003922 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3923 "bytes",
3924 PyBytesObject_SIZE,
3925 sizeof(char),
3926 bytes_dealloc, /* tp_dealloc */
3927 0, /* tp_print */
3928 0, /* tp_getattr */
3929 0, /* tp_setattr */
3930 0, /* tp_reserved */
3931 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003932 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003933 &bytes_as_sequence, /* tp_as_sequence */
3934 &bytes_as_mapping, /* tp_as_mapping */
3935 (hashfunc)bytes_hash, /* tp_hash */
3936 0, /* tp_call */
3937 bytes_str, /* tp_str */
3938 PyObject_GenericGetAttr, /* tp_getattro */
3939 0, /* tp_setattro */
3940 &bytes_as_buffer, /* tp_as_buffer */
3941 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3942 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3943 bytes_doc, /* tp_doc */
3944 0, /* tp_traverse */
3945 0, /* tp_clear */
3946 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3947 0, /* tp_weaklistoffset */
3948 bytes_iter, /* tp_iter */
3949 0, /* tp_iternext */
3950 bytes_methods, /* tp_methods */
3951 0, /* tp_members */
3952 0, /* tp_getset */
3953 &PyBaseObject_Type, /* tp_base */
3954 0, /* tp_dict */
3955 0, /* tp_descr_get */
3956 0, /* tp_descr_set */
3957 0, /* tp_dictoffset */
3958 0, /* tp_init */
3959 0, /* tp_alloc */
3960 bytes_new, /* tp_new */
3961 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003962};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003963
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003964void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003965PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003966{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003967 assert(pv != NULL);
3968 if (*pv == NULL)
3969 return;
3970 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003971 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003972 return;
3973 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003974
3975 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3976 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003977 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003978 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003979
Antoine Pitrou161d6952014-05-01 14:36:20 +02003980 wb.len = -1;
3981 if (_getbuffer(w, &wb) < 0) {
3982 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3983 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3984 Py_CLEAR(*pv);
3985 return;
3986 }
3987
3988 oldsize = PyBytes_GET_SIZE(*pv);
3989 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3990 PyErr_NoMemory();
3991 goto error;
3992 }
3993 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3994 goto error;
3995
3996 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3997 PyBuffer_Release(&wb);
3998 return;
3999
4000 error:
4001 PyBuffer_Release(&wb);
4002 Py_CLEAR(*pv);
4003 return;
4004 }
4005
4006 else {
4007 /* Multiple references, need to create new object */
4008 PyObject *v;
4009 v = bytes_concat(*pv, w);
4010 Py_DECREF(*pv);
4011 *pv = v;
4012 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004013}
4014
4015void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02004016PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004018 PyBytes_Concat(pv, w);
4019 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004020}
4021
4022
Ethan Furmanb95b5612015-01-23 20:05:18 -08004023/* The following function breaks the notion that bytes are immutable:
4024 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004025 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08004026 as creating a new bytes object and destroying the old one, only
4027 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004028 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08004029 Note that if there's not enough memory to resize the bytes object, the
4030 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004031 memory" exception is set, and -1 is returned. Else (on success) 0 is
4032 returned, and the value in *pv may or may not be the same as on input.
4033 As always, an extra byte is allocated for a trailing \0 byte (newsize
4034 does *not* include that), and a trailing \0 byte is stored.
4035*/
4036
4037int
4038_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
4039{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02004040 PyObject *v;
4041 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004042 v = *pv;
4043 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
4044 *pv = 0;
4045 Py_DECREF(v);
4046 PyErr_BadInternalCall();
4047 return -1;
4048 }
4049 /* XXX UNREF/NEWREF interface should be more symmetrical */
4050 _Py_DEC_REFTOTAL;
4051 _Py_ForgetReference(v);
4052 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03004053 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004054 if (*pv == NULL) {
4055 PyObject_Del(v);
4056 PyErr_NoMemory();
4057 return -1;
4058 }
4059 _Py_NewReference(*pv);
4060 sv = (PyBytesObject *) *pv;
4061 Py_SIZE(sv) = newsize;
4062 sv->ob_sval[newsize] = '\0';
4063 sv->ob_shash = -1; /* invalidate cached hash value */
4064 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004065}
4066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004067void
4068PyBytes_Fini(void)
4069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004070 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004071 for (i = 0; i < UCHAR_MAX + 1; i++)
4072 Py_CLEAR(characters[i]);
4073 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004074}
4075
Benjamin Peterson4116f362008-05-27 00:36:20 +00004076/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004077
/* Instance layout of the bytes iterator (PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* index into it_seq of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004083
4084static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004085striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004086{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004087 _PyObject_GC_UNTRACK(it);
4088 Py_XDECREF(it->it_seq);
4089 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004090}
4091
4092static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004093striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004094{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004095 Py_VISIT(it->it_seq);
4096 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004097}
4098
4099static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004100striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004101{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004102 PyBytesObject *seq;
4103 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004105 assert(it != NULL);
4106 seq = it->it_seq;
4107 if (seq == NULL)
4108 return NULL;
4109 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004111 if (it->it_index < PyBytes_GET_SIZE(seq)) {
4112 item = PyLong_FromLong(
4113 (unsigned char)seq->ob_sval[it->it_index]);
4114 if (item != NULL)
4115 ++it->it_index;
4116 return item;
4117 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004119 Py_DECREF(seq);
4120 it->it_seq = NULL;
4121 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004122}
4123
4124static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004125striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004126{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004127 Py_ssize_t len = 0;
4128 if (it->it_seq)
4129 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
4130 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004131}
4132
4133PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004134 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004135
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004136static PyObject *
4137striter_reduce(striterobject *it)
4138{
4139 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02004140 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004141 it->it_seq, it->it_index);
4142 } else {
4143 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
4144 if (u == NULL)
4145 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02004146 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004147 }
4148}
4149
4150PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4151
4152static PyObject *
4153striter_setstate(striterobject *it, PyObject *state)
4154{
4155 Py_ssize_t index = PyLong_AsSsize_t(state);
4156 if (index == -1 && PyErr_Occurred())
4157 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00004158 if (it->it_seq != NULL) {
4159 if (index < 0)
4160 index = 0;
4161 else if (index > PyBytes_GET_SIZE(it->it_seq))
4162 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
4163 it->it_index = index;
4164 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004165 Py_RETURN_NONE;
4166}
4167
4168PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
4169
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004170static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004171 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4172 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004173 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
4174 reduce_doc},
4175 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
4176 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004177 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004178};
4179
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004180PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004181 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4182 "bytes_iterator", /* tp_name */
4183 sizeof(striterobject), /* tp_basicsize */
4184 0, /* tp_itemsize */
4185 /* methods */
4186 (destructor)striter_dealloc, /* tp_dealloc */
4187 0, /* tp_print */
4188 0, /* tp_getattr */
4189 0, /* tp_setattr */
4190 0, /* tp_reserved */
4191 0, /* tp_repr */
4192 0, /* tp_as_number */
4193 0, /* tp_as_sequence */
4194 0, /* tp_as_mapping */
4195 0, /* tp_hash */
4196 0, /* tp_call */
4197 0, /* tp_str */
4198 PyObject_GenericGetAttr, /* tp_getattro */
4199 0, /* tp_setattro */
4200 0, /* tp_as_buffer */
4201 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4202 0, /* tp_doc */
4203 (traverseproc)striter_traverse, /* tp_traverse */
4204 0, /* tp_clear */
4205 0, /* tp_richcompare */
4206 0, /* tp_weaklistoffset */
4207 PyObject_SelfIter, /* tp_iter */
4208 (iternextfunc)striter_next, /* tp_iternext */
4209 striter_methods, /* tp_methods */
4210 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004211};
4212
4213static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00004214bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004215{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004216 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004217
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004218 if (!PyBytes_Check(seq)) {
4219 PyErr_BadInternalCall();
4220 return NULL;
4221 }
4222 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
4223 if (it == NULL)
4224 return NULL;
4225 it->it_index = 0;
4226 Py_INCREF(seq);
4227 it->it_seq = (PyBytesObject *)seq;
4228 _PyObject_GC_TRACK(it);
4229 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004230}