blob: 378c44d6ee2b3353ab4cd69b129aa40493414382 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
11class bytes
12[clinic start generated code]*/
13/*[clinic end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
14
Neal Norwitz2bad9702007-08-27 06:19:22 +000015static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000016_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000017{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020018 PyBufferProcs *bufferprocs;
19 if (PyBytes_CheckExact(obj)) {
20 /* Fast path, e.g. for .join() of many bytes objects */
21 Py_INCREF(obj);
22 view->obj = obj;
23 view->buf = PyBytes_AS_STRING(obj);
24 view->len = PyBytes_GET_SIZE(obj);
25 return view->len;
26 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000027
Antoine Pitroucfc22b42012-10-16 21:07:23 +020028 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
29 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 {
Antoine Pitroud1188562010-06-09 16:38:55 +000031 PyErr_Format(PyExc_TypeError,
32 "Type %.100s doesn't support the buffer API",
33 Py_TYPE(obj)->tp_name);
34 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000035 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000036
Antoine Pitroucfc22b42012-10-16 21:07:23 +020037 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000039 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000040}
41
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000043Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000045
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046static PyBytesObject *characters[UCHAR_MAX + 1];
47static PyBytesObject *nullstring;
48
Mark Dickinsonfd24b322008-12-06 15:33:31 +000049/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
50 for a string of length n should request PyBytesObject_SIZE + n bytes.
51
52 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
53 3 bytes per string allocation on a typical system.
54*/
55#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
56
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020079static PyObject *
80_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000081{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020082 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 assert(size >= 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000085#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000087#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 Py_INCREF(op);
89 return (PyObject *)op;
90 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
93 PyErr_SetString(PyExc_OverflowError,
94 "byte string is too large");
95 return NULL;
96 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020099 if (use_calloc)
100 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
101 else
102 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100105 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200107 if (!use_calloc)
108 op->ob_sval[size] = '\0';
109 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 if (size == 0) {
111 nullstring = op;
112 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200113 }
114 return (PyObject *) op;
115}
116
117PyObject *
118PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
119{
120 PyBytesObject *op;
121 if (size < 0) {
122 PyErr_SetString(PyExc_SystemError,
123 "Negative size passed to PyBytes_FromStringAndSize");
124 return NULL;
125 }
126 if (size == 1 && str != NULL &&
127 (op = characters[*str & UCHAR_MAX]) != NULL)
128 {
129#ifdef COUNT_ALLOCS
130 one_strings++;
131#endif
132 Py_INCREF(op);
133 return (PyObject *)op;
134 }
135
136 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137 if (op == NULL)
138 return NULL;
139 if (str == NULL)
140 return (PyObject *) op;
141
142 Py_MEMCPY(op->ob_sval, str, size);
143 /* share short strings */
144 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 characters[*str & UCHAR_MAX] = op;
146 Py_INCREF(op);
147 }
148 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000149}
150
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151PyObject *
152PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000153{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200154 size_t size;
155 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 assert(str != NULL);
158 size = strlen(str);
159 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
160 PyErr_SetString(PyExc_OverflowError,
161 "byte string is too long");
162 return NULL;
163 }
164 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 Py_INCREF(op);
169 return (PyObject *)op;
170 }
171 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 Py_INCREF(op);
176 return (PyObject *)op;
177 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 /* Inline PyObject_NewVar */
180 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
181 if (op == NULL)
182 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100183 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 op->ob_shash = -1;
185 Py_MEMCPY(op->ob_sval, str, size+1);
186 /* share short strings */
187 if (size == 0) {
188 nullstring = op;
189 Py_INCREF(op);
190 } else if (size == 1) {
191 characters[*str & UCHAR_MAX] = op;
192 Py_INCREF(op);
193 }
194 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000195}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000196
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000197PyObject *
198PyBytes_FromFormatV(const char *format, va_list vargs)
199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 va_list count;
201 Py_ssize_t n = 0;
202 const char* f;
203 char *s;
204 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000205
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000206 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 /* step 1: figure out how large a buffer we need */
208 for (f = format; *f; f++) {
209 if (*f == '%') {
210 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000211 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
215 * they don't affect the amount of space we reserve.
216 */
217 if ((*f == 'l' || *f == 'z') &&
218 (f[1] == 'd' || f[1] == 'u'))
219 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 switch (*f) {
222 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100223 {
224 int c = va_arg(count, int);
225 if (c < 0 || c > 255) {
226 PyErr_SetString(PyExc_OverflowError,
227 "PyBytes_FromFormatV(): %c format "
228 "expects an integer in range [0; 255]");
229 return NULL;
230 }
231 n++;
232 break;
233 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 case '%':
235 n++;
236 break;
237 case 'd': case 'u': case 'i': case 'x':
238 (void) va_arg(count, int);
239 /* 20 bytes is enough to hold a 64-bit
240 integer. Decimal takes the most space.
241 This isn't enough for octal. */
242 n += 20;
243 break;
244 case 's':
245 s = va_arg(count, char*);
246 n += strlen(s);
247 break;
248 case 'p':
249 (void) va_arg(count, int);
250 /* maximum 64-bit pointer representation:
251 * 0xffffffffffffffff
252 * so 19 characters is enough.
253 * XXX I count 18 -- what's the extra for?
254 */
255 n += 19;
256 break;
257 default:
258 /* if we stumble upon an unknown
259 formatting code, copy the rest of
260 the format string to the output
261 string. (we cannot just skip the
262 code, since there's no way to know
263 what's in the argument list) */
264 n += strlen(p);
265 goto expand;
266 }
267 } else
268 n++;
269 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000270 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 /* step 2: fill the buffer */
272 /* Since we've analyzed how much space we need for the worst case,
273 use sprintf directly instead of the slower PyOS_snprintf. */
274 string = PyBytes_FromStringAndSize(NULL, n);
275 if (!string)
276 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 for (f = format; *f; f++) {
281 if (*f == '%') {
282 const char* p = f++;
283 Py_ssize_t i;
284 int longflag = 0;
285 int size_tflag = 0;
286 /* parse the width.precision part (we're only
287 interested in the precision value, if any) */
288 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000289 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 n = (n*10) + *f++ - '0';
291 if (*f == '.') {
292 f++;
293 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000294 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 n = (n*10) + *f++ - '0';
296 }
David Malcolm96960882010-11-05 17:23:41 +0000297 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 f++;
299 /* handle the long flag, but only for %ld and %lu.
300 others can be added when necessary. */
301 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
302 longflag = 1;
303 ++f;
304 }
305 /* handle the size_t flag. */
306 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
307 size_tflag = 1;
308 ++f;
309 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 switch (*f) {
312 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100313 {
314 int c = va_arg(vargs, int);
315 /* c has been checked for overflow in the first step */
316 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100318 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 case 'd':
320 if (longflag)
321 sprintf(s, "%ld", va_arg(vargs, long));
322 else if (size_tflag)
323 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
324 va_arg(vargs, Py_ssize_t));
325 else
326 sprintf(s, "%d", va_arg(vargs, int));
327 s += strlen(s);
328 break;
329 case 'u':
330 if (longflag)
331 sprintf(s, "%lu",
332 va_arg(vargs, unsigned long));
333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
335 va_arg(vargs, size_t));
336 else
337 sprintf(s, "%u",
338 va_arg(vargs, unsigned int));
339 s += strlen(s);
340 break;
341 case 'i':
342 sprintf(s, "%i", va_arg(vargs, int));
343 s += strlen(s);
344 break;
345 case 'x':
346 sprintf(s, "%x", va_arg(vargs, int));
347 s += strlen(s);
348 break;
349 case 's':
350 p = va_arg(vargs, char*);
351 i = strlen(p);
352 if (n > 0 && i > n)
353 i = n;
354 Py_MEMCPY(s, p, i);
355 s += i;
356 break;
357 case 'p':
358 sprintf(s, "%p", va_arg(vargs, void*));
359 /* %p is ill-defined: ensure leading 0x. */
360 if (s[1] == 'X')
361 s[1] = 'x';
362 else if (s[1] != 'x') {
363 memmove(s+2, s, strlen(s)+1);
364 s[0] = '0';
365 s[1] = 'x';
366 }
367 s += strlen(s);
368 break;
369 case '%':
370 *s++ = '%';
371 break;
372 default:
373 strcpy(s, p);
374 s += strlen(s);
375 goto end;
376 }
377 } else
378 *s++ = *f;
379 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380
381 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
383 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000384}
385
386PyObject *
387PyBytes_FromFormat(const char *format, ...)
388{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 PyObject* ret;
390 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000391
392#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000394#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000396#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 ret = PyBytes_FromFormatV(format, vargs);
398 va_end(vargs);
399 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400}
401
402static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000403bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000406}
407
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000408/* Unescape a backslash-escaped string. If unicode is non-zero,
409 the string is a u-literal. If recode_encoding is non-zero,
410 the string is UTF-8 encoded and should be re-encoded in the
411 specified encoding. */
412
413PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 Py_ssize_t len,
415 const char *errors,
416 Py_ssize_t unicode,
417 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 int c;
420 char *p, *buf;
421 const char *end;
422 PyObject *v;
423 Py_ssize_t newlen = recode_encoding ? 4*len:len;
424 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
425 if (v == NULL)
426 return NULL;
427 p = buf = PyBytes_AsString(v);
428 end = s + len;
429 while (s < end) {
430 if (*s != '\\') {
431 non_esc:
432 if (recode_encoding && (*s & 0x80)) {
433 PyObject *u, *w;
434 char *r;
435 const char* t;
436 Py_ssize_t rn;
437 t = s;
438 /* Decode non-ASCII bytes as UTF-8. */
439 while (t < end && (*t & 0x80)) t++;
440 u = PyUnicode_DecodeUTF8(s, t - s, errors);
441 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 /* Recode them in target encoding. */
444 w = PyUnicode_AsEncodedString(
445 u, recode_encoding, errors);
446 Py_DECREF(u);
447 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 /* Append bytes to output buffer. */
450 assert(PyBytes_Check(w));
451 r = PyBytes_AS_STRING(w);
452 rn = PyBytes_GET_SIZE(w);
453 Py_MEMCPY(p, r, rn);
454 p += rn;
455 Py_DECREF(w);
456 s = t;
457 } else {
458 *p++ = *s++;
459 }
460 continue;
461 }
462 s++;
463 if (s==end) {
464 PyErr_SetString(PyExc_ValueError,
465 "Trailing \\ in string");
466 goto failed;
467 }
468 switch (*s++) {
469 /* XXX This assumes ASCII! */
470 case '\n': break;
471 case '\\': *p++ = '\\'; break;
472 case '\'': *p++ = '\''; break;
473 case '\"': *p++ = '\"'; break;
474 case 'b': *p++ = '\b'; break;
475 case 'f': *p++ = '\014'; break; /* FF */
476 case 't': *p++ = '\t'; break;
477 case 'n': *p++ = '\n'; break;
478 case 'r': *p++ = '\r'; break;
479 case 'v': *p++ = '\013'; break; /* VT */
480 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
481 case '0': case '1': case '2': case '3':
482 case '4': case '5': case '6': case '7':
483 c = s[-1] - '0';
484 if (s < end && '0' <= *s && *s <= '7') {
485 c = (c<<3) + *s++ - '0';
486 if (s < end && '0' <= *s && *s <= '7')
487 c = (c<<3) + *s++ - '0';
488 }
489 *p++ = c;
490 break;
491 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000492 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 unsigned int x = 0;
494 c = Py_CHARMASK(*s);
495 s++;
David Malcolm96960882010-11-05 17:23:41 +0000496 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000498 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 x = 10 + c - 'a';
500 else
501 x = 10 + c - 'A';
502 x = x << 4;
503 c = Py_CHARMASK(*s);
504 s++;
David Malcolm96960882010-11-05 17:23:41 +0000505 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000507 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 x += 10 + c - 'a';
509 else
510 x += 10 + c - 'A';
511 *p++ = x;
512 break;
513 }
514 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200515 PyErr_Format(PyExc_ValueError,
516 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200517 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 goto failed;
519 }
520 if (strcmp(errors, "replace") == 0) {
521 *p++ = '?';
522 } else if (strcmp(errors, "ignore") == 0)
523 /* do nothing */;
524 else {
525 PyErr_Format(PyExc_ValueError,
526 "decoding error; unknown "
527 "error handling code: %.400s",
528 errors);
529 goto failed;
530 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200531 /* skip \x */
532 if (s < end && Py_ISXDIGIT(s[0]))
533 s++; /* and a hexdigit */
534 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 default:
536 *p++ = '\\';
537 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200538 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 UTF-8 bytes may follow. */
540 }
541 }
542 if (p-buf < newlen)
543 _PyBytes_Resize(&v, p - buf);
544 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000545 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 Py_DECREF(v);
547 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
549
550/* -------------------------------------------------------------------- */
551/* object api */
552
553Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200554PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 if (!PyBytes_Check(op)) {
557 PyErr_Format(PyExc_TypeError,
558 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
559 return -1;
560 }
561 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000562}
563
564char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200565PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 if (!PyBytes_Check(op)) {
568 PyErr_Format(PyExc_TypeError,
569 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
570 return NULL;
571 }
572 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573}
574
575int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200576PyBytes_AsStringAndSize(PyObject *obj,
577 char **s,
578 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 if (s == NULL) {
581 PyErr_BadInternalCall();
582 return -1;
583 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 if (!PyBytes_Check(obj)) {
586 PyErr_Format(PyExc_TypeError,
587 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
588 return -1;
589 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 *s = PyBytes_AS_STRING(obj);
592 if (len != NULL)
593 *len = PyBytes_GET_SIZE(obj);
594 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
595 PyErr_SetString(PyExc_TypeError,
596 "expected bytes with no null");
597 return -1;
598 }
599 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000600}
Neal Norwitz6968b052007-02-27 19:02:19 +0000601
602/* -------------------------------------------------------------------- */
603/* Methods */
604
Eric Smith0923d1d2009-04-16 20:16:10 +0000605#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000606
607#include "stringlib/fastsearch.h"
608#include "stringlib/count.h"
609#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200610#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000611#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000612#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000613#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000614
Eric Smith0f78bff2009-11-30 01:01:42 +0000615#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000616
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000617PyObject *
618PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000619{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200620 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200621 Py_ssize_t i, length = Py_SIZE(op);
622 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 unsigned char quote, *s, *p;
625
626 /* Compute size of output string */
627 squotes = dquotes = 0;
628 newsize = 3; /* b'' */
629 s = (unsigned char*)op->ob_sval;
630 for (i = 0; i < length; i++) {
631 switch(s[i]) {
632 case '\'': squotes++; newsize++; break;
633 case '"': dquotes++; newsize++; break;
634 case '\\': case '\t': case '\n': case '\r':
635 newsize += 2; break; /* \C */
636 default:
637 if (s[i] < ' ' || s[i] >= 0x7f)
638 newsize += 4; /* \xHH */
639 else
640 newsize++;
641 }
642 }
643 quote = '\'';
644 if (smartquotes && squotes && !dquotes)
645 quote = '"';
646 if (squotes && quote == '\'')
647 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200648
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 PyErr_SetString(PyExc_OverflowError,
651 "bytes object is too large to make repr");
652 return NULL;
653 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654
655 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (v == NULL) {
657 return NULL;
658 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200659 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200661 *p++ = 'b', *p++ = quote;
662 for (i = 0; i < length; i++) {
663 unsigned char c = op->ob_sval[i];
664 if (c == quote || c == '\\')
665 *p++ = '\\', *p++ = c;
666 else if (c == '\t')
667 *p++ = '\\', *p++ = 't';
668 else if (c == '\n')
669 *p++ = '\\', *p++ = 'n';
670 else if (c == '\r')
671 *p++ = '\\', *p++ = 'r';
672 else if (c < ' ' || c >= 0x7f) {
673 *p++ = '\\';
674 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200675 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
676 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 else
679 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200681 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200682 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200683 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000684}
685
Neal Norwitz6968b052007-02-27 19:02:19 +0000686static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000687bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000688{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000690}
691
Neal Norwitz6968b052007-02-27 19:02:19 +0000692static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000693bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000694{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 if (Py_BytesWarningFlag) {
696 if (PyErr_WarnEx(PyExc_BytesWarning,
697 "str() on a bytes instance", 1))
698 return NULL;
699 }
700 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000701}
702
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000704bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000705{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000707}
Neal Norwitz6968b052007-02-27 19:02:19 +0000708
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000709/* This is also used by PyBytes_Concat() */
710static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000711bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000712{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 Py_ssize_t size;
714 Py_buffer va, vb;
715 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 va.len = -1;
718 vb.len = -1;
719 if (_getbuffer(a, &va) < 0 ||
720 _getbuffer(b, &vb) < 0) {
721 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
722 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
723 goto done;
724 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 /* Optimize end cases */
727 if (va.len == 0 && PyBytes_CheckExact(b)) {
728 result = b;
729 Py_INCREF(result);
730 goto done;
731 }
732 if (vb.len == 0 && PyBytes_CheckExact(a)) {
733 result = a;
734 Py_INCREF(result);
735 goto done;
736 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 size = va.len + vb.len;
739 if (size < 0) {
740 PyErr_NoMemory();
741 goto done;
742 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 result = PyBytes_FromStringAndSize(NULL, size);
745 if (result != NULL) {
746 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
747 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
748 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000749
750 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 if (va.len != -1)
752 PyBuffer_Release(&va);
753 if (vb.len != -1)
754 PyBuffer_Release(&vb);
755 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000756}
Neal Norwitz6968b052007-02-27 19:02:19 +0000757
758static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200759bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000760{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200761 Py_ssize_t i;
762 Py_ssize_t j;
763 Py_ssize_t size;
764 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 size_t nbytes;
766 if (n < 0)
767 n = 0;
768 /* watch out for overflows: the size can overflow int,
769 * and the # of bytes needed can overflow size_t
770 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000771 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 PyErr_SetString(PyExc_OverflowError,
773 "repeated bytes are too long");
774 return NULL;
775 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000776 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
778 Py_INCREF(a);
779 return (PyObject *)a;
780 }
781 nbytes = (size_t)size;
782 if (nbytes + PyBytesObject_SIZE <= nbytes) {
783 PyErr_SetString(PyExc_OverflowError,
784 "repeated bytes are too long");
785 return NULL;
786 }
787 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
788 if (op == NULL)
789 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100790 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 op->ob_shash = -1;
792 op->ob_sval[size] = '\0';
793 if (Py_SIZE(a) == 1 && n > 0) {
794 memset(op->ob_sval, a->ob_sval[0] , n);
795 return (PyObject *) op;
796 }
797 i = 0;
798 if (i < size) {
799 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
800 i = Py_SIZE(a);
801 }
802 while (i < size) {
803 j = (i <= size-i) ? i : size-i;
804 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
805 i += j;
806 }
807 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000808}
809
Guido van Rossum98297ee2007-11-06 21:34:58 +0000810static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000811bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000812{
813 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
814 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000815 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000816 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000817 PyErr_Clear();
818 if (_getbuffer(arg, &varg) < 0)
819 return -1;
820 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
821 varg.buf, varg.len, 0);
822 PyBuffer_Release(&varg);
823 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000824 }
825 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000826 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
827 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000828 }
829
Antoine Pitrou0010d372010-08-15 17:12:55 +0000830 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000831}
832
Neal Norwitz6968b052007-02-27 19:02:19 +0000833static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200834bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 if (i < 0 || i >= Py_SIZE(a)) {
837 PyErr_SetString(PyExc_IndexError, "index out of range");
838 return NULL;
839 }
840 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000841}
842
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100843Py_LOCAL(int)
844bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
845{
846 int cmp;
847 Py_ssize_t len;
848
849 len = Py_SIZE(a);
850 if (Py_SIZE(b) != len)
851 return 0;
852
853 if (a->ob_sval[0] != b->ob_sval[0])
854 return 0;
855
856 cmp = memcmp(a->ob_sval, b->ob_sval, len);
857 return (cmp == 0);
858}
859
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000860static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 int c;
864 Py_ssize_t len_a, len_b;
865 Py_ssize_t min_len;
866 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 /* Make sure both arguments are strings. */
869 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
870 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
871 (PyObject_IsInstance((PyObject*)a,
872 (PyObject*)&PyUnicode_Type) ||
873 PyObject_IsInstance((PyObject*)b,
874 (PyObject*)&PyUnicode_Type))) {
875 if (PyErr_WarnEx(PyExc_BytesWarning,
876 "Comparison between bytes and string", 1))
877 return NULL;
878 }
879 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100881 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100883 case Py_EQ:
884 case Py_LE:
885 case Py_GE:
886 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100888 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100889 case Py_NE:
890 case Py_LT:
891 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100893 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100894 default:
895 PyErr_BadArgument();
896 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 }
898 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100899 else if (op == Py_EQ || op == Py_NE) {
900 int eq = bytes_compare_eq(a, b);
901 eq ^= (op == Py_NE);
902 result = eq ? Py_True : Py_False;
903 }
904 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100905 len_a = Py_SIZE(a);
906 len_b = Py_SIZE(b);
907 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100908 if (min_len > 0) {
909 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100910 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100911 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100913 else
914 c = 0;
915 if (c == 0)
916 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
917 switch (op) {
918 case Py_LT: c = c < 0; break;
919 case Py_LE: c = c <= 0; break;
920 case Py_GT: c = c > 0; break;
921 case Py_GE: c = c >= 0; break;
922 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100923 PyErr_BadArgument();
924 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100925 }
926 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 Py_INCREF(result);
930 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000931}
932
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000933static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000934bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000935{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100936 if (a->ob_shash == -1) {
937 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100938 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100939 }
940 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000941}
942
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000943static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000944bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 if (PyIndex_Check(item)) {
947 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
948 if (i == -1 && PyErr_Occurred())
949 return NULL;
950 if (i < 0)
951 i += PyBytes_GET_SIZE(self);
952 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
953 PyErr_SetString(PyExc_IndexError,
954 "index out of range");
955 return NULL;
956 }
957 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
958 }
959 else if (PySlice_Check(item)) {
960 Py_ssize_t start, stop, step, slicelength, cur, i;
961 char* source_buf;
962 char* result_buf;
963 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000964
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000965 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 PyBytes_GET_SIZE(self),
967 &start, &stop, &step, &slicelength) < 0) {
968 return NULL;
969 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 if (slicelength <= 0) {
972 return PyBytes_FromStringAndSize("", 0);
973 }
974 else if (start == 0 && step == 1 &&
975 slicelength == PyBytes_GET_SIZE(self) &&
976 PyBytes_CheckExact(self)) {
977 Py_INCREF(self);
978 return (PyObject *)self;
979 }
980 else if (step == 1) {
981 return PyBytes_FromStringAndSize(
982 PyBytes_AS_STRING(self) + start,
983 slicelength);
984 }
985 else {
986 source_buf = PyBytes_AS_STRING(self);
987 result = PyBytes_FromStringAndSize(NULL, slicelength);
988 if (result == NULL)
989 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 result_buf = PyBytes_AS_STRING(result);
992 for (cur = start, i = 0; i < slicelength;
993 cur += step, i++) {
994 result_buf[i] = source_buf[cur];
995 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 return result;
998 }
999 }
1000 else {
1001 PyErr_Format(PyExc_TypeError,
1002 "byte indices must be integers, not %.200s",
1003 Py_TYPE(item)->tp_name);
1004 return NULL;
1005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001006}
1007
1008static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001009bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1012 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001013}
1014
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001015static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 (lenfunc)bytes_length, /*sq_length*/
1017 (binaryfunc)bytes_concat, /*sq_concat*/
1018 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1019 (ssizeargfunc)bytes_item, /*sq_item*/
1020 0, /*sq_slice*/
1021 0, /*sq_ass_item*/
1022 0, /*sq_ass_slice*/
1023 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001024};
1025
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001026static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 (lenfunc)bytes_length,
1028 (binaryfunc)bytes_subscript,
1029 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030};
1031
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001032static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 (getbufferproc)bytes_buffer_getbuffer,
1034 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001035};
1036
1037
/* Values for the striptype argument of do_xstrip(), do_strip() and
   do_argstrip(): which end(s) of the bytes object to strip. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001042/*[clinic input]
1043bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001044
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001045 sep: object = None
        The delimiter according to which to split the bytes.
1047 None (the default value) means split on ASCII whitespace characters
1048 (space, tab, return, newline, formfeed, vertical tab).
1049 maxsplit: Py_ssize_t = -1
1050 Maximum number of splits to do.
1051 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001053Return a list of the sections in the bytes, using sep as the delimiter.
1054[clinic start generated code]*/
1055
1056PyDoc_STRVAR(bytes_split__doc__,
1057"split(sep=None, maxsplit=-1)\n"
1058"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1059"\n"
1060" sep\n"
1061" The delimiter according which to split the bytes.\n"
1062" None (the default value) means split on ASCII whitespace characters\n"
1063" (space, tab, return, newline, formfeed, vertical tab).\n"
1064" maxsplit\n"
1065" Maximum number of splits to do.\n"
1066" -1 (the default value) means no limit.");
1067
1068#define BYTES_SPLIT_METHODDEF \
1069 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
1071static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001072bytes_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit);
1073
1074static PyObject *
1075bytes_split(PyObject *self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001076{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001077 PyObject *return_value = NULL;
1078 static char *_keywords[] = {"sep", "maxsplit", NULL};
1079 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001081
1082 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1083 "|On:split", _keywords,
1084 &sep, &maxsplit))
1085 goto exit;
1086 return_value = bytes_split_impl(self, sep, maxsplit);
1087
1088exit:
1089 return return_value;
1090}
1091
1092static PyObject *
1093bytes_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
1094/*[clinic end generated code: checksum=0c1bf4dba4fc7e03254d9c2f670d8e2682b38785]*/
1095{
1096 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 const char *s = PyBytes_AS_STRING(self), *sub;
1098 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001099 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 if (maxsplit < 0)
1102 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001103 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001105 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 return NULL;
1107 sub = vsub.buf;
1108 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1111 PyBuffer_Release(&vsub);
1112 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001113}
1114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001115/*[clinic input]
1116bytes.partition
1117
1118 self: self(type="PyBytesObject *")
1119 sep: object
1120 /
1121
1122Partition the bytes into three parts using the given separator.
1123
1124This will search for the separator sep in the bytes. If the separator is found,
1125returns a 3-tuple containing the part before the separator, the separator
1126itself, and the part after it.
1127
1128If the separator is not found, returns a 3-tuple containing the original bytes
1129object and two empty bytes objects.
1130[clinic start generated code]*/
1131
1132PyDoc_STRVAR(bytes_partition__doc__,
1133"partition(sep)\n"
1134"Partition the bytes into three parts using the given separator.\n"
1135"\n"
1136"This will search for the separator sep in the bytes. If the separator is found,\n"
1137"returns a 3-tuple containing the part before the separator, the separator\n"
1138"itself, and the part after it.\n"
1139"\n"
1140"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1141"object and two empty bytes objects.");
1142
1143#define BYTES_PARTITION_METHODDEF \
1144 {"partition", (PyCFunction)bytes_partition, METH_O, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001145
1146static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001147bytes_partition(PyBytesObject *self, PyObject *sep)
1148/*[clinic end generated code: checksum=02ddd49338037b02d203b165fb2e48c6eb779983]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001149{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001150 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001153 if (PyBytes_Check(sep)) {
1154 sep_chars = PyBytes_AS_STRING(sep);
1155 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001157 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 return stringlib_partition(
1161 (PyObject*) self,
1162 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001163 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001165}
1166
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001167/*[clinic input]
1168bytes.rpartition
1169
1170 self: self(type="PyBytesObject *")
1171 sep: object
1172 /
1173
1174Partition the bytes into three parts using the given separator.
1175
This will search for the separator sep in the bytes, starting at the end. If
1177the separator is found, returns a 3-tuple containing the part before the
1178separator, the separator itself, and the part after it.
1179
1180If the separator is not found, returns a 3-tuple containing two empty bytes
1181objects and the original bytes object.
1182[clinic start generated code]*/
1183
1184PyDoc_STRVAR(bytes_rpartition__doc__,
1185"rpartition(sep)\n"
1186"Partition the bytes into three parts using the given separator.\n"
1187"\n"
1188"This will search for the separator sep in the bytes, starting and the end. If\n"
1189"the separator is found, returns a 3-tuple containing the part before the\n"
1190"separator, the separator itself, and the part after it.\n"
1191"\n"
1192"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1193"objects and the original bytes object.");
1194
1195#define BYTES_RPARTITION_METHODDEF \
1196 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001197
1198static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001199bytes_rpartition(PyBytesObject *self, PyObject *sep)
1200/*[clinic end generated code: checksum=af06be67ab873c2792db9961f504350bc99f126a]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001201{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001202 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001205 if (PyBytes_Check(sep)) {
1206 sep_chars = PyBytes_AS_STRING(sep);
1207 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001209 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 return stringlib_rpartition(
1213 (PyObject*) self,
1214 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001215 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001217}
1218
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001219/*[clinic input]
1220bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001221
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001222Return a list of the sections in the bytes, using sep as the delimiter.
1223
1224Splitting is done starting at the end of the bytes and working to the front.
1225[clinic start generated code]*/
1226
1227PyDoc_STRVAR(bytes_rsplit__doc__,
1228"rsplit(sep=None, maxsplit=-1)\n"
1229"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1230"\n"
1231" sep\n"
1232" The delimiter according which to split the bytes.\n"
1233" None (the default value) means split on ASCII whitespace characters\n"
1234" (space, tab, return, newline, formfeed, vertical tab).\n"
1235" maxsplit\n"
1236" Maximum number of splits to do.\n"
1237" -1 (the default value) means no limit.\n"
1238"\n"
1239"Splitting is done starting at the end of the bytes and working to the front.");
1240
1241#define BYTES_RSPLIT_METHODDEF \
1242 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Neal Norwitz6968b052007-02-27 19:02:19 +00001244static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001245bytes_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit);
1246
1247static PyObject *
1248bytes_rsplit(PyObject *self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001249{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001250 PyObject *return_value = NULL;
1251 static char *_keywords[] = {"sep", "maxsplit", NULL};
1252 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001254
1255 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1256 "|On:rsplit", _keywords,
1257 &sep, &maxsplit))
1258 goto exit;
1259 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1260
1261exit:
1262 return return_value;
1263}
1264
1265static PyObject *
1266bytes_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
1267/*[clinic end generated code: checksum=f5bcee1d73b31b9df8821731f4ed3a8d1bc78588]*/
1268{
1269 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 const char *s = PyBytes_AS_STRING(self), *sub;
1271 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001272 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 if (maxsplit < 0)
1275 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001276 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001278 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 return NULL;
1280 sub = vsub.buf;
1281 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1284 PyBuffer_Release(&vsub);
1285 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001286}
1287
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001289/*[clinic input]
1290bytes.join
1291
1292 iterable_of_bytes: object
1293 /
1294
1295Concatenate any number of bytes objects.
1296
1297The bytes whose method is called is inserted in between each pair.
1298
1299The result is returned as a new bytes object.
1300
1301Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1302[clinic start generated code]*/
1303
1304PyDoc_STRVAR(bytes_join__doc__,
1305"join(iterable_of_bytes)\n"
1306"Concatenate any number of bytes objects.\n"
1307"\n"
1308"The bytes whose method is called is inserted in between each pair.\n"
1309"\n"
1310"The result is returned as a new bytes object.\n"
1311"\n"
1312"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1313
1314#define BYTES_JOIN_METHODDEF \
1315 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316
Neal Norwitz6968b052007-02-27 19:02:19 +00001317static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001318bytes_join(PyObject *self, PyObject *iterable_of_bytes)
1319/*[clinic end generated code: checksum=3fa2b5fc3b1494ba4db416303571f4ecd055090b]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001320{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001321 return stringlib_bytes_join(self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001322}
1323
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324PyObject *
1325_PyBytes_Join(PyObject *sep, PyObject *x)
1326{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 assert(sep != NULL && PyBytes_Check(sep));
1328 assert(x != NULL);
1329 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330}
1331
/* Helper macro to fix up start/end slice values in place.
 *
 * end is clamped to len; a negative end or start has len added to it and
 * is then clamped to 0 if still negative.  A positive start is left
 * untouched.  All three arguments are evaluated several times and start
 * and end are assigned to, so pass plain variables.
 *
 * The expansion is two consecutive if statements (not a single
 * statement), so do not use it as the unbraced body of an if/else/loop.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len) {                            \
        end = len;                              \
    }                                           \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0) {                          \
            end = 0;                            \
        }                                       \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0) {                        \
            start = 0;                          \
        }                                       \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001346
1347Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001348bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001351 char byte;
1352 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 const char *sub;
1354 Py_ssize_t sub_len;
1355 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001356 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357
Antoine Pitrouac65d962011-10-20 23:54:17 +02001358 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1359 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361
Antoine Pitrouac65d962011-10-20 23:54:17 +02001362 if (subobj) {
1363 if (_getbuffer(subobj, &subbuf) < 0)
1364 return -2;
1365
1366 sub = subbuf.buf;
1367 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001369 else {
1370 sub = &byte;
1371 sub_len = 1;
1372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001375 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1377 sub, sub_len, start, end);
1378 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001379 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1381 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001382
1383 if (subobj)
1384 PyBuffer_Release(&subbuf);
1385
1386 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387}
1388
1389
1390PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001391"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001392\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001393Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001394such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001396\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397Return -1 on failure.");
1398
Neal Norwitz6968b052007-02-27 19:02:19 +00001399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001400bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 Py_ssize_t result = bytes_find_internal(self, args, +1);
1403 if (result == -2)
1404 return NULL;
1405 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001406}
1407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
1409PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001410"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001411\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412Like B.find() but raise ValueError when the substring is not found.");
1413
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 Py_ssize_t result = bytes_find_internal(self, args, +1);
1418 if (result == -2)
1419 return NULL;
1420 if (result == -1) {
1421 PyErr_SetString(PyExc_ValueError,
1422 "substring not found");
1423 return NULL;
1424 }
1425 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001426}
1427
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
1429PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001430"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001431\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001433such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001435\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436Return -1 on failure.");
1437
Neal Norwitz6968b052007-02-27 19:02:19 +00001438static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001439bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 Py_ssize_t result = bytes_find_internal(self, args, -1);
1442 if (result == -2)
1443 return NULL;
1444 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001445}
1446
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001449"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450\n\
1451Like B.rfind() but raise ValueError when the substring is not found.");
1452
1453static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001454bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 Py_ssize_t result = bytes_find_internal(self, args, -1);
1457 if (result == -2)
1458 return NULL;
1459 if (result == -1) {
1460 PyErr_SetString(PyExc_ValueError,
1461 "substring not found");
1462 return NULL;
1463 }
1464 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465}
1466
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
1468Py_LOCAL_INLINE(PyObject *)
1469do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 Py_buffer vsep;
1472 char *s = PyBytes_AS_STRING(self);
1473 Py_ssize_t len = PyBytes_GET_SIZE(self);
1474 char *sep;
1475 Py_ssize_t seplen;
1476 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 if (_getbuffer(sepobj, &vsep) < 0)
1479 return NULL;
1480 sep = vsep.buf;
1481 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 i = 0;
1484 if (striptype != RIGHTSTRIP) {
1485 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1486 i++;
1487 }
1488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 j = len;
1491 if (striptype != LEFTSTRIP) {
1492 do {
1493 j--;
1494 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1495 j++;
1496 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1501 Py_INCREF(self);
1502 return (PyObject*)self;
1503 }
1504 else
1505 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001506}
1507
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
1509Py_LOCAL_INLINE(PyObject *)
1510do_strip(PyBytesObject *self, int striptype)
1511{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 char *s = PyBytes_AS_STRING(self);
1513 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 i = 0;
1516 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001517 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 i++;
1519 }
1520 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 j = len;
1523 if (striptype != LEFTSTRIP) {
1524 do {
1525 j--;
David Malcolm96960882010-11-05 17:23:41 +00001526 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 j++;
1528 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1531 Py_INCREF(self);
1532 return (PyObject*)self;
1533 }
1534 else
1535 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001536}
1537
1538
1539Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001540do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001541{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001542 if (bytes != NULL && bytes != Py_None) {
1543 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 }
1545 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546}
1547
/* bytes.strip([bytes]).
 *
 * Everything from the "[clinic start generated code]" marker down to the
 * "[clinic end generated code: checksum=...]" marker is Argument Clinic
 * output covered by that checksum; regenerate it from the clinic input
 * rather than editing it by hand.
 *
 * bytes_strip() unpacks zero or one positional argument (defaulting to
 * Py_None) and forwards it to bytes_strip_impl(); it returns NULL when
 * argument unpacking fails.
 */
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001548/*[clinic input]
1549bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001550
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001551 self: self(type="PyBytesObject *")
1552 bytes: object = None
1553 /
1554
1555Strip leading and trailing bytes contained in the argument.
1556
1557If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1558[clinic start generated code]*/
1559
1560PyDoc_STRVAR(bytes_strip__doc__,
1561"strip(bytes=None)\n"
1562"Strip leading and trailing bytes contained in the argument.\n"
1563"\n"
1564"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
1565
1566#define BYTES_STRIP_METHODDEF \
1567 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
1568
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001569static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001570bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
1571
1572static PyObject *
1573bytes_strip(PyObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001574{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001575 PyObject *return_value = NULL;
1576 PyObject *bytes = Py_None;
1577
1578 if (!PyArg_UnpackTuple(args, "strip",
1579 0, 1,
1580 &bytes))
1581 goto exit;
1582 return_value = bytes_strip_impl((PyBytesObject *)self, bytes);
1583
1584exit:
1585 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001586}
1587
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001588static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001589bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1590/*[clinic end generated code: checksum=3c59229e9332a1782987f047d43a9526a3b3c90f]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001591{
 /* BOTHSTRIP: trim both ends.  do_argstrip() strips whitespace when
    bytes is None and the bytes contained in the argument otherwise. */
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001592 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001593}
1594
/* bytes.lstrip([bytes]).
 *
 * Everything from the "[clinic start generated code]" marker down to the
 * "[clinic end generated code: checksum=...]" marker is Argument Clinic
 * output covered by that checksum; regenerate it from the clinic input
 * rather than editing it by hand.
 *
 * bytes_lstrip() unpacks zero or one positional argument (defaulting to
 * Py_None) and forwards it to bytes_lstrip_impl(); it returns NULL when
 * argument unpacking fails.
 */
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001595/*[clinic input]
1596bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001598 self: self(type="PyBytesObject *")
1599 bytes: object = None
1600 /
1601
1602Strip leading bytes contained in the argument.
1603
1604If the argument is omitted or None, strip leading ASCII whitespace.
1605[clinic start generated code]*/
1606
1607PyDoc_STRVAR(bytes_lstrip__doc__,
1608"lstrip(bytes=None)\n"
1609"Strip leading bytes contained in the argument.\n"
1610"\n"
1611"If the argument is omitted or None, strip leading ASCII whitespace.");
1612
1613#define BYTES_LSTRIP_METHODDEF \
1614 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
1615
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001616static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
1618
1619static PyObject *
1620bytes_lstrip(PyObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001621{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001622 PyObject *return_value = NULL;
1623 PyObject *bytes = Py_None;
1624
1625 if (!PyArg_UnpackTuple(args, "lstrip",
1626 0, 1,
1627 &bytes))
1628 goto exit;
1629 return_value = bytes_lstrip_impl((PyBytesObject *)self, bytes);
1630
1631exit:
1632 return return_value;
1633}
1634
1635static PyObject *
1636bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
1637/*[clinic end generated code: checksum=34a0a2cae35c06ad984c5657659d4d28ec0e407a]*/
1638{
 /* LEFTSTRIP: trim the leading end only.  do_argstrip() strips
    whitespace when bytes is None and the bytes contained in the
    argument otherwise. */
1639 return do_argstrip(self, LEFTSTRIP, bytes);
1640}
1641
/* bytes.rstrip([bytes]).
 *
 * Everything from the "[clinic start generated code]" marker down to the
 * "[clinic end generated code: checksum=...]" marker is Argument Clinic
 * output covered by that checksum; regenerate it from the clinic input
 * rather than editing it by hand.
 *
 * bytes_rstrip() unpacks zero or one positional argument (defaulting to
 * Py_None) and forwards it to bytes_rstrip_impl(); it returns NULL when
 * argument unpacking fails.
 */
1642/*[clinic input]
1643bytes.rstrip
1644
1645 self: self(type="PyBytesObject *")
1646 bytes: object = None
1647 /
1648
1649Strip trailing bytes contained in the argument.
1650
1651If the argument is omitted or None, strip trailing ASCII whitespace.
1652[clinic start generated code]*/
1653
1654PyDoc_STRVAR(bytes_rstrip__doc__,
1655"rstrip(bytes=None)\n"
1656"Strip trailing bytes contained in the argument.\n"
1657"\n"
1658"If the argument is omitted or None, strip trailing ASCII whitespace.");
1659
1660#define BYTES_RSTRIP_METHODDEF \
1661 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
1662
1663static PyObject *
1664bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
1665
1666static PyObject *
1667bytes_rstrip(PyObject *self, PyObject *args)
1668{
1669 PyObject *return_value = NULL;
1670 PyObject *bytes = Py_None;
1671
1672 if (!PyArg_UnpackTuple(args, "rstrip",
1673 0, 1,
1674 &bytes))
1675 goto exit;
1676 return_value = bytes_rstrip_impl((PyBytesObject *)self, bytes);
1677
1678exit:
1679 return return_value;
1680}
1681
1682static PyObject *
1683bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
1684/*[clinic end generated code: checksum=eeb1b0cff2f4bfbad7324eea81dd9dec2a872ad6]*/
1685{
 /* RIGHTSTRIP: trim the trailing end only.  do_argstrip() strips
    whitespace when bytes is None and the bytes contained in the
    argument otherwise. */
1686 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001687}
Neal Norwitz6968b052007-02-27 19:02:19 +00001688
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
1690PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001691"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001692\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001694string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001695as in slice notation.");
1696
1697static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001698bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 PyObject *sub_obj;
1701 const char *str = PyBytes_AS_STRING(self), *sub;
1702 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001703 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouac65d962011-10-20 23:54:17 +02001706 Py_buffer vsub;
1707 PyObject *count_obj;
1708
1709 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1710 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouac65d962011-10-20 23:54:17 +02001713 if (sub_obj) {
1714 if (_getbuffer(sub_obj, &vsub) < 0)
1715 return NULL;
1716
1717 sub = vsub.buf;
1718 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001720 else {
1721 sub = &byte;
1722 sub_len = 1;
1723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouac65d962011-10-20 23:54:17 +02001727 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1729 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001730
1731 if (sub_obj)
1732 PyBuffer_Release(&vsub);
1733
1734 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735}
1736
1737
/* bytes.translate(table[, deletechars]).
 *
 * Everything from the "[clinic start generated code]" marker down to the
 * "[clinic end generated code: checksum=...]" marker (which sits in the
 * header of bytes_translate_impl) is Argument Clinic output covered by
 * that checksum; regenerate it from the clinic input rather than editing
 * it by hand.
 *
 * bytes_translate() dispatches on the number of positional arguments:
 *   1 argument  -> table only; deletechars stays NULL, group_right_1 == 0
 *   2 arguments -> table and deletechars; group_right_1 == 1
 *   otherwise   -> TypeError
 * and then forwards to bytes_translate_impl().
 */
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001738/*[clinic input]
1739bytes.translate
1740
1741 self: self(type="PyBytesObject *")
1742 table: object
1743 Translation table, which must be a bytes object of length 256.
1744 [
1745 deletechars: object
1746 ]
1747 /
1748
1749Return a copy with each character mapped by the given translation table.
1750
1751All characters occurring in the optional argument deletechars are removed.
1752The remaining characters are mapped through the given translation table.
1753[clinic start generated code]*/
1754
1755PyDoc_STRVAR(bytes_translate__doc__,
1756"translate(table, [deletechars])\n"
1757"Return a copy with each character mapped by the given translation table.\n"
1758"\n"
1759" table\n"
1760" Translation table, which must be a bytes object of length 256.\n"
1761"\n"
1762"All characters occurring in the optional argument deletechars are removed.\n"
1763"The remaining characters are mapped through the given translation table.");
1764
1765#define BYTES_TRANSLATE_METHODDEF \
1766 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
1768static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001769bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
1770
1771static PyObject *
1772bytes_translate(PyObject *self, PyObject *args)
1773{
1774 PyObject *return_value = NULL;
1775 PyObject *table;
1776 int group_right_1 = 0;
1777 PyObject *deletechars = NULL;
1778
1779 switch (PyTuple_GET_SIZE(args)) {
1780 case 1:
1781 if (!PyArg_ParseTuple(args, "O:translate", &table))
1782 return NULL;
1783 break;
1784 case 2:
1785 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
1786 return NULL;
1787 group_right_1 = 1;
1788 break;
1789 default:
1790 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
1791 return NULL;
1792 }
1793 return_value = bytes_translate_impl((PyBytesObject *)self, table, group_right_1, deletechars);
1794
1795 return return_value;
1796}
1797
1798static PyObject *
1799bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
1800/*[clinic end generated code: checksum=5ebfc00fffd8122849d1e02ee784c29a7228f0bb]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001802 char *input, *output;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001803 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001804 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001806 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 Py_ssize_t inlen, tablen, dellen = 0;
1808 PyObject *result;
1809 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001811 if (PyBytes_Check(table)) {
1812 table_chars = PyBytes_AS_STRING(table);
1813 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001815 else if (table == Py_None) {
1816 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 tablen = 256;
1818 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001819 else if (PyObject_AsCharBuffer(table, &table_chars, &tablen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 if (tablen != 256) {
1823 PyErr_SetString(PyExc_ValueError,
1824 "translation table must be 256 characters long");
1825 return NULL;
1826 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 if (deletechars != NULL) {
1829 if (PyBytes_Check(deletechars)) {
1830 del_table_chars = PyBytes_AS_STRING(deletechars);
1831 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 else if (PyObject_AsCharBuffer(deletechars, &del_table_chars, &dellen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 return NULL;
1835 }
1836 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001837 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 dellen = 0;
1839 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 inlen = PyBytes_GET_SIZE(input_obj);
1842 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1843 if (result == NULL)
1844 return NULL;
1845 output_start = output = PyBytes_AsString(result);
1846 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001848 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 /* If no deletions are required, use faster code */
1850 for (i = inlen; --i >= 0; ) {
1851 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001852 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 changed = 1;
1854 }
1855 if (changed || !PyBytes_CheckExact(input_obj))
1856 return result;
1857 Py_DECREF(result);
1858 Py_INCREF(input_obj);
1859 return input_obj;
1860 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 for (i = 0; i < 256; i++)
1864 trans_table[i] = Py_CHARMASK(i);
1865 } else {
1866 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001867 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001871 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 for (i = inlen; --i >= 0; ) {
1874 c = Py_CHARMASK(*input++);
1875 if (trans_table[c] != -1)
1876 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1877 continue;
1878 changed = 1;
1879 }
1880 if (!changed && PyBytes_CheckExact(input_obj)) {
1881 Py_DECREF(result);
1882 Py_INCREF(input_obj);
1883 return input_obj;
1884 }
1885 /* Fix the size of the resulting string */
1886 if (inlen > 0)
1887 _PyBytes_Resize(&result, output - output_start);
1888 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889}
1890
1891
/* bytes.maketrans(frm, to) -- static method.
 *
 * Everything from the "[clinic start generated code]" marker down to the
 * "[clinic end generated code: checksum=...]" marker is Argument Clinic
 * output covered by that checksum; regenerate it from the clinic input
 * rather than editing it by hand.
 *
 * bytes_maketrans() unpacks exactly two positional arguments and forwards
 * them to bytes_maketrans_impl(); it returns NULL when argument unpacking
 * fails.
 */
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001892/*[clinic input]
1893
1894@staticmethod
1895bytes.maketrans
1896
1897 frm: object
1898 to: object
1899 /
1900
1901Return a translation table useable for the bytes or bytearray translate method.
1902
1903The returned table will be one where each byte in frm is mapped to the byte at
1904the same position in to.
1905
1906The bytes objects frm and to must be of the same length.
1907[clinic start generated code]*/
1908
1909PyDoc_STRVAR(bytes_maketrans__doc__,
1910"maketrans(frm, to)\n"
1911"Return a translation table useable for the bytes or bytearray translate method.\n"
1912"\n"
1913"The returned table will be one where each byte in frm is mapped to the byte at\n"
1914"the same position in to.\n"
1915"\n"
1916"The bytes objects frm and to must be of the same length.");
1917
1918#define BYTES_MAKETRANS_METHODDEF \
1919 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
1920
Georg Brandlabc38772009-04-12 15:51:51 +00001921static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001922bytes_maketrans_impl(void *null, PyObject *frm, PyObject *to);
1923
1924static PyObject *
1925bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001926{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001927 PyObject *return_value = NULL;
1928 PyObject *frm;
1929 PyObject *to;
1930
1931 if (!PyArg_UnpackTuple(args, "maketrans",
1932 2, 2,
1933 &frm, &to))
1934 goto exit;
1935 return_value = bytes_maketrans_impl(null, frm, to);
1936
1937exit:
1938 return return_value;
1939}
1940
1941static PyObject *
1942bytes_maketrans_impl(void *null, PyObject *frm, PyObject *to)
1943/*[clinic end generated code: checksum=79a066bfdc71b55bd4bc8bce540e34a57ac53a8d]*/
1944{
 /* Table construction is delegated entirely to _Py_bytes_maketrans();
    the `null` parameter is not used here. */
1945 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00001946}
1947
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948/* find and count characters and substrings */
1949
/* Return a char pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL when there is none (memchr wrapper). */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1952
1953/* String ops must return a string. */
1954/* If the object is subclass of string, create a copy */
1955Py_LOCAL(PyBytesObject *)
1956return_self(PyBytesObject *self)
1957{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 if (PyBytes_CheckExact(self)) {
1959 Py_INCREF(self);
1960 return self;
1961 }
1962 return (PyBytesObject *)PyBytes_FromStringAndSize(
1963 PyBytes_AS_STRING(self),
1964 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965}
1966
1967Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001968countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 Py_ssize_t count=0;
1971 const char *start=target;
1972 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 while ( (start=findchar(start, end-start, c)) != NULL ) {
1975 count++;
1976 if (count >= maxcount)
1977 break;
1978 start += 1;
1979 }
1980 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981}
1982
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
1984/* Algorithms for different cases of string replacement */
1985
1986/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1987Py_LOCAL(PyBytesObject *)
1988replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 const char *to_s, Py_ssize_t to_len,
1990 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 char *self_s, *result_s;
1993 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001994 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001999 /* 1 at the end plus 1 after every character;
2000 count = min(maxcount, self_len + 1) */
2001 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002003 else
2004 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2005 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 /* Check for overflow */
2008 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002009 assert(count > 0);
2010 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 PyErr_SetString(PyExc_OverflowError,
2012 "replacement bytes are too long");
2013 return NULL;
2014 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002015 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 if (! (result = (PyBytesObject *)
2018 PyBytes_FromStringAndSize(NULL, result_len)) )
2019 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 self_s = PyBytes_AS_STRING(self);
2022 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 /* Lay the first one down (guaranteed this will occur) */
2027 Py_MEMCPY(result_s, to_s, to_len);
2028 result_s += to_len;
2029 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 for (i=0; i<count; i++) {
2032 *result_s++ = *self_s++;
2033 Py_MEMCPY(result_s, to_s, to_len);
2034 result_s += to_len;
2035 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 /* Copy the rest of the original string */
2038 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002041}
2042
2043/* Special case for deleting a single character */
2044/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2045Py_LOCAL(PyBytesObject *)
2046replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 char *self_s, *result_s;
2050 char *start, *next, *end;
2051 Py_ssize_t self_len, result_len;
2052 Py_ssize_t count;
2053 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 self_len = PyBytes_GET_SIZE(self);
2056 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 count = countchar(self_s, self_len, from_c, maxcount);
2059 if (count == 0) {
2060 return return_self(self);
2061 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 result_len = self_len - count; /* from_len == 1 */
2064 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 if ( (result = (PyBytesObject *)
2067 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2068 return NULL;
2069 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 start = self_s;
2072 end = self_s + self_len;
2073 while (count-- > 0) {
2074 next = findchar(start, end-start, from_c);
2075 if (next == NULL)
2076 break;
2077 Py_MEMCPY(result_s, start, next-start);
2078 result_s += (next-start);
2079 start = next+1;
2080 }
2081 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084}
2085
2086/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2087
2088Py_LOCAL(PyBytesObject *)
2089replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 const char *from_s, Py_ssize_t from_len,
2091 Py_ssize_t maxcount) {
2092 char *self_s, *result_s;
2093 char *start, *next, *end;
2094 Py_ssize_t self_len, result_len;
2095 Py_ssize_t count, offset;
2096 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 self_len = PyBytes_GET_SIZE(self);
2099 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 count = stringlib_count(self_s, self_len,
2102 from_s, from_len,
2103 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 if (count == 0) {
2106 /* no matches */
2107 return return_self(self);
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 result_len = self_len - (count * from_len);
2111 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 if ( (result = (PyBytesObject *)
2114 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2115 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 start = self_s;
2120 end = self_s + self_len;
2121 while (count-- > 0) {
2122 offset = stringlib_find(start, end-start,
2123 from_s, from_len,
2124 0);
2125 if (offset == -1)
2126 break;
2127 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002131 result_s += (next-start);
2132 start = next+from_len;
2133 }
2134 Py_MEMCPY(result_s, start, end-start);
2135 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136}
2137
2138/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2139Py_LOCAL(PyBytesObject *)
2140replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 char from_c, char to_c,
2142 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 char *self_s, *result_s, *start, *end, *next;
2145 Py_ssize_t self_len;
2146 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 /* The result string will be the same size */
2149 self_s = PyBytes_AS_STRING(self);
2150 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 if (next == NULL) {
2155 /* No matches; return the original string */
2156 return return_self(self);
2157 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 /* Need to make a new string */
2160 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2161 if (result == NULL)
2162 return NULL;
2163 result_s = PyBytes_AS_STRING(result);
2164 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 /* change everything in-place, starting with this one */
2167 start = result_s + (next-self_s);
2168 *start = to_c;
2169 start++;
2170 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 while (--maxcount > 0) {
2173 next = findchar(start, end-start, from_c);
2174 if (next == NULL)
2175 break;
2176 *next = to_c;
2177 start = next+1;
2178 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181}
2182
2183/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2184Py_LOCAL(PyBytesObject *)
2185replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 const char *from_s, Py_ssize_t from_len,
2187 const char *to_s, Py_ssize_t to_len,
2188 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 char *result_s, *start, *end;
2191 char *self_s;
2192 Py_ssize_t self_len, offset;
2193 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002195 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 self_s = PyBytes_AS_STRING(self);
2198 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 offset = stringlib_find(self_s, self_len,
2201 from_s, from_len,
2202 0);
2203 if (offset == -1) {
2204 /* No matches; return the original string */
2205 return return_self(self);
2206 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 /* Need to make a new string */
2209 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2210 if (result == NULL)
2211 return NULL;
2212 result_s = PyBytes_AS_STRING(result);
2213 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 /* change everything in-place, starting with this one */
2216 start = result_s + offset;
2217 Py_MEMCPY(start, to_s, from_len);
2218 start += from_len;
2219 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 while ( --maxcount > 0) {
2222 offset = stringlib_find(start, end-start,
2223 from_s, from_len,
2224 0);
2225 if (offset==-1)
2226 break;
2227 Py_MEMCPY(start+offset, to_s, from_len);
2228 start += offset+from_len;
2229 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002231 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002232}
2233
2234/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2235Py_LOCAL(PyBytesObject *)
2236replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002237 char from_c,
2238 const char *to_s, Py_ssize_t to_len,
2239 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002241 char *self_s, *result_s;
2242 char *start, *next, *end;
2243 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002244 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002247 self_s = PyBytes_AS_STRING(self);
2248 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 count = countchar(self_s, self_len, from_c, maxcount);
2251 if (count == 0) {
2252 /* no matches, return unchanged */
2253 return return_self(self);
2254 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002256 /* use the difference between current and new, hence the "-1" */
2257 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002258 assert(count > 0);
2259 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002260 PyErr_SetString(PyExc_OverflowError,
2261 "replacement bytes are too long");
2262 return NULL;
2263 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002264 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 if ( (result = (PyBytesObject *)
2267 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2268 return NULL;
2269 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 start = self_s;
2272 end = self_s + self_len;
2273 while (count-- > 0) {
2274 next = findchar(start, end-start, from_c);
2275 if (next == NULL)
2276 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002278 if (next == start) {
2279 /* replace with the 'to' */
2280 Py_MEMCPY(result_s, to_s, to_len);
2281 result_s += to_len;
2282 start += 1;
2283 } else {
2284 /* copy the unchanged old then the 'to' */
2285 Py_MEMCPY(result_s, start, next-start);
2286 result_s += (next-start);
2287 Py_MEMCPY(result_s, to_s, to_len);
2288 result_s += to_len;
2289 start = next+1;
2290 }
2291 }
2292 /* Copy the remainder of the remaining string */
2293 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296}
2297
2298/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2299Py_LOCAL(PyBytesObject *)
2300replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002301 const char *from_s, Py_ssize_t from_len,
2302 const char *to_s, Py_ssize_t to_len,
2303 Py_ssize_t maxcount) {
2304 char *self_s, *result_s;
2305 char *start, *next, *end;
2306 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002307 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 self_s = PyBytes_AS_STRING(self);
2311 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 count = stringlib_count(self_s, self_len,
2314 from_s, from_len,
2315 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 if (count == 0) {
2318 /* no matches, return unchanged */
2319 return return_self(self);
2320 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002322 /* Check for overflow */
2323 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002324 assert(count > 0);
2325 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 PyErr_SetString(PyExc_OverflowError,
2327 "replacement bytes are too long");
2328 return NULL;
2329 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002330 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 if ( (result = (PyBytesObject *)
2333 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2334 return NULL;
2335 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 start = self_s;
2338 end = self_s + self_len;
2339 while (count-- > 0) {
2340 offset = stringlib_find(start, end-start,
2341 from_s, from_len,
2342 0);
2343 if (offset == -1)
2344 break;
2345 next = start+offset;
2346 if (next == start) {
2347 /* replace with the 'to' */
2348 Py_MEMCPY(result_s, to_s, to_len);
2349 result_s += to_len;
2350 start += from_len;
2351 } else {
2352 /* copy the unchanged old then the 'to' */
2353 Py_MEMCPY(result_s, start, next-start);
2354 result_s += (next-start);
2355 Py_MEMCPY(result_s, to_s, to_len);
2356 result_s += to_len;
2357 start = next+from_len;
2358 }
2359 }
2360 /* Copy the remainder of the remaining string */
2361 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364}
2365
2366
2367Py_LOCAL(PyBytesObject *)
2368replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 const char *from_s, Py_ssize_t from_len,
2370 const char *to_s, Py_ssize_t to_len,
2371 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 if (maxcount < 0) {
2374 maxcount = PY_SSIZE_T_MAX;
2375 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2376 /* nothing to do; return the original string */
2377 return return_self(self);
2378 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 if (maxcount == 0 ||
2381 (from_len == 0 && to_len == 0)) {
2382 /* nothing to do; return the original string */
2383 return return_self(self);
2384 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 if (from_len == 0) {
2389 /* insert the 'to' string everywhere. */
2390 /* >>> "Python".replace("", ".") */
2391 /* '.P.y.t.h.o.n.' */
2392 return replace_interleave(self, to_s, to_len, maxcount);
2393 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2396 /* point for an empty self string to generate a non-empty string */
2397 /* Special case so the remaining code always gets a non-empty string */
2398 if (PyBytes_GET_SIZE(self) == 0) {
2399 return return_self(self);
2400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 if (to_len == 0) {
2403 /* delete all occurrences of 'from' string */
2404 if (from_len == 1) {
2405 return replace_delete_single_character(
2406 self, from_s[0], maxcount);
2407 } else {
2408 return replace_delete_substring(self, from_s,
2409 from_len, maxcount);
2410 }
2411 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 if (from_len == to_len) {
2416 if (from_len == 1) {
2417 return replace_single_character_in_place(
2418 self,
2419 from_s[0],
2420 to_s[0],
2421 maxcount);
2422 } else {
2423 return replace_substring_in_place(
2424 self, from_s, from_len, to_s, to_len,
2425 maxcount);
2426 }
2427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 /* Otherwise use the more generic algorithms */
2430 if (from_len == 1) {
2431 return replace_single_character(self, from_s[0],
2432 to_s, to_len, maxcount);
2433 } else {
2434 /* len('from')>=2, len('to')>=1 */
2435 return replace_substring(self, from_s, from_len, to_s, to_len,
2436 maxcount);
2437 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002438}
2439
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002440
2441/*[clinic input]
2442bytes.replace
2443
2444 old: object
2445 new: object
2446 count: Py_ssize_t = -1
2447 Maximum number of occurrences to replace.
2448 -1 (the default value) means replace all occurrences.
2449 /
2450
2451Return a copy with all occurrences of substring old replaced by new.
2452
2453If the optional argument count is given, only the first count occurrences are
2454replaced.
2455[clinic start generated code]*/
2456
2457PyDoc_STRVAR(bytes_replace__doc__,
2458"replace(old, new, count=-1)\n"
2459"Return a copy with all occurrences of substring old replaced by new.\n"
2460"\n"
2461" count\n"
2462" Maximum number of occurrences to replace.\n"
2463" -1 (the default value) means replace all occurrences.\n"
2464"\n"
2465"If the optional argument count is given, only the first count occurrences are\n"
2466"replaced.");
2467
2468#define BYTES_REPLACE_METHODDEF \
2469 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002470
2471static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002472bytes_replace_impl(PyObject *self, PyObject *old, PyObject *new, Py_ssize_t count);
2473
2474static PyObject *
2475bytes_replace(PyObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002477 PyObject *return_value = NULL;
2478 PyObject *old;
2479 PyObject *new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002481
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002482 if (!PyArg_ParseTuple(args,
2483 "OO|n:replace",
2484 &old, &new, &count))
2485 goto exit;
2486 return_value = bytes_replace_impl(self, old, new, count);
2487
2488exit:
2489 return return_value;
2490}
2491
2492static PyObject *
2493bytes_replace_impl(PyObject *self, PyObject *old, PyObject *new, Py_ssize_t count)
2494/*[clinic end generated code: checksum=a4dfe745baf7f8a8e6d98d3ed6cb838c588c4caa]*/
2495{
2496 const char *old_s, *new_s;
2497 Py_ssize_t old_len, new_len;
2498
2499 if (PyBytes_Check(old)) {
2500 old_s = PyBytes_AS_STRING(old);
2501 old_len = PyBytes_GET_SIZE(old);
2502 }
2503 else if (PyObject_AsCharBuffer(old, &old_s, &old_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002506 if (PyBytes_Check(new)) {
2507 new_s = PyBytes_AS_STRING(new);
2508 new_len = PyBytes_GET_SIZE(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002510 else if (PyObject_AsCharBuffer(new, &new_s, &new_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 return (PyObject *)replace((PyBytesObject *) self,
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002514 old_s, old_len,
2515 new_s, new_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516}
2517
2518/** End DALKE **/
2519
2520/* Matches the end (direction >= 0) or start (direction < 0) of self
2521 * against substr, using the start and end arguments. Returns
2522 * -1 on error, 0 if not found and 1 if found.
2523 */
2524Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002525_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 Py_ssize_t len = PyBytes_GET_SIZE(self);
2529 Py_ssize_t slen;
2530 const char* sub;
2531 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 if (PyBytes_Check(substr)) {
2534 sub = PyBytes_AS_STRING(substr);
2535 slen = PyBytes_GET_SIZE(substr);
2536 }
2537 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2538 return -1;
2539 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 if (direction < 0) {
2544 /* startswith */
2545 if (start+slen > len)
2546 return 0;
2547 } else {
2548 /* endswith */
2549 if (end-start < slen || start > len)
2550 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 if (end-slen > start)
2553 start = end - slen;
2554 }
2555 if (end-start >= slen)
2556 return ! memcmp(str+start, sub, slen);
2557 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558}
2559
2560
2561PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002562"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002563\n\
2564Return True if B starts with the specified prefix, False otherwise.\n\
2565With optional start, test B beginning at that position.\n\
2566With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002567prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
2569static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002570bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 Py_ssize_t start = 0;
2573 Py_ssize_t end = PY_SSIZE_T_MAX;
2574 PyObject *subobj;
2575 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Jesus Ceaac451502011-04-20 17:09:23 +02002577 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 return NULL;
2579 if (PyTuple_Check(subobj)) {
2580 Py_ssize_t i;
2581 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2582 result = _bytes_tailmatch(self,
2583 PyTuple_GET_ITEM(subobj, i),
2584 start, end, -1);
2585 if (result == -1)
2586 return NULL;
2587 else if (result) {
2588 Py_RETURN_TRUE;
2589 }
2590 }
2591 Py_RETURN_FALSE;
2592 }
2593 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002594 if (result == -1) {
2595 if (PyErr_ExceptionMatches(PyExc_TypeError))
2596 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2597 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002599 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 else
2601 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602}
2603
2604
2605PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002606"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002607\n\
2608Return True if B ends with the specified suffix, False otherwise.\n\
2609With optional start, test B beginning at that position.\n\
2610With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002611suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002612
2613static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002614bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 Py_ssize_t start = 0;
2617 Py_ssize_t end = PY_SSIZE_T_MAX;
2618 PyObject *subobj;
2619 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002620
Jesus Ceaac451502011-04-20 17:09:23 +02002621 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 return NULL;
2623 if (PyTuple_Check(subobj)) {
2624 Py_ssize_t i;
2625 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2626 result = _bytes_tailmatch(self,
2627 PyTuple_GET_ITEM(subobj, i),
2628 start, end, +1);
2629 if (result == -1)
2630 return NULL;
2631 else if (result) {
2632 Py_RETURN_TRUE;
2633 }
2634 }
2635 Py_RETURN_FALSE;
2636 }
2637 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002638 if (result == -1) {
2639 if (PyErr_ExceptionMatches(PyExc_TypeError))
2640 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2641 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002643 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 else
2645 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646}
2647
2648
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002649/*[clinic input]
2650bytes.decode
2651
2652 encoding: str(c_default="NULL") = 'utf-8'
2653 The encoding with which to decode the bytes.
2654 errors: str(c_default="NULL") = 'strict'
2655 The error handling scheme to use for the handling of decoding errors.
2656 The default is 'strict' meaning that decoding errors raise a
2657 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2658 as well as any other name registered with codecs.register_error that
2659 can handle UnicodeDecodeErrors.
2660
2661Decode the bytes using the codec registered for encoding.
2662[clinic start generated code]*/
2663
2664PyDoc_STRVAR(bytes_decode__doc__,
2665"decode(encoding=\'utf-8\', errors=\'strict\')\n"
2666"Decode the bytes using the codec registered for encoding.\n"
2667"\n"
2668" encoding\n"
2669" The encoding with which to decode the bytes.\n"
2670" errors\n"
2671" The error handling scheme to use for the handling of decoding errors.\n"
2672" The default is \'strict\' meaning that decoding errors raise a\n"
2673" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
2674" as well as any other name registered with codecs.register_error that\n"
2675" can handle UnicodeDecodeErrors.");
2676
2677#define BYTES_DECODE_METHODDEF \
2678 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
2679
2680static PyObject *
2681bytes_decode_impl(PyObject *self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002682
2683static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002684bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002685{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002686 PyObject *return_value = NULL;
2687 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 const char *encoding = NULL;
2689 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002690
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002691 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2692 "|ss:decode", _keywords,
2693 &encoding, &errors))
2694 goto exit;
2695 return_value = bytes_decode_impl(self, encoding, errors);
2696
2697exit:
2698 return return_value;
2699}
2700
2701static PyObject *
2702bytes_decode_impl(PyObject *self, const char *encoding, const char *errors)
2703/*[clinic end generated code: checksum=b6efcc4420539a09d08df3aa733696a7119a22c7]*/
2704{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002706}
2707
Guido van Rossum20188312006-05-05 15:15:40 +00002708
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002709/*[clinic input]
2710bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002711
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002712 keepends: int(py_default="False") = 0
2713
2714Return a list of the lines in the bytes, breaking at line boundaries.
2715
2716Line breaks are not included in the resulting list unless keepends is given and
2717true.
2718[clinic start generated code]*/
2719
2720PyDoc_STRVAR(bytes_splitlines__doc__,
2721"splitlines(keepends=False)\n"
2722"Return a list of the lines in the bytes, breaking at line boundaries.\n"
2723"\n"
2724"Line breaks are not included in the resulting list unless keepends is given and\n"
2725"true.");
2726
2727#define BYTES_SPLITLINES_METHODDEF \
2728 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
2729
2730static PyObject *
2731bytes_splitlines_impl(PyObject *self, int keepends);
2732
2733static PyObject *
2734bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002735{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002736 PyObject *return_value = NULL;
2737 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002738 int keepends = 0;
2739
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002740 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2741 "|i:splitlines", _keywords,
2742 &keepends))
2743 goto exit;
2744 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002745
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002746exit:
2747 return return_value;
2748}
2749
2750static PyObject *
2751bytes_splitlines_impl(PyObject *self, int keepends)
2752/*[clinic end generated code: checksum=462dd01b87dcda72c538d8d89a310fcdab58cc8c]*/
2753{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002754 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002755 (PyObject*) self, PyBytes_AS_STRING(self),
2756 PyBytes_GET_SIZE(self), keepends
2757 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002758}
2759
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002760static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002761hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002762{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002763 if (c >= 128)
2764 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002765 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 return c - '0';
2767 else {
David Malcolm96960882010-11-05 17:23:41 +00002768 if (Py_ISUPPER(c))
2769 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 if (c >= 'a' && c <= 'f')
2771 return c - 'a' + 10;
2772 }
2773 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002774}
2775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002776/*[clinic input]
2777@classmethod
2778bytes.fromhex
2779
2780 string: unicode
2781 /
2782
2783Create a bytes object from a string of hexadecimal numbers.
2784
2785Spaces between two numbers are accepted.
2786Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2787[clinic start generated code]*/
2788
2789PyDoc_STRVAR(bytes_fromhex__doc__,
2790"fromhex(string)\n"
2791"Create a bytes object from a string of hexadecimal numbers.\n"
2792"\n"
2793"Spaces between two numbers are accepted.\n"
2794"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\xb9\\x01\\xef\'.");
2795
2796#define BYTES_FROMHEX_METHODDEF \
2797 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
2798
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002799static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002800bytes_fromhex_impl(PyTypeObject *cls, PyObject *string);
2801
2802static PyObject *
2803bytes_fromhex(PyTypeObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002804{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002805 PyObject *return_value = NULL;
2806 PyObject *string;
2807
2808 if (!PyArg_ParseTuple(args,
2809 "U:fromhex",
2810 &string))
2811 goto exit;
2812 return_value = bytes_fromhex_impl(cls, string);
2813
2814exit:
2815 return return_value;
2816}
2817
2818static PyObject *
2819bytes_fromhex_impl(PyTypeObject *cls, PyObject *string)
2820/*[clinic end generated code: checksum=0b6825075af40e95429328af699b6aae26ecaf94]*/
2821{
2822 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 Py_ssize_t hexlen, byteslen, i, j;
2825 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002826 void *data;
2827 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002828
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002829 assert(PyUnicode_Check(string));
2830 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002832 kind = PyUnicode_KIND(string);
2833 data = PyUnicode_DATA(string);
2834 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 byteslen = hexlen/2; /* This overestimates if there are spaces */
2837 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2838 if (!newstring)
2839 return NULL;
2840 buf = PyBytes_AS_STRING(newstring);
2841 for (i = j = 0; i < hexlen; i += 2) {
2842 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002843 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002844 i++;
2845 if (i >= hexlen)
2846 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002847 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2848 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 if (top == -1 || bot == -1) {
2850 PyErr_Format(PyExc_ValueError,
2851 "non-hexadecimal number found in "
2852 "fromhex() arg at position %zd", i);
2853 goto error;
2854 }
2855 buf[j++] = (top << 4) + bot;
2856 }
2857 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2858 goto error;
2859 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002860
2861 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002862 Py_XDECREF(newstring);
2863 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002864}
2865
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002866/*[clinic input]
2867bytes.__sizeof__ as bytes_sizeof
2868
2869 self: self(type="PyBytesObject *")
2870
2871Returns the size of the bytes object in memory, in bytes.
2872[clinic start generated code]*/
2873
2874PyDoc_STRVAR(bytes_sizeof__doc__,
2875"__sizeof__()\n"
2876"Returns the size of the bytes object in memory, in bytes.");
2877
2878#define BYTES_SIZEOF_METHODDEF \
2879 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002880
2881static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002882bytes_sizeof_impl(PyBytesObject *self);
2883
2884static PyObject *
2885bytes_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
2886{
2887 return bytes_sizeof_impl((PyBytesObject *)self);
2888}
2889
2890static PyObject *
2891bytes_sizeof_impl(PyBytesObject *self)
2892/*[clinic end generated code: checksum=354ce9f0aa31e0fc76fa4d8ca5df234c8b78f49a]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002893{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002895 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002897}
2898
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002899
2900static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002901bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002904}
2905
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002906
2907static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002908bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002909 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2910 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2911 _Py_capitalize__doc__},
2912 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2913 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002914 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2916 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002917 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 expandtabs__doc__},
2919 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002920 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002921 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2922 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2923 _Py_isalnum__doc__},
2924 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2925 _Py_isalpha__doc__},
2926 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2927 _Py_isdigit__doc__},
2928 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2929 _Py_islower__doc__},
2930 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2931 _Py_isspace__doc__},
2932 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2933 _Py_istitle__doc__},
2934 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2935 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002936 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002937 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2938 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002939 BYTES_LSTRIP_METHODDEF
2940 BYTES_MAKETRANS_METHODDEF
2941 BYTES_PARTITION_METHODDEF
2942 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002943 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2944 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2945 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002946 BYTES_RPARTITION_METHODDEF
2947 BYTES_RSPLIT_METHODDEF
2948 BYTES_RSTRIP_METHODDEF
2949 BYTES_SPLIT_METHODDEF
2950 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2952 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002953 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2955 _Py_swapcase__doc__},
2956 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002957 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2959 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002960 BYTES_SIZEOF_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002961 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002962};
2963
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002964static PyObject *
2965str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2966
2967static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002968bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002970 PyObject *x = NULL;
2971 const char *encoding = NULL;
2972 const char *errors = NULL;
2973 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002974 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 Py_ssize_t size;
2976 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002977 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002979 if (type != &PyBytes_Type)
2980 return str_subtype_new(type, args, kwds);
2981 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2982 &encoding, &errors))
2983 return NULL;
2984 if (x == NULL) {
2985 if (encoding != NULL || errors != NULL) {
2986 PyErr_SetString(PyExc_TypeError,
2987 "encoding or errors without sequence "
2988 "argument");
2989 return NULL;
2990 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002991 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 if (PyUnicode_Check(x)) {
2995 /* Encode via the codec registry */
2996 if (encoding == NULL) {
2997 PyErr_SetString(PyExc_TypeError,
2998 "string argument without an encoding");
2999 return NULL;
3000 }
3001 new = PyUnicode_AsEncodedString(x, encoding, errors);
3002 if (new == NULL)
3003 return NULL;
3004 assert(PyBytes_Check(new));
3005 return new;
3006 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003007
3008 /* We'd like to call PyObject_Bytes here, but we need to check for an
3009 integer argument before deferring to PyBytes_FromObject, something
3010 PyObject_Bytes doesn't do. */
3011 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3012 if (func != NULL) {
3013 new = PyObject_CallFunctionObjArgs(func, NULL);
3014 Py_DECREF(func);
3015 if (new == NULL)
3016 return NULL;
3017 if (!PyBytes_Check(new)) {
3018 PyErr_Format(PyExc_TypeError,
3019 "__bytes__ returned non-bytes (type %.200s)",
3020 Py_TYPE(new)->tp_name);
3021 Py_DECREF(new);
3022 return NULL;
3023 }
3024 return new;
3025 }
3026 else if (PyErr_Occurred())
3027 return NULL;
3028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 /* Is it an integer? */
3030 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3031 if (size == -1 && PyErr_Occurred()) {
3032 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3033 return NULL;
3034 PyErr_Clear();
3035 }
3036 else if (size < 0) {
3037 PyErr_SetString(PyExc_ValueError, "negative count");
3038 return NULL;
3039 }
3040 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003041 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003042 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 return new;
3045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 /* If it's not unicode, there can't be encoding or errors */
3048 if (encoding != NULL || errors != NULL) {
3049 PyErr_SetString(PyExc_TypeError,
3050 "encoding or errors without a string argument");
3051 return NULL;
3052 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003053
3054 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003055}
3056
3057PyObject *
3058PyBytes_FromObject(PyObject *x)
3059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 PyObject *new, *it;
3061 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 if (x == NULL) {
3064 PyErr_BadInternalCall();
3065 return NULL;
3066 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003067
3068 if (PyBytes_CheckExact(x)) {
3069 Py_INCREF(x);
3070 return x;
3071 }
3072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 /* Use the modern buffer interface */
3074 if (PyObject_CheckBuffer(x)) {
3075 Py_buffer view;
3076 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3077 return NULL;
3078 new = PyBytes_FromStringAndSize(NULL, view.len);
3079 if (!new)
3080 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3082 &view, view.len, 'C') < 0)
3083 goto fail;
3084 PyBuffer_Release(&view);
3085 return new;
3086 fail:
3087 Py_XDECREF(new);
3088 PyBuffer_Release(&view);
3089 return NULL;
3090 }
3091 if (PyUnicode_Check(x)) {
3092 PyErr_SetString(PyExc_TypeError,
3093 "cannot convert unicode object to bytes");
3094 return NULL;
3095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 if (PyList_CheckExact(x)) {
3098 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3099 if (new == NULL)
3100 return NULL;
3101 for (i = 0; i < Py_SIZE(x); i++) {
3102 Py_ssize_t value = PyNumber_AsSsize_t(
3103 PyList_GET_ITEM(x, i), PyExc_ValueError);
3104 if (value == -1 && PyErr_Occurred()) {
3105 Py_DECREF(new);
3106 return NULL;
3107 }
3108 if (value < 0 || value >= 256) {
3109 PyErr_SetString(PyExc_ValueError,
3110 "bytes must be in range(0, 256)");
3111 Py_DECREF(new);
3112 return NULL;
3113 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003114 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 }
3116 return new;
3117 }
3118 if (PyTuple_CheckExact(x)) {
3119 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3120 if (new == NULL)
3121 return NULL;
3122 for (i = 0; i < Py_SIZE(x); i++) {
3123 Py_ssize_t value = PyNumber_AsSsize_t(
3124 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3125 if (value == -1 && PyErr_Occurred()) {
3126 Py_DECREF(new);
3127 return NULL;
3128 }
3129 if (value < 0 || value >= 256) {
3130 PyErr_SetString(PyExc_ValueError,
3131 "bytes must be in range(0, 256)");
3132 Py_DECREF(new);
3133 return NULL;
3134 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003135 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 }
3137 return new;
3138 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003140 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003141 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003142 if (size == -1 && PyErr_Occurred())
3143 return NULL;
3144 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3145 returning a shared empty bytes string. This required because we
3146 want to call _PyBytes_Resize() the returned object, which we can
3147 only do on bytes objects with refcount == 1. */
3148 size += 1;
3149 new = PyBytes_FromStringAndSize(NULL, size);
3150 if (new == NULL)
3151 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003153 /* Get the iterator */
3154 it = PyObject_GetIter(x);
3155 if (it == NULL)
3156 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 /* Run the iterator to exhaustion */
3159 for (i = 0; ; i++) {
3160 PyObject *item;
3161 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 /* Get the next item */
3164 item = PyIter_Next(it);
3165 if (item == NULL) {
3166 if (PyErr_Occurred())
3167 goto error;
3168 break;
3169 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003171 /* Interpret it as an int (__index__) */
3172 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3173 Py_DECREF(item);
3174 if (value == -1 && PyErr_Occurred())
3175 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003177 /* Range check */
3178 if (value < 0 || value >= 256) {
3179 PyErr_SetString(PyExc_ValueError,
3180 "bytes must be in range(0, 256)");
3181 goto error;
3182 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003184 /* Append the byte */
3185 if (i >= size) {
3186 size = 2 * size + 1;
3187 if (_PyBytes_Resize(&new, size) < 0)
3188 goto error;
3189 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003190 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003191 }
3192 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 /* Clean up and return success */
3195 Py_DECREF(it);
3196 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003197
3198 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003200 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003201 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003202}
3203
3204static PyObject *
3205str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 PyObject *tmp, *pnew;
3208 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003210 assert(PyType_IsSubtype(type, &PyBytes_Type));
3211 tmp = bytes_new(&PyBytes_Type, args, kwds);
3212 if (tmp == NULL)
3213 return NULL;
3214 assert(PyBytes_CheckExact(tmp));
3215 n = PyBytes_GET_SIZE(tmp);
3216 pnew = type->tp_alloc(type, n);
3217 if (pnew != NULL) {
3218 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3219 PyBytes_AS_STRING(tmp), n+1);
3220 ((PyBytesObject *)pnew)->ob_shash =
3221 ((PyBytesObject *)tmp)->ob_shash;
3222 }
3223 Py_DECREF(tmp);
3224 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003225}
3226
/* Docstring for the bytes type; installed as tp_doc in PyBytes_Type below.
   It lists the constructor call forms handled by bytes_new(). */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003227PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003228"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003229bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003230bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003231bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3232bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003233\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003234Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003235 - an iterable yielding integers in range(256)\n\
3236 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003237 - any object implementing the buffer API.\n\
3238 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003239
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003240static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003241
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003242PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003243 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3244 "bytes",
3245 PyBytesObject_SIZE,
3246 sizeof(char),
3247 bytes_dealloc, /* tp_dealloc */
3248 0, /* tp_print */
3249 0, /* tp_getattr */
3250 0, /* tp_setattr */
3251 0, /* tp_reserved */
3252 (reprfunc)bytes_repr, /* tp_repr */
3253 0, /* tp_as_number */
3254 &bytes_as_sequence, /* tp_as_sequence */
3255 &bytes_as_mapping, /* tp_as_mapping */
3256 (hashfunc)bytes_hash, /* tp_hash */
3257 0, /* tp_call */
3258 bytes_str, /* tp_str */
3259 PyObject_GenericGetAttr, /* tp_getattro */
3260 0, /* tp_setattro */
3261 &bytes_as_buffer, /* tp_as_buffer */
3262 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3263 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3264 bytes_doc, /* tp_doc */
3265 0, /* tp_traverse */
3266 0, /* tp_clear */
3267 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3268 0, /* tp_weaklistoffset */
3269 bytes_iter, /* tp_iter */
3270 0, /* tp_iternext */
3271 bytes_methods, /* tp_methods */
3272 0, /* tp_members */
3273 0, /* tp_getset */
3274 &PyBaseObject_Type, /* tp_base */
3275 0, /* tp_dict */
3276 0, /* tp_descr_get */
3277 0, /* tp_descr_set */
3278 0, /* tp_dictoffset */
3279 0, /* tp_init */
3280 0, /* tp_alloc */
3281 bytes_new, /* tp_new */
3282 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003283};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003284
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003285void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003286PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003288 assert(pv != NULL);
3289 if (*pv == NULL)
3290 return;
3291 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003292 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003293 return;
3294 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003295
3296 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3297 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003298 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003299 Py_buffer wb;
3300
3301 wb.len = -1;
3302 if (_getbuffer(w, &wb) < 0) {
3303 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3304 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3305 Py_CLEAR(*pv);
3306 return;
3307 }
3308
3309 oldsize = PyBytes_GET_SIZE(*pv);
3310 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3311 PyErr_NoMemory();
3312 goto error;
3313 }
3314 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3315 goto error;
3316
3317 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3318 PyBuffer_Release(&wb);
3319 return;
3320
3321 error:
3322 PyBuffer_Release(&wb);
3323 Py_CLEAR(*pv);
3324 return;
3325 }
3326
3327 else {
3328 /* Multiple references, need to create new object */
3329 PyObject *v;
3330 v = bytes_concat(*pv, w);
3331 Py_DECREF(*pv);
3332 *pv = v;
3333 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003334}
3335
3336void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003337PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003339 PyBytes_Concat(pv, w);
3340 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003341}
3342
3343
3344/* The following function breaks the notion that strings are immutable:
3345 it changes the size of a string. We get away with this only if there
3346 is only one module referencing the object. You can also think of it
3347 as creating a new string object and destroying the old one, only
3348 more efficiently. In any case, don't use this if the string may
3349 already be known to some other part of the code...
3350 Note that if there's not enough memory to resize the string, the original
3351 string object at *pv is deallocated, *pv is set to NULL, an "out of
3352 memory" exception is set, and -1 is returned. Else (on success) 0 is
3353 returned, and the value in *pv may or may not be the same as on input.
3354 As always, an extra byte is allocated for a trailing \0 byte (newsize
3355 does *not* include that), and a trailing \0 byte is stored.
3356*/
3357
3358int
3359_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3360{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003361 PyObject *v;
3362 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003363 v = *pv;
3364 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3365 *pv = 0;
3366 Py_DECREF(v);
3367 PyErr_BadInternalCall();
3368 return -1;
3369 }
3370 /* XXX UNREF/NEWREF interface should be more symmetrical */
3371 _Py_DEC_REFTOTAL;
3372 _Py_ForgetReference(v);
3373 *pv = (PyObject *)
3374 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3375 if (*pv == NULL) {
3376 PyObject_Del(v);
3377 PyErr_NoMemory();
3378 return -1;
3379 }
3380 _Py_NewReference(*pv);
3381 sv = (PyBytesObject *) *pv;
3382 Py_SIZE(sv) = newsize;
3383 sv->ob_sval[newsize] = '\0';
3384 sv->ob_shash = -1; /* invalidate cached hash value */
3385 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003386}
3387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003388void
3389PyBytes_Fini(void)
3390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003391 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003392 for (i = 0; i < UCHAR_MAX + 1; i++)
3393 Py_CLEAR(characters[i]);
3394 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003395}
3396
Benjamin Peterson4116f362008-05-27 00:36:20 +00003397/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003398
3399typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003400 PyObject_HEAD
3401 Py_ssize_t it_index;
3402 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003403} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003404
3405static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003406striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003407{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003408 _PyObject_GC_UNTRACK(it);
3409 Py_XDECREF(it->it_seq);
3410 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003411}
3412
3413static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003414striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003415{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003416 Py_VISIT(it->it_seq);
3417 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003418}
3419
3420static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003421striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003422{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003423 PyBytesObject *seq;
3424 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003426 assert(it != NULL);
3427 seq = it->it_seq;
3428 if (seq == NULL)
3429 return NULL;
3430 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003432 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3433 item = PyLong_FromLong(
3434 (unsigned char)seq->ob_sval[it->it_index]);
3435 if (item != NULL)
3436 ++it->it_index;
3437 return item;
3438 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003440 Py_DECREF(seq);
3441 it->it_seq = NULL;
3442 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003443}
3444
3445static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003447{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003448 Py_ssize_t len = 0;
3449 if (it->it_seq)
3450 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3451 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003452}
3453
3454PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003455 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003456
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003457static PyObject *
3458striter_reduce(striterobject *it)
3459{
3460 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003461 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003462 it->it_seq, it->it_index);
3463 } else {
3464 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3465 if (u == NULL)
3466 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003467 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003468 }
3469}
3470
3471PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3472
3473static PyObject *
3474striter_setstate(striterobject *it, PyObject *state)
3475{
3476 Py_ssize_t index = PyLong_AsSsize_t(state);
3477 if (index == -1 && PyErr_Occurred())
3478 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003479 if (it->it_seq != NULL) {
3480 if (index < 0)
3481 index = 0;
3482 else if (index > PyBytes_GET_SIZE(it->it_seq))
3483 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3484 it->it_index = index;
3485 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003486 Py_RETURN_NONE;
3487}
3488
3489PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3490
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3493 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003494 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3495 reduce_doc},
3496 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3497 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003498 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003499};
3500
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003501PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003502 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3503 "bytes_iterator", /* tp_name */
3504 sizeof(striterobject), /* tp_basicsize */
3505 0, /* tp_itemsize */
3506 /* methods */
3507 (destructor)striter_dealloc, /* tp_dealloc */
3508 0, /* tp_print */
3509 0, /* tp_getattr */
3510 0, /* tp_setattr */
3511 0, /* tp_reserved */
3512 0, /* tp_repr */
3513 0, /* tp_as_number */
3514 0, /* tp_as_sequence */
3515 0, /* tp_as_mapping */
3516 0, /* tp_hash */
3517 0, /* tp_call */
3518 0, /* tp_str */
3519 PyObject_GenericGetAttr, /* tp_getattro */
3520 0, /* tp_setattro */
3521 0, /* tp_as_buffer */
3522 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3523 0, /* tp_doc */
3524 (traverseproc)striter_traverse, /* tp_traverse */
3525 0, /* tp_clear */
3526 0, /* tp_richcompare */
3527 0, /* tp_weaklistoffset */
3528 PyObject_SelfIter, /* tp_iter */
3529 (iternextfunc)striter_next, /* tp_iternext */
3530 striter_methods, /* tp_methods */
3531 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003532};
3533
3534static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003535bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003537 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003539 if (!PyBytes_Check(seq)) {
3540 PyErr_BadInternalCall();
3541 return NULL;
3542 }
3543 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3544 if (it == NULL)
3545 return NULL;
3546 it->it_index = 0;
3547 Py_INCREF(seq);
3548 it->it_seq = (PyBytesObject *)seq;
3549 _PyObject_GC_TRACK(it);
3550 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003551}