blob: 47898fe975cfaab06d5e4214062c57d69ff2eacc [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the base size of a bytes object: the offset of the
   ob_sval payload plus one byte for the terminating null.  Any allocation
   for a bytes object holding n bytes of data should therefore request
   PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
   For PyBytes_FromStringAndSize(), the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200468 PyErr_Format(PyExc_ValueError,
469 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200470 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 goto failed;
472 }
473 if (strcmp(errors, "replace") == 0) {
474 *p++ = '?';
475 } else if (strcmp(errors, "ignore") == 0)
476 /* do nothing */;
477 else {
478 PyErr_Format(PyExc_ValueError,
479 "decoding error; unknown "
480 "error handling code: %.400s",
481 errors);
482 goto failed;
483 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200484 /* skip \x */
485 if (s < end && Py_ISXDIGIT(s[0]))
486 s++; /* and a hexdigit */
487 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 default:
489 *p++ = '\\';
490 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200491 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000492 UTF-8 bytes may follow. */
493 }
494 }
495 if (p-buf < newlen)
496 _PyBytes_Resize(&v, p - buf);
497 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000498 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 Py_DECREF(v);
500 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000501}
502
503/* -------------------------------------------------------------------- */
504/* object api */
505
506Py_ssize_t
507PyBytes_Size(register PyObject *op)
508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 if (!PyBytes_Check(op)) {
510 PyErr_Format(PyExc_TypeError,
511 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
512 return -1;
513 }
514 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000515}
516
517char *
518PyBytes_AsString(register PyObject *op)
519{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 if (!PyBytes_Check(op)) {
521 PyErr_Format(PyExc_TypeError,
522 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
523 return NULL;
524 }
525 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000526}
527
528int
529PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 register char **s,
531 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (s == NULL) {
534 PyErr_BadInternalCall();
535 return -1;
536 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 if (!PyBytes_Check(obj)) {
539 PyErr_Format(PyExc_TypeError,
540 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
541 return -1;
542 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 *s = PyBytes_AS_STRING(obj);
545 if (len != NULL)
546 *len = PyBytes_GET_SIZE(obj);
547 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
548 PyErr_SetString(PyExc_TypeError,
549 "expected bytes with no null");
550 return -1;
551 }
552 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000553}
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555/* -------------------------------------------------------------------- */
556/* Methods */
557
Eric Smith0923d1d2009-04-16 20:16:10 +0000558#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000559
560#include "stringlib/fastsearch.h"
561#include "stringlib/count.h"
562#include "stringlib/find.h"
563#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000564#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000565#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566
Eric Smith0f78bff2009-11-30 01:01:42 +0000567#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000568
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569PyObject *
570PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000571{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200573 Py_ssize_t i, length = Py_SIZE(op);
574 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 unsigned char quote, *s, *p;
577
578 /* Compute size of output string */
579 squotes = dquotes = 0;
580 newsize = 3; /* b'' */
581 s = (unsigned char*)op->ob_sval;
582 for (i = 0; i < length; i++) {
583 switch(s[i]) {
584 case '\'': squotes++; newsize++; break;
585 case '"': dquotes++; newsize++; break;
586 case '\\': case '\t': case '\n': case '\r':
587 newsize += 2; break; /* \C */
588 default:
589 if (s[i] < ' ' || s[i] >= 0x7f)
590 newsize += 4; /* \xHH */
591 else
592 newsize++;
593 }
594 }
595 quote = '\'';
596 if (smartquotes && squotes && !dquotes)
597 quote = '"';
598 if (squotes && quote == '\'')
599 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200600
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyErr_SetString(PyExc_OverflowError,
603 "bytes object is too large to make repr");
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606
607 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 if (v == NULL) {
609 return NULL;
610 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200611 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 *p++ = 'b', *p++ = quote;
614 for (i = 0; i < length; i++) {
615 unsigned char c = op->ob_sval[i];
616 if (c == quote || c == '\\')
617 *p++ = '\\', *p++ = c;
618 else if (c == '\t')
619 *p++ = '\\', *p++ = 't';
620 else if (c == '\n')
621 *p++ = '\\', *p++ = 'n';
622 else if (c == '\r')
623 *p++ = '\\', *p++ = 'r';
624 else if (c < ' ' || c >= 0x7f) {
625 *p++ = '\\';
626 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200627 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
628 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200630 else
631 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200634 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200635 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000642}
643
Neal Norwitz6968b052007-02-27 19:02:19 +0000644static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000645bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000646{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 if (Py_BytesWarningFlag) {
648 if (PyErr_WarnEx(PyExc_BytesWarning,
649 "str() on a bytes instance", 1))
650 return NULL;
651 }
652 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000653}
654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000656bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659}
Neal Norwitz6968b052007-02-27 19:02:19 +0000660
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000661/* This is also used by PyBytes_Concat() */
662static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000663bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 Py_ssize_t size;
666 Py_buffer va, vb;
667 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000669 va.len = -1;
670 vb.len = -1;
671 if (_getbuffer(a, &va) < 0 ||
672 _getbuffer(b, &vb) < 0) {
673 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
674 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
675 goto done;
676 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 /* Optimize end cases */
679 if (va.len == 0 && PyBytes_CheckExact(b)) {
680 result = b;
681 Py_INCREF(result);
682 goto done;
683 }
684 if (vb.len == 0 && PyBytes_CheckExact(a)) {
685 result = a;
686 Py_INCREF(result);
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 size = va.len + vb.len;
691 if (size < 0) {
692 PyErr_NoMemory();
693 goto done;
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 result = PyBytes_FromStringAndSize(NULL, size);
697 if (result != NULL) {
698 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
699 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000701
702 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 if (va.len != -1)
704 PyBuffer_Release(&va);
705 if (vb.len != -1)
706 PyBuffer_Release(&vb);
707 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708}
Neal Norwitz6968b052007-02-27 19:02:19 +0000709
710static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000711bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000712{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 register Py_ssize_t i;
714 register Py_ssize_t j;
715 register Py_ssize_t size;
716 register PyBytesObject *op;
717 size_t nbytes;
718 if (n < 0)
719 n = 0;
720 /* watch out for overflows: the size can overflow int,
721 * and the # of bytes needed can overflow size_t
722 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000723 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 PyErr_SetString(PyExc_OverflowError,
725 "repeated bytes are too long");
726 return NULL;
727 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
730 Py_INCREF(a);
731 return (PyObject *)a;
732 }
733 nbytes = (size_t)size;
734 if (nbytes + PyBytesObject_SIZE <= nbytes) {
735 PyErr_SetString(PyExc_OverflowError,
736 "repeated bytes are too long");
737 return NULL;
738 }
739 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
740 if (op == NULL)
741 return PyErr_NoMemory();
742 PyObject_INIT_VAR(op, &PyBytes_Type, size);
743 op->ob_shash = -1;
744 op->ob_sval[size] = '\0';
745 if (Py_SIZE(a) == 1 && n > 0) {
746 memset(op->ob_sval, a->ob_sval[0] , n);
747 return (PyObject *) op;
748 }
749 i = 0;
750 if (i < size) {
751 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
752 i = Py_SIZE(a);
753 }
754 while (i < size) {
755 j = (i <= size-i) ? i : size-i;
756 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
757 i += j;
758 }
759 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000760}
761
Guido van Rossum98297ee2007-11-06 21:34:58 +0000762static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000763bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000764{
765 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
766 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000767 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000768 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000769 PyErr_Clear();
770 if (_getbuffer(arg, &varg) < 0)
771 return -1;
772 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
773 varg.buf, varg.len, 0);
774 PyBuffer_Release(&varg);
775 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776 }
777 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000778 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
779 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000780 }
781
Antoine Pitrou0010d372010-08-15 17:12:55 +0000782 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783}
784
Neal Norwitz6968b052007-02-27 19:02:19 +0000785static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000786bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 if (i < 0 || i >= Py_SIZE(a)) {
789 PyErr_SetString(PyExc_IndexError, "index out of range");
790 return NULL;
791 }
792 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000793}
794
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000795static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000796bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 int c;
799 Py_ssize_t len_a, len_b;
800 Py_ssize_t min_len;
801 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 /* Make sure both arguments are strings. */
804 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
805 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
806 (PyObject_IsInstance((PyObject*)a,
807 (PyObject*)&PyUnicode_Type) ||
808 PyObject_IsInstance((PyObject*)b,
809 (PyObject*)&PyUnicode_Type))) {
810 if (PyErr_WarnEx(PyExc_BytesWarning,
811 "Comparison between bytes and string", 1))
812 return NULL;
813 }
814 result = Py_NotImplemented;
815 goto out;
816 }
817 if (a == b) {
818 switch (op) {
819 case Py_EQ:case Py_LE:case Py_GE:
820 result = Py_True;
821 goto out;
822 case Py_NE:case Py_LT:case Py_GT:
823 result = Py_False;
824 goto out;
825 }
826 }
827 if (op == Py_EQ) {
828 /* Supporting Py_NE here as well does not save
829 much time, since Py_NE is rarely used. */
830 if (Py_SIZE(a) == Py_SIZE(b)
831 && (a->ob_sval[0] == b->ob_sval[0]
832 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
833 result = Py_True;
834 } else {
835 result = Py_False;
836 }
837 goto out;
838 }
839 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
840 min_len = (len_a < len_b) ? len_a : len_b;
841 if (min_len > 0) {
842 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
843 if (c==0)
844 c = memcmp(a->ob_sval, b->ob_sval, min_len);
845 } else
846 c = 0;
847 if (c == 0)
848 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
849 switch (op) {
850 case Py_LT: c = c < 0; break;
851 case Py_LE: c = c <= 0; break;
852 case Py_EQ: assert(0); break; /* unreachable */
853 case Py_NE: c = c != 0; break;
854 case Py_GT: c = c > 0; break;
855 case Py_GE: c = c >= 0; break;
856 default:
857 result = Py_NotImplemented;
858 goto out;
859 }
860 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000861 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 Py_INCREF(result);
863 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000864}
865
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000866static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000867bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000868{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100869 if (a->ob_shash == -1) {
870 /* Can't fail */
871 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
872 }
873 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000876static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000877bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000878{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 if (PyIndex_Check(item)) {
880 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
881 if (i == -1 && PyErr_Occurred())
882 return NULL;
883 if (i < 0)
884 i += PyBytes_GET_SIZE(self);
885 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
886 PyErr_SetString(PyExc_IndexError,
887 "index out of range");
888 return NULL;
889 }
890 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
891 }
892 else if (PySlice_Check(item)) {
893 Py_ssize_t start, stop, step, slicelength, cur, i;
894 char* source_buf;
895 char* result_buf;
896 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000897
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000898 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 PyBytes_GET_SIZE(self),
900 &start, &stop, &step, &slicelength) < 0) {
901 return NULL;
902 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 if (slicelength <= 0) {
905 return PyBytes_FromStringAndSize("", 0);
906 }
907 else if (start == 0 && step == 1 &&
908 slicelength == PyBytes_GET_SIZE(self) &&
909 PyBytes_CheckExact(self)) {
910 Py_INCREF(self);
911 return (PyObject *)self;
912 }
913 else if (step == 1) {
914 return PyBytes_FromStringAndSize(
915 PyBytes_AS_STRING(self) + start,
916 slicelength);
917 }
918 else {
919 source_buf = PyBytes_AS_STRING(self);
920 result = PyBytes_FromStringAndSize(NULL, slicelength);
921 if (result == NULL)
922 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 result_buf = PyBytes_AS_STRING(result);
925 for (cur = start, i = 0; i < slicelength;
926 cur += step, i++) {
927 result_buf[i] = source_buf[cur];
928 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 return result;
931 }
932 }
933 else {
934 PyErr_Format(PyExc_TypeError,
935 "byte indices must be integers, not %.200s",
936 Py_TYPE(item)->tp_name);
937 return NULL;
938 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000939}
940
941static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000942bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000943{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
945 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000946}
947
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000948static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 (lenfunc)bytes_length, /*sq_length*/
950 (binaryfunc)bytes_concat, /*sq_concat*/
951 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
952 (ssizeargfunc)bytes_item, /*sq_item*/
953 0, /*sq_slice*/
954 0, /*sq_ass_item*/
955 0, /*sq_ass_slice*/
956 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000957};
958
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000959static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 (lenfunc)bytes_length,
961 (binaryfunc)bytes_subscript,
962 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963};
964
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000965static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 (getbufferproc)bytes_buffer_getbuffer,
967 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968};
969
970
/* Which end(s) of the object a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip type above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip type i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
979
Neal Norwitz6968b052007-02-27 19:02:19 +0000980PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200981"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000982\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000983Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000984If sep is not specified or is None, B is split on ASCII whitespace\n\
985characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000986If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000987
988static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200989bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000990{
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200991 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
993 Py_ssize_t maxsplit = -1;
994 const char *s = PyBytes_AS_STRING(self), *sub;
995 Py_buffer vsub;
996 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +0000997
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200998 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
999 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 return NULL;
1001 if (maxsplit < 0)
1002 maxsplit = PY_SSIZE_T_MAX;
1003 if (subobj == Py_None)
1004 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1005 if (_getbuffer(subobj, &vsub) < 0)
1006 return NULL;
1007 sub = vsub.buf;
1008 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1011 PyBuffer_Release(&vsub);
1012 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001013}
1014
Neal Norwitz6968b052007-02-27 19:02:19 +00001015PyDoc_STRVAR(partition__doc__,
1016"B.partition(sep) -> (head, sep, tail)\n\
1017\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001018Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001019the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001020found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001021
1022static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001023bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 const char *sep;
1026 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 if (PyBytes_Check(sep_obj)) {
1029 sep = PyBytes_AS_STRING(sep_obj);
1030 sep_len = PyBytes_GET_SIZE(sep_obj);
1031 }
1032 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1033 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 return stringlib_partition(
1036 (PyObject*) self,
1037 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1038 sep_obj, sep, sep_len
1039 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001040}
1041
1042PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001043"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001044\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001045Search for the separator sep in B, starting at the end of B,\n\
1046and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001047part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001048bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001049
1050static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001051bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001052{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 const char *sep;
1054 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 if (PyBytes_Check(sep_obj)) {
1057 sep = PyBytes_AS_STRING(sep_obj);
1058 sep_len = PyBytes_GET_SIZE(sep_obj);
1059 }
1060 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1061 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 return stringlib_rpartition(
1064 (PyObject*) self,
1065 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1066 sep_obj, sep, sep_len
1067 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001068}
1069
Neal Norwitz6968b052007-02-27 19:02:19 +00001070PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001071"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001072\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001073Return a list of the sections in B, using sep as the delimiter,\n\
1074starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001075If sep is not given, B is split on ASCII whitespace characters\n\
1076(space, tab, return, newline, formfeed, vertical tab).\n\
1077If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001078
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001079
Neal Norwitz6968b052007-02-27 19:02:19 +00001080static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001081bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001082{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001083 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1085 Py_ssize_t maxsplit = -1;
1086 const char *s = PyBytes_AS_STRING(self), *sub;
1087 Py_buffer vsub;
1088 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001090 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1091 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 return NULL;
1093 if (maxsplit < 0)
1094 maxsplit = PY_SSIZE_T_MAX;
1095 if (subobj == Py_None)
1096 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1097 if (_getbuffer(subobj, &vsub) < 0)
1098 return NULL;
1099 sub = vsub.buf;
1100 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1103 PyBuffer_Release(&vsub);
1104 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001105}
1106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107
1108PyDoc_STRVAR(join__doc__,
1109"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001110\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001111Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1113
Neal Norwitz6968b052007-02-27 19:02:19 +00001114static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001115bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 char *sep = PyBytes_AS_STRING(self);
1118 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1119 PyObject *res = NULL;
1120 char *p;
1121 Py_ssize_t seqlen = 0;
1122 size_t sz = 0;
1123 Py_ssize_t i;
1124 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 seq = PySequence_Fast(orig, "");
1127 if (seq == NULL) {
1128 return NULL;
1129 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 seqlen = PySequence_Size(seq);
1132 if (seqlen == 0) {
1133 Py_DECREF(seq);
1134 return PyBytes_FromString("");
1135 }
1136 if (seqlen == 1) {
1137 item = PySequence_Fast_GET_ITEM(seq, 0);
1138 if (PyBytes_CheckExact(item)) {
1139 Py_INCREF(item);
1140 Py_DECREF(seq);
1141 return item;
1142 }
1143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 /* There are at least two things to join, or else we have a subclass
1146 * of the builtin types in the sequence.
1147 * Do a pre-pass to figure out the total amount of space we'll
1148 * need (sz), and see whether all argument are bytes.
1149 */
1150 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1151 for (i = 0; i < seqlen; i++) {
1152 const size_t old_sz = sz;
1153 item = PySequence_Fast_GET_ITEM(seq, i);
1154 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1155 PyErr_Format(PyExc_TypeError,
1156 "sequence item %zd: expected bytes,"
1157 " %.80s found",
1158 i, Py_TYPE(item)->tp_name);
1159 Py_DECREF(seq);
1160 return NULL;
1161 }
1162 sz += Py_SIZE(item);
1163 if (i != 0)
1164 sz += seplen;
1165 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1166 PyErr_SetString(PyExc_OverflowError,
1167 "join() result is too long for bytes");
1168 Py_DECREF(seq);
1169 return NULL;
1170 }
1171 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 /* Allocate result space. */
1174 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1175 if (res == NULL) {
1176 Py_DECREF(seq);
1177 return NULL;
1178 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 /* Catenate everything. */
1181 /* I'm not worried about a PyByteArray item growing because there's
1182 nowhere in this function where we release the GIL. */
1183 p = PyBytes_AS_STRING(res);
1184 for (i = 0; i < seqlen; ++i) {
1185 size_t n;
1186 char *q;
1187 if (i) {
1188 Py_MEMCPY(p, sep, seplen);
1189 p += seplen;
1190 }
1191 item = PySequence_Fast_GET_ITEM(seq, i);
1192 n = Py_SIZE(item);
1193 if (PyBytes_Check(item))
1194 q = PyBytes_AS_STRING(item);
1195 else
1196 q = PyByteArray_AS_STRING(item);
1197 Py_MEMCPY(p, q, n);
1198 p += n;
1199 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 Py_DECREF(seq);
1202 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001203}
1204
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205PyObject *
1206_PyBytes_Join(PyObject *sep, PyObject *x)
1207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 assert(sep != NULL && PyBytes_Check(sep));
1209 assert(x != NULL);
1210 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001211}
1212
/* Helper macro to fix up start/end slice values in place:
     - end greater than len is clamped to len;
     - a negative end or start has len added to it and, if still negative,
       is set to 0.
   start and end must be modifiable lvalues; every argument may be
   evaluated more than once.  Wrapped in do { } while (0) so that the macro
   behaves as a single statement (safe inside an unbraced if/else); use it
   with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227
1228Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001229bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001232 char byte;
1233 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 const char *sub;
1235 Py_ssize_t sub_len;
1236 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001237 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238
Antoine Pitrouac65d962011-10-20 23:54:17 +02001239 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1240 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242
Antoine Pitrouac65d962011-10-20 23:54:17 +02001243 if (subobj) {
1244 if (_getbuffer(subobj, &subbuf) < 0)
1245 return -2;
1246
1247 sub = subbuf.buf;
1248 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001250 else {
1251 sub = &byte;
1252 sub_len = 1;
1253 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001256 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1258 sub, sub_len, start, end);
1259 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001260 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1262 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001263
1264 if (subobj)
1265 PyBuffer_Release(&subbuf);
1266
1267 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268}
1269
1270
1271PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001272"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001273\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001274Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001275such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001277\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278Return -1 on failure.");
1279
Neal Norwitz6968b052007-02-27 19:02:19 +00001280static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001281bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001282{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 Py_ssize_t result = bytes_find_internal(self, args, +1);
1284 if (result == -2)
1285 return NULL;
1286 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001287}
1288
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289
1290PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001291"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001292\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293Like B.find() but raise ValueError when the substring is not found.");
1294
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001295static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001296bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 Py_ssize_t result = bytes_find_internal(self, args, +1);
1299 if (result == -2)
1300 return NULL;
1301 if (result == -1) {
1302 PyErr_SetString(PyExc_ValueError,
1303 "substring not found");
1304 return NULL;
1305 }
1306 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001307}
1308
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309
1310PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001311"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001312\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001314such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001316\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001317Return -1 on failure.");
1318
Neal Norwitz6968b052007-02-27 19:02:19 +00001319static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001320bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001321{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 Py_ssize_t result = bytes_find_internal(self, args, -1);
1323 if (result == -2)
1324 return NULL;
1325 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001326}
1327
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001328
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001329PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001330"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331\n\
1332Like B.rfind() but raise ValueError when the substring is not found.");
1333
1334static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001335bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 Py_ssize_t result = bytes_find_internal(self, args, -1);
1338 if (result == -2)
1339 return NULL;
1340 if (result == -1) {
1341 PyErr_SetString(PyExc_ValueError,
1342 "substring not found");
1343 return NULL;
1344 }
1345 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001346}
1347
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348
1349Py_LOCAL_INLINE(PyObject *)
1350do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 Py_buffer vsep;
1353 char *s = PyBytes_AS_STRING(self);
1354 Py_ssize_t len = PyBytes_GET_SIZE(self);
1355 char *sep;
1356 Py_ssize_t seplen;
1357 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 if (_getbuffer(sepobj, &vsep) < 0)
1360 return NULL;
1361 sep = vsep.buf;
1362 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 i = 0;
1365 if (striptype != RIGHTSTRIP) {
1366 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1367 i++;
1368 }
1369 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 j = len;
1372 if (striptype != LEFTSTRIP) {
1373 do {
1374 j--;
1375 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1376 j++;
1377 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1382 Py_INCREF(self);
1383 return (PyObject*)self;
1384 }
1385 else
1386 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001387}
1388
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
1390Py_LOCAL_INLINE(PyObject *)
1391do_strip(PyBytesObject *self, int striptype)
1392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 char *s = PyBytes_AS_STRING(self);
1394 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 i = 0;
1397 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001398 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 i++;
1400 }
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 j = len;
1404 if (striptype != LEFTSTRIP) {
1405 do {
1406 j--;
David Malcolm96960882010-11-05 17:23:41 +00001407 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 j++;
1409 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1412 Py_INCREF(self);
1413 return (PyObject*)self;
1414 }
1415 else
1416 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417}
1418
1419
1420Py_LOCAL_INLINE(PyObject *)
1421do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1422{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1426 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (sep != NULL && sep != Py_None) {
1429 return do_xstrip(self, striptype, sep);
1430 }
1431 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432}
1433
1434
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001435PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001438Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001439If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001440static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001441bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 if (PyTuple_GET_SIZE(args) == 0)
1444 return do_strip(self, BOTHSTRIP); /* Common case */
1445 else
1446 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447}
1448
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001450PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001453Strip leading bytes contained in the argument.\n\
1454If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001456bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 if (PyTuple_GET_SIZE(args) == 0)
1459 return do_strip(self, LEFTSTRIP); /* Common case */
1460 else
1461 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462}
1463
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001464
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001466"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001468Strip trailing bytes contained in the argument.\n\
1469If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001471bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001472{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (PyTuple_GET_SIZE(args) == 0)
1474 return do_strip(self, RIGHTSTRIP); /* Common case */
1475 else
1476 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001477}
Neal Norwitz6968b052007-02-27 19:02:19 +00001478
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479
1480PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001481"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001482\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001484string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485as in slice notation.");
1486
1487static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 PyObject *sub_obj;
1491 const char *str = PyBytes_AS_STRING(self), *sub;
1492 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001493 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001495
Antoine Pitrouac65d962011-10-20 23:54:17 +02001496 Py_buffer vsub;
1497 PyObject *count_obj;
1498
1499 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1500 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502
Antoine Pitrouac65d962011-10-20 23:54:17 +02001503 if (sub_obj) {
1504 if (_getbuffer(sub_obj, &vsub) < 0)
1505 return NULL;
1506
1507 sub = vsub.buf;
1508 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001510 else {
1511 sub = &byte;
1512 sub_len = 1;
1513 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001516
Antoine Pitrouac65d962011-10-20 23:54:17 +02001517 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1519 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001520
1521 if (sub_obj)
1522 PyBuffer_Release(&vsub);
1523
1524 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525}
1526
1527
1528PyDoc_STRVAR(translate__doc__,
1529"B.translate(table[, deletechars]) -> bytes\n\
1530\n\
1531Return a copy of B, where all characters occurring in the\n\
1532optional argument deletechars are removed, and the remaining\n\
1533characters have been mapped through the given translation\n\
1534table, which must be a bytes object of length 256.");
1535
1536static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001537bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 register char *input, *output;
1540 const char *table;
1541 register Py_ssize_t i, c, changed = 0;
1542 PyObject *input_obj = (PyObject*)self;
1543 const char *output_start, *del_table=NULL;
1544 Py_ssize_t inlen, tablen, dellen = 0;
1545 PyObject *result;
1546 int trans_table[256];
1547 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001549 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1550 &tableobj, &delobj))
1551 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 if (PyBytes_Check(tableobj)) {
1554 table = PyBytes_AS_STRING(tableobj);
1555 tablen = PyBytes_GET_SIZE(tableobj);
1556 }
1557 else if (tableobj == Py_None) {
1558 table = NULL;
1559 tablen = 256;
1560 }
1561 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1562 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 if (tablen != 256) {
1565 PyErr_SetString(PyExc_ValueError,
1566 "translation table must be 256 characters long");
1567 return NULL;
1568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 if (delobj != NULL) {
1571 if (PyBytes_Check(delobj)) {
1572 del_table = PyBytes_AS_STRING(delobj);
1573 dellen = PyBytes_GET_SIZE(delobj);
1574 }
1575 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1576 return NULL;
1577 }
1578 else {
1579 del_table = NULL;
1580 dellen = 0;
1581 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 inlen = PyBytes_GET_SIZE(input_obj);
1584 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1585 if (result == NULL)
1586 return NULL;
1587 output_start = output = PyBytes_AsString(result);
1588 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 if (dellen == 0 && table != NULL) {
1591 /* If no deletions are required, use faster code */
1592 for (i = inlen; --i >= 0; ) {
1593 c = Py_CHARMASK(*input++);
1594 if (Py_CHARMASK((*output++ = table[c])) != c)
1595 changed = 1;
1596 }
1597 if (changed || !PyBytes_CheckExact(input_obj))
1598 return result;
1599 Py_DECREF(result);
1600 Py_INCREF(input_obj);
1601 return input_obj;
1602 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 if (table == NULL) {
1605 for (i = 0; i < 256; i++)
1606 trans_table[i] = Py_CHARMASK(i);
1607 } else {
1608 for (i = 0; i < 256; i++)
1609 trans_table[i] = Py_CHARMASK(table[i]);
1610 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 for (i = 0; i < dellen; i++)
1613 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 for (i = inlen; --i >= 0; ) {
1616 c = Py_CHARMASK(*input++);
1617 if (trans_table[c] != -1)
1618 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1619 continue;
1620 changed = 1;
1621 }
1622 if (!changed && PyBytes_CheckExact(input_obj)) {
1623 Py_DECREF(result);
1624 Py_INCREF(input_obj);
1625 return input_obj;
1626 }
1627 /* Fix the size of the resulting string */
1628 if (inlen > 0)
1629 _PyBytes_Resize(&result, output - output_start);
1630 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001631}
1632
1633
Georg Brandlabc38772009-04-12 15:51:51 +00001634static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001635bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001638}
1639
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640/* find and count characters and substrings */
1641
/* findchar(target, target_len, c): pointer to the first byte equal to c
   within the first target_len bytes of target, or NULL if there is none.
   Thin wrapper over memchr() that yields a non-const char *. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1644
1645/* String ops must return a string. */
1646/* If the object is subclass of string, create a copy */
1647Py_LOCAL(PyBytesObject *)
1648return_self(PyBytesObject *self)
1649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 if (PyBytes_CheckExact(self)) {
1651 Py_INCREF(self);
1652 return self;
1653 }
1654 return (PyBytesObject *)PyBytes_FromStringAndSize(
1655 PyBytes_AS_STRING(self),
1656 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657}
1658
1659Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001660countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 Py_ssize_t count=0;
1663 const char *start=target;
1664 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 while ( (start=findchar(start, end-start, c)) != NULL ) {
1667 count++;
1668 if (count >= maxcount)
1669 break;
1670 start += 1;
1671 }
1672 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673}
1674
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675
1676/* Algorithms for different cases of string replacement */
1677
1678/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1679Py_LOCAL(PyBytesObject *)
1680replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 const char *to_s, Py_ssize_t to_len,
1682 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 char *self_s, *result_s;
1685 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001686 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001691 /* 1 at the end plus 1 after every character;
1692 count = min(maxcount, self_len + 1) */
1693 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001695 else
1696 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1697 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 /* Check for overflow */
1700 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001701 assert(count > 0);
1702 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 PyErr_SetString(PyExc_OverflowError,
1704 "replacement bytes are too long");
1705 return NULL;
1706 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001707 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 if (! (result = (PyBytesObject *)
1710 PyBytes_FromStringAndSize(NULL, result_len)) )
1711 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 self_s = PyBytes_AS_STRING(self);
1714 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 /* Lay the first one down (guaranteed this will occur) */
1719 Py_MEMCPY(result_s, to_s, to_len);
1720 result_s += to_len;
1721 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 for (i=0; i<count; i++) {
1724 *result_s++ = *self_s++;
1725 Py_MEMCPY(result_s, to_s, to_len);
1726 result_s += to_len;
1727 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 /* Copy the rest of the original string */
1730 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733}
1734
1735/* Special case for deleting a single character */
1736/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1737Py_LOCAL(PyBytesObject *)
1738replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 char *self_s, *result_s;
1742 char *start, *next, *end;
1743 Py_ssize_t self_len, result_len;
1744 Py_ssize_t count;
1745 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 self_len = PyBytes_GET_SIZE(self);
1748 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 count = countchar(self_s, self_len, from_c, maxcount);
1751 if (count == 0) {
1752 return return_self(self);
1753 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 result_len = self_len - count; /* from_len == 1 */
1756 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 if ( (result = (PyBytesObject *)
1759 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1760 return NULL;
1761 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 start = self_s;
1764 end = self_s + self_len;
1765 while (count-- > 0) {
1766 next = findchar(start, end-start, from_c);
1767 if (next == NULL)
1768 break;
1769 Py_MEMCPY(result_s, start, next-start);
1770 result_s += (next-start);
1771 start = next+1;
1772 }
1773 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001776}
1777
1778/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1779
1780Py_LOCAL(PyBytesObject *)
1781replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 const char *from_s, Py_ssize_t from_len,
1783 Py_ssize_t maxcount) {
1784 char *self_s, *result_s;
1785 char *start, *next, *end;
1786 Py_ssize_t self_len, result_len;
1787 Py_ssize_t count, offset;
1788 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 self_len = PyBytes_GET_SIZE(self);
1791 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 count = stringlib_count(self_s, self_len,
1794 from_s, from_len,
1795 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 if (count == 0) {
1798 /* no matches */
1799 return return_self(self);
1800 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 result_len = self_len - (count * from_len);
1803 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 if ( (result = (PyBytesObject *)
1806 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1807 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 start = self_s;
1812 end = self_s + self_len;
1813 while (count-- > 0) {
1814 offset = stringlib_find(start, end-start,
1815 from_s, from_len,
1816 0);
1817 if (offset == -1)
1818 break;
1819 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 result_s += (next-start);
1824 start = next+from_len;
1825 }
1826 Py_MEMCPY(result_s, start, end-start);
1827 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828}
1829
1830/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1831Py_LOCAL(PyBytesObject *)
1832replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 char from_c, char to_c,
1834 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 char *self_s, *result_s, *start, *end, *next;
1837 Py_ssize_t self_len;
1838 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 /* The result string will be the same size */
1841 self_s = PyBytes_AS_STRING(self);
1842 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 if (next == NULL) {
1847 /* No matches; return the original string */
1848 return return_self(self);
1849 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 /* Need to make a new string */
1852 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1853 if (result == NULL)
1854 return NULL;
1855 result_s = PyBytes_AS_STRING(result);
1856 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 /* change everything in-place, starting with this one */
1859 start = result_s + (next-self_s);
1860 *start = to_c;
1861 start++;
1862 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 while (--maxcount > 0) {
1865 next = findchar(start, end-start, from_c);
1866 if (next == NULL)
1867 break;
1868 *next = to_c;
1869 start = next+1;
1870 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873}
1874
1875/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1876Py_LOCAL(PyBytesObject *)
1877replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 const char *from_s, Py_ssize_t from_len,
1879 const char *to_s, Py_ssize_t to_len,
1880 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 char *result_s, *start, *end;
1883 char *self_s;
1884 Py_ssize_t self_len, offset;
1885 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001887 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 self_s = PyBytes_AS_STRING(self);
1890 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 offset = stringlib_find(self_s, self_len,
1893 from_s, from_len,
1894 0);
1895 if (offset == -1) {
1896 /* No matches; return the original string */
1897 return return_self(self);
1898 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 /* Need to make a new string */
1901 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1902 if (result == NULL)
1903 return NULL;
1904 result_s = PyBytes_AS_STRING(result);
1905 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 /* change everything in-place, starting with this one */
1908 start = result_s + offset;
1909 Py_MEMCPY(start, to_s, from_len);
1910 start += from_len;
1911 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 while ( --maxcount > 0) {
1914 offset = stringlib_find(start, end-start,
1915 from_s, from_len,
1916 0);
1917 if (offset==-1)
1918 break;
1919 Py_MEMCPY(start+offset, to_s, from_len);
1920 start += offset+from_len;
1921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924}
1925
1926/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1927Py_LOCAL(PyBytesObject *)
1928replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 char from_c,
1930 const char *to_s, Py_ssize_t to_len,
1931 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 char *self_s, *result_s;
1934 char *start, *next, *end;
1935 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001936 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 self_s = PyBytes_AS_STRING(self);
1940 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 count = countchar(self_s, self_len, from_c, maxcount);
1943 if (count == 0) {
1944 /* no matches, return unchanged */
1945 return return_self(self);
1946 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 /* use the difference between current and new, hence the "-1" */
1949 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001950 assert(count > 0);
1951 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 PyErr_SetString(PyExc_OverflowError,
1953 "replacement bytes are too long");
1954 return NULL;
1955 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001956 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 if ( (result = (PyBytesObject *)
1959 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1960 return NULL;
1961 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 start = self_s;
1964 end = self_s + self_len;
1965 while (count-- > 0) {
1966 next = findchar(start, end-start, from_c);
1967 if (next == NULL)
1968 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 if (next == start) {
1971 /* replace with the 'to' */
1972 Py_MEMCPY(result_s, to_s, to_len);
1973 result_s += to_len;
1974 start += 1;
1975 } else {
1976 /* copy the unchanged old then the 'to' */
1977 Py_MEMCPY(result_s, start, next-start);
1978 result_s += (next-start);
1979 Py_MEMCPY(result_s, to_s, to_len);
1980 result_s += to_len;
1981 start = next+1;
1982 }
1983 }
1984 /* Copy the remainder of the remaining string */
1985 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988}
1989
1990/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1991Py_LOCAL(PyBytesObject *)
1992replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 const char *from_s, Py_ssize_t from_len,
1994 const char *to_s, Py_ssize_t to_len,
1995 Py_ssize_t maxcount) {
1996 char *self_s, *result_s;
1997 char *start, *next, *end;
1998 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001999 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 self_s = PyBytes_AS_STRING(self);
2003 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 count = stringlib_count(self_s, self_len,
2006 from_s, from_len,
2007 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 if (count == 0) {
2010 /* no matches, return unchanged */
2011 return return_self(self);
2012 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 /* Check for overflow */
2015 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002016 assert(count > 0);
2017 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 PyErr_SetString(PyExc_OverflowError,
2019 "replacement bytes are too long");
2020 return NULL;
2021 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002022 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 if ( (result = (PyBytesObject *)
2025 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2026 return NULL;
2027 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 start = self_s;
2030 end = self_s + self_len;
2031 while (count-- > 0) {
2032 offset = stringlib_find(start, end-start,
2033 from_s, from_len,
2034 0);
2035 if (offset == -1)
2036 break;
2037 next = start+offset;
2038 if (next == start) {
2039 /* replace with the 'to' */
2040 Py_MEMCPY(result_s, to_s, to_len);
2041 result_s += to_len;
2042 start += from_len;
2043 } else {
2044 /* copy the unchanged old then the 'to' */
2045 Py_MEMCPY(result_s, start, next-start);
2046 result_s += (next-start);
2047 Py_MEMCPY(result_s, to_s, to_len);
2048 result_s += to_len;
2049 start = next+from_len;
2050 }
2051 }
2052 /* Copy the remainder of the remaining string */
2053 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056}
2057
2058
2059Py_LOCAL(PyBytesObject *)
2060replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002061 const char *from_s, Py_ssize_t from_len,
2062 const char *to_s, Py_ssize_t to_len,
2063 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 if (maxcount < 0) {
2066 maxcount = PY_SSIZE_T_MAX;
2067 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2068 /* nothing to do; return the original string */
2069 return return_self(self);
2070 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 if (maxcount == 0 ||
2073 (from_len == 0 && to_len == 0)) {
2074 /* nothing to do; return the original string */
2075 return return_self(self);
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 if (from_len == 0) {
2081 /* insert the 'to' string everywhere. */
2082 /* >>> "Python".replace("", ".") */
2083 /* '.P.y.t.h.o.n.' */
2084 return replace_interleave(self, to_s, to_len, maxcount);
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2088 /* point for an empty self string to generate a non-empty string */
2089 /* Special case so the remaining code always gets a non-empty string */
2090 if (PyBytes_GET_SIZE(self) == 0) {
2091 return return_self(self);
2092 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 if (to_len == 0) {
2095 /* delete all occurrences of 'from' string */
2096 if (from_len == 1) {
2097 return replace_delete_single_character(
2098 self, from_s[0], maxcount);
2099 } else {
2100 return replace_delete_substring(self, from_s,
2101 from_len, maxcount);
2102 }
2103 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 if (from_len == to_len) {
2108 if (from_len == 1) {
2109 return replace_single_character_in_place(
2110 self,
2111 from_s[0],
2112 to_s[0],
2113 maxcount);
2114 } else {
2115 return replace_substring_in_place(
2116 self, from_s, from_len, to_s, to_len,
2117 maxcount);
2118 }
2119 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 /* Otherwise use the more generic algorithms */
2122 if (from_len == 1) {
2123 return replace_single_character(self, from_s[0],
2124 to_s, to_len, maxcount);
2125 } else {
2126 /* len('from')>=2, len('to')>=1 */
2127 return replace_substring(self, from_s, from_len, to_s, to_len,
2128 maxcount);
2129 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130}
2131
2132PyDoc_STRVAR(replace__doc__,
2133"B.replace(old, new[, count]) -> bytes\n\
2134\n\
2135Return a copy of B with all occurrences of subsection\n\
2136old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002137given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
2139static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002140bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 Py_ssize_t count = -1;
2143 PyObject *from, *to;
2144 const char *from_s, *to_s;
2145 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2148 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 if (PyBytes_Check(from)) {
2151 from_s = PyBytes_AS_STRING(from);
2152 from_len = PyBytes_GET_SIZE(from);
2153 }
2154 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2155 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 if (PyBytes_Check(to)) {
2158 to_s = PyBytes_AS_STRING(to);
2159 to_len = PyBytes_GET_SIZE(to);
2160 }
2161 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2162 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 return (PyObject *)replace((PyBytesObject *) self,
2165 from_s, from_len,
2166 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167}
2168
2169/** End DALKE **/
2170
2171/* Matches the end (direction >= 0) or start (direction < 0) of self
2172 * against substr, using the start and end arguments. Returns
2173 * -1 on error, 0 if not found and 1 if found.
2174 */
2175Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002176_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 Py_ssize_t len = PyBytes_GET_SIZE(self);
2180 Py_ssize_t slen;
2181 const char* sub;
2182 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 if (PyBytes_Check(substr)) {
2185 sub = PyBytes_AS_STRING(substr);
2186 slen = PyBytes_GET_SIZE(substr);
2187 }
2188 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2189 return -1;
2190 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 if (direction < 0) {
2195 /* startswith */
2196 if (start+slen > len)
2197 return 0;
2198 } else {
2199 /* endswith */
2200 if (end-start < slen || start > len)
2201 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002203 if (end-slen > start)
2204 start = end - slen;
2205 }
2206 if (end-start >= slen)
2207 return ! memcmp(str+start, sub, slen);
2208 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209}
2210
2211
2212PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002213"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214\n\
2215Return True if B starts with the specified prefix, False otherwise.\n\
2216With optional start, test B beginning at that position.\n\
2217With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002218prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219
2220static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002221bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002223 Py_ssize_t start = 0;
2224 Py_ssize_t end = PY_SSIZE_T_MAX;
2225 PyObject *subobj;
2226 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227
Jesus Ceaac451502011-04-20 17:09:23 +02002228 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002229 return NULL;
2230 if (PyTuple_Check(subobj)) {
2231 Py_ssize_t i;
2232 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2233 result = _bytes_tailmatch(self,
2234 PyTuple_GET_ITEM(subobj, i),
2235 start, end, -1);
2236 if (result == -1)
2237 return NULL;
2238 else if (result) {
2239 Py_RETURN_TRUE;
2240 }
2241 }
2242 Py_RETURN_FALSE;
2243 }
2244 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002245 if (result == -1) {
2246 if (PyErr_ExceptionMatches(PyExc_TypeError))
2247 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2248 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002249 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002250 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002251 else
2252 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253}
2254
2255
2256PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002257"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002258\n\
2259Return True if B ends with the specified suffix, False otherwise.\n\
2260With optional start, test B beginning at that position.\n\
2261With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002262suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
2264static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002265bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002267 Py_ssize_t start = 0;
2268 Py_ssize_t end = PY_SSIZE_T_MAX;
2269 PyObject *subobj;
2270 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002271
Jesus Ceaac451502011-04-20 17:09:23 +02002272 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 return NULL;
2274 if (PyTuple_Check(subobj)) {
2275 Py_ssize_t i;
2276 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2277 result = _bytes_tailmatch(self,
2278 PyTuple_GET_ITEM(subobj, i),
2279 start, end, +1);
2280 if (result == -1)
2281 return NULL;
2282 else if (result) {
2283 Py_RETURN_TRUE;
2284 }
2285 }
2286 Py_RETURN_FALSE;
2287 }
2288 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002289 if (result == -1) {
2290 if (PyErr_ExceptionMatches(PyExc_TypeError))
2291 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2292 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002294 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 else
2296 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297}
2298
2299
2300PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002301"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002303Decode B using the codec registered for encoding. Default encoding\n\
2304is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002305handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2306a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002308able to handle UnicodeDecodeErrors.");
2309
2310static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002311bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 const char *encoding = NULL;
2314 const char *errors = NULL;
2315 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2318 return NULL;
2319 if (encoding == NULL)
2320 encoding = PyUnicode_GetDefaultEncoding();
2321 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002322}
2323
Guido van Rossum20188312006-05-05 15:15:40 +00002324
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002325PyDoc_STRVAR(splitlines__doc__,
2326"B.splitlines([keepends]) -> list of lines\n\
2327\n\
2328Return a list of the lines in B, breaking at line boundaries.\n\
2329Line breaks are not included in the resulting list unless keepends\n\
2330is given and true.");
2331
2332static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002333bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002334{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002335 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002336 int keepends = 0;
2337
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002338 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2339 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002340 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002341
2342 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002343 (PyObject*) self, PyBytes_AS_STRING(self),
2344 PyBytes_GET_SIZE(self), keepends
2345 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002346}
2347
2348
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002350"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002351\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002353Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002355
2356static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002357hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 if (c >= 128)
2360 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002361 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 return c - '0';
2363 else {
David Malcolm96960882010-11-05 17:23:41 +00002364 if (Py_ISUPPER(c))
2365 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 if (c >= 'a' && c <= 'f')
2367 return c - 'a' + 10;
2368 }
2369 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002370}
2371
2372static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002373bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 PyObject *newstring, *hexobj;
2376 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 Py_ssize_t hexlen, byteslen, i, j;
2378 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002379 void *data;
2380 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2383 return NULL;
2384 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002385 if (PyUnicode_READY(hexobj))
2386 return NULL;
2387 kind = PyUnicode_KIND(hexobj);
2388 data = PyUnicode_DATA(hexobj);
2389 hexlen = PyUnicode_GET_LENGTH(hexobj);
2390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 byteslen = hexlen/2; /* This overestimates if there are spaces */
2392 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2393 if (!newstring)
2394 return NULL;
2395 buf = PyBytes_AS_STRING(newstring);
2396 for (i = j = 0; i < hexlen; i += 2) {
2397 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002398 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 i++;
2400 if (i >= hexlen)
2401 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002402 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2403 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 if (top == -1 || bot == -1) {
2405 PyErr_Format(PyExc_ValueError,
2406 "non-hexadecimal number found in "
2407 "fromhex() arg at position %zd", i);
2408 goto error;
2409 }
2410 buf[j++] = (top << 4) + bot;
2411 }
2412 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2413 goto error;
2414 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002415
2416 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 Py_XDECREF(newstring);
2418 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002419}
2420
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002421PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002422"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002423
2424static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002425bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002426{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 Py_ssize_t res;
2428 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2429 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002430}
2431
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002432
2433static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002434bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002435{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002437}
2438
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002439
2440static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002441bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002442 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2443 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2444 _Py_capitalize__doc__},
2445 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2446 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2447 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2448 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2449 endswith__doc__},
2450 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2451 expandtabs__doc__},
2452 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2453 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2454 fromhex_doc},
2455 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2456 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2457 _Py_isalnum__doc__},
2458 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2459 _Py_isalpha__doc__},
2460 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2461 _Py_isdigit__doc__},
2462 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2463 _Py_islower__doc__},
2464 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2465 _Py_isspace__doc__},
2466 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2467 _Py_istitle__doc__},
2468 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2469 _Py_isupper__doc__},
2470 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2471 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2472 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2473 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2474 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2475 _Py_maketrans__doc__},
2476 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2477 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2478 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2479 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2480 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2481 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2482 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002483 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002485 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002486 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 splitlines__doc__},
2488 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2489 startswith__doc__},
2490 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2491 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2492 _Py_swapcase__doc__},
2493 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2494 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2495 translate__doc__},
2496 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2497 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2498 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2499 sizeof__doc__},
2500 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002501};
2502
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503static PyObject *
2504str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2505
2506static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002507bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 PyObject *x = NULL;
2510 const char *encoding = NULL;
2511 const char *errors = NULL;
2512 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002513 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 Py_ssize_t size;
2515 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002516 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 if (type != &PyBytes_Type)
2519 return str_subtype_new(type, args, kwds);
2520 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2521 &encoding, &errors))
2522 return NULL;
2523 if (x == NULL) {
2524 if (encoding != NULL || errors != NULL) {
2525 PyErr_SetString(PyExc_TypeError,
2526 "encoding or errors without sequence "
2527 "argument");
2528 return NULL;
2529 }
2530 return PyBytes_FromString("");
2531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 if (PyUnicode_Check(x)) {
2534 /* Encode via the codec registry */
2535 if (encoding == NULL) {
2536 PyErr_SetString(PyExc_TypeError,
2537 "string argument without an encoding");
2538 return NULL;
2539 }
2540 new = PyUnicode_AsEncodedString(x, encoding, errors);
2541 if (new == NULL)
2542 return NULL;
2543 assert(PyBytes_Check(new));
2544 return new;
2545 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002546
2547 /* We'd like to call PyObject_Bytes here, but we need to check for an
2548 integer argument before deferring to PyBytes_FromObject, something
2549 PyObject_Bytes doesn't do. */
2550 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2551 if (func != NULL) {
2552 new = PyObject_CallFunctionObjArgs(func, NULL);
2553 Py_DECREF(func);
2554 if (new == NULL)
2555 return NULL;
2556 if (!PyBytes_Check(new)) {
2557 PyErr_Format(PyExc_TypeError,
2558 "__bytes__ returned non-bytes (type %.200s)",
2559 Py_TYPE(new)->tp_name);
2560 Py_DECREF(new);
2561 return NULL;
2562 }
2563 return new;
2564 }
2565 else if (PyErr_Occurred())
2566 return NULL;
2567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 /* Is it an integer? */
2569 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2570 if (size == -1 && PyErr_Occurred()) {
2571 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2572 return NULL;
2573 PyErr_Clear();
2574 }
2575 else if (size < 0) {
2576 PyErr_SetString(PyExc_ValueError, "negative count");
2577 return NULL;
2578 }
2579 else {
2580 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002581 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002583 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 return new;
2586 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002588 /* If it's not unicode, there can't be encoding or errors */
2589 if (encoding != NULL || errors != NULL) {
2590 PyErr_SetString(PyExc_TypeError,
2591 "encoding or errors without a string argument");
2592 return NULL;
2593 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002594
2595 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002596}
2597
2598PyObject *
2599PyBytes_FromObject(PyObject *x)
2600{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 PyObject *new, *it;
2602 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 if (x == NULL) {
2605 PyErr_BadInternalCall();
2606 return NULL;
2607 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002608
2609 if (PyBytes_CheckExact(x)) {
2610 Py_INCREF(x);
2611 return x;
2612 }
2613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 /* Use the modern buffer interface */
2615 if (PyObject_CheckBuffer(x)) {
2616 Py_buffer view;
2617 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2618 return NULL;
2619 new = PyBytes_FromStringAndSize(NULL, view.len);
2620 if (!new)
2621 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2623 &view, view.len, 'C') < 0)
2624 goto fail;
2625 PyBuffer_Release(&view);
2626 return new;
2627 fail:
2628 Py_XDECREF(new);
2629 PyBuffer_Release(&view);
2630 return NULL;
2631 }
2632 if (PyUnicode_Check(x)) {
2633 PyErr_SetString(PyExc_TypeError,
2634 "cannot convert unicode object to bytes");
2635 return NULL;
2636 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 if (PyList_CheckExact(x)) {
2639 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2640 if (new == NULL)
2641 return NULL;
2642 for (i = 0; i < Py_SIZE(x); i++) {
2643 Py_ssize_t value = PyNumber_AsSsize_t(
2644 PyList_GET_ITEM(x, i), PyExc_ValueError);
2645 if (value == -1 && PyErr_Occurred()) {
2646 Py_DECREF(new);
2647 return NULL;
2648 }
2649 if (value < 0 || value >= 256) {
2650 PyErr_SetString(PyExc_ValueError,
2651 "bytes must be in range(0, 256)");
2652 Py_DECREF(new);
2653 return NULL;
2654 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002655 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 }
2657 return new;
2658 }
2659 if (PyTuple_CheckExact(x)) {
2660 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2661 if (new == NULL)
2662 return NULL;
2663 for (i = 0; i < Py_SIZE(x); i++) {
2664 Py_ssize_t value = PyNumber_AsSsize_t(
2665 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2666 if (value == -1 && PyErr_Occurred()) {
2667 Py_DECREF(new);
2668 return NULL;
2669 }
2670 if (value < 0 || value >= 256) {
2671 PyErr_SetString(PyExc_ValueError,
2672 "bytes must be in range(0, 256)");
2673 Py_DECREF(new);
2674 return NULL;
2675 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002676 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 }
2678 return new;
2679 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 /* For iterator version, create a string object and resize as needed */
2682 size = _PyObject_LengthHint(x, 64);
2683 if (size == -1 && PyErr_Occurred())
2684 return NULL;
2685 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2686 returning a shared empty bytes string. This required because we
2687 want to call _PyBytes_Resize() the returned object, which we can
2688 only do on bytes objects with refcount == 1. */
2689 size += 1;
2690 new = PyBytes_FromStringAndSize(NULL, size);
2691 if (new == NULL)
2692 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 /* Get the iterator */
2695 it = PyObject_GetIter(x);
2696 if (it == NULL)
2697 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* Run the iterator to exhaustion */
2700 for (i = 0; ; i++) {
2701 PyObject *item;
2702 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Get the next item */
2705 item = PyIter_Next(it);
2706 if (item == NULL) {
2707 if (PyErr_Occurred())
2708 goto error;
2709 break;
2710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Interpret it as an int (__index__) */
2713 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2714 Py_DECREF(item);
2715 if (value == -1 && PyErr_Occurred())
2716 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 /* Range check */
2719 if (value < 0 || value >= 256) {
2720 PyErr_SetString(PyExc_ValueError,
2721 "bytes must be in range(0, 256)");
2722 goto error;
2723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 /* Append the byte */
2726 if (i >= size) {
2727 size = 2 * size + 1;
2728 if (_PyBytes_Resize(&new, size) < 0)
2729 goto error;
2730 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002731 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002732 }
2733 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 /* Clean up and return success */
2736 Py_DECREF(it);
2737 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738
2739 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002740 /* Error handling when new != NULL */
2741 Py_XDECREF(it);
2742 Py_DECREF(new);
2743 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744}
2745
2746static PyObject *
2747str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2748{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002749 PyObject *tmp, *pnew;
2750 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 assert(PyType_IsSubtype(type, &PyBytes_Type));
2753 tmp = bytes_new(&PyBytes_Type, args, kwds);
2754 if (tmp == NULL)
2755 return NULL;
2756 assert(PyBytes_CheckExact(tmp));
2757 n = PyBytes_GET_SIZE(tmp);
2758 pnew = type->tp_alloc(type, n);
2759 if (pnew != NULL) {
2760 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2761 PyBytes_AS_STRING(tmp), n+1);
2762 ((PyBytesObject *)pnew)->ob_shash =
2763 ((PyBytesObject *)tmp)->ob_shash;
2764 }
2765 Py_DECREF(tmp);
2766 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767}
2768
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002769PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002770"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002771bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002772bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002773bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2774bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002775\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002777 - an iterable yielding integers in range(256)\n\
2778 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002779 - any object implementing the buffer API.\n\
2780 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002781
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002782static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002783
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2786 "bytes",
2787 PyBytesObject_SIZE,
2788 sizeof(char),
2789 bytes_dealloc, /* tp_dealloc */
2790 0, /* tp_print */
2791 0, /* tp_getattr */
2792 0, /* tp_setattr */
2793 0, /* tp_reserved */
2794 (reprfunc)bytes_repr, /* tp_repr */
2795 0, /* tp_as_number */
2796 &bytes_as_sequence, /* tp_as_sequence */
2797 &bytes_as_mapping, /* tp_as_mapping */
2798 (hashfunc)bytes_hash, /* tp_hash */
2799 0, /* tp_call */
2800 bytes_str, /* tp_str */
2801 PyObject_GenericGetAttr, /* tp_getattro */
2802 0, /* tp_setattro */
2803 &bytes_as_buffer, /* tp_as_buffer */
2804 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2805 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2806 bytes_doc, /* tp_doc */
2807 0, /* tp_traverse */
2808 0, /* tp_clear */
2809 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2810 0, /* tp_weaklistoffset */
2811 bytes_iter, /* tp_iter */
2812 0, /* tp_iternext */
2813 bytes_methods, /* tp_methods */
2814 0, /* tp_members */
2815 0, /* tp_getset */
2816 &PyBaseObject_Type, /* tp_base */
2817 0, /* tp_dict */
2818 0, /* tp_descr_get */
2819 0, /* tp_descr_set */
2820 0, /* tp_dictoffset */
2821 0, /* tp_init */
2822 0, /* tp_alloc */
2823 bytes_new, /* tp_new */
2824 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002825};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002826
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002827void
2828PyBytes_Concat(register PyObject **pv, register PyObject *w)
2829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 register PyObject *v;
2831 assert(pv != NULL);
2832 if (*pv == NULL)
2833 return;
2834 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002835 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 return;
2837 }
2838 v = bytes_concat(*pv, w);
2839 Py_DECREF(*pv);
2840 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002841}
2842
2843void
2844PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2845{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002846 PyBytes_Concat(pv, w);
2847 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848}
2849
2850
2851/* The following function breaks the notion that strings are immutable:
2852 it changes the size of a string. We get away with this only if there
2853 is only one module referencing the object. You can also think of it
2854 as creating a new string object and destroying the old one, only
2855 more efficiently. In any case, don't use this if the string may
2856 already be known to some other part of the code...
2857 Note that if there's not enough memory to resize the string, the original
2858 string object at *pv is deallocated, *pv is set to NULL, an "out of
2859 memory" exception is set, and -1 is returned. Else (on success) 0 is
2860 returned, and the value in *pv may or may not be the same as on input.
2861 As always, an extra byte is allocated for a trailing \0 byte (newsize
2862 does *not* include that), and a trailing \0 byte is stored.
2863*/
2864
2865int
2866_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 register PyObject *v;
2869 register PyBytesObject *sv;
2870 v = *pv;
2871 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2872 *pv = 0;
2873 Py_DECREF(v);
2874 PyErr_BadInternalCall();
2875 return -1;
2876 }
2877 /* XXX UNREF/NEWREF interface should be more symmetrical */
2878 _Py_DEC_REFTOTAL;
2879 _Py_ForgetReference(v);
2880 *pv = (PyObject *)
2881 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2882 if (*pv == NULL) {
2883 PyObject_Del(v);
2884 PyErr_NoMemory();
2885 return -1;
2886 }
2887 _Py_NewReference(*pv);
2888 sv = (PyBytesObject *) *pv;
2889 Py_SIZE(sv) = newsize;
2890 sv->ob_sval[newsize] = '\0';
2891 sv->ob_shash = -1; /* invalidate cached hash value */
2892 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893}
2894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895void
2896PyBytes_Fini(void)
2897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002899 for (i = 0; i < UCHAR_MAX + 1; i++)
2900 Py_CLEAR(characters[i]);
2901 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902}
2903
Benjamin Peterson4116f362008-05-27 00:36:20 +00002904/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002905
2906typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 PyObject_HEAD
2908 Py_ssize_t it_index;
2909 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002911
2912static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 _PyObject_GC_UNTRACK(it);
2916 Py_XDECREF(it->it_seq);
2917 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002918}
2919
2920static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002922{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 Py_VISIT(it->it_seq);
2924 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002925}
2926
2927static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 PyBytesObject *seq;
2931 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 assert(it != NULL);
2934 seq = it->it_seq;
2935 if (seq == NULL)
2936 return NULL;
2937 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2940 item = PyLong_FromLong(
2941 (unsigned char)seq->ob_sval[it->it_index]);
2942 if (item != NULL)
2943 ++it->it_index;
2944 return item;
2945 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002947 Py_DECREF(seq);
2948 it->it_seq = NULL;
2949 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002950}
2951
2952static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 Py_ssize_t len = 0;
2956 if (it->it_seq)
2957 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2958 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002959}
2960
2961PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002963
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002964static PyObject *
2965striter_reduce(striterobject *it)
2966{
2967 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002968 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002969 it->it_seq, it->it_index);
2970 } else {
2971 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2972 if (u == NULL)
2973 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002974 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002975 }
2976}
2977
2978PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2979
2980static PyObject *
2981striter_setstate(striterobject *it, PyObject *state)
2982{
2983 Py_ssize_t index = PyLong_AsSsize_t(state);
2984 if (index == -1 && PyErr_Occurred())
2985 return NULL;
2986 if (index < 0)
2987 index = 0;
2988 it->it_index = index;
2989 Py_RETURN_NONE;
2990}
2991
2992PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2993
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2996 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002997 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2998 reduce_doc},
2999 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3000 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003002};
3003
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3006 "bytes_iterator", /* tp_name */
3007 sizeof(striterobject), /* tp_basicsize */
3008 0, /* tp_itemsize */
3009 /* methods */
3010 (destructor)striter_dealloc, /* tp_dealloc */
3011 0, /* tp_print */
3012 0, /* tp_getattr */
3013 0, /* tp_setattr */
3014 0, /* tp_reserved */
3015 0, /* tp_repr */
3016 0, /* tp_as_number */
3017 0, /* tp_as_sequence */
3018 0, /* tp_as_mapping */
3019 0, /* tp_hash */
3020 0, /* tp_call */
3021 0, /* tp_str */
3022 PyObject_GenericGetAttr, /* tp_getattro */
3023 0, /* tp_setattro */
3024 0, /* tp_as_buffer */
3025 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3026 0, /* tp_doc */
3027 (traverseproc)striter_traverse, /* tp_traverse */
3028 0, /* tp_clear */
3029 0, /* tp_richcompare */
3030 0, /* tp_weaklistoffset */
3031 PyObject_SelfIter, /* tp_iter */
3032 (iternextfunc)striter_next, /* tp_iternext */
3033 striter_methods, /* tp_methods */
3034 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003035};
3036
3037static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003038bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 if (!PyBytes_Check(seq)) {
3043 PyErr_BadInternalCall();
3044 return NULL;
3045 }
3046 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3047 if (it == NULL)
3048 return NULL;
3049 it->it_index = 0;
3050 Py_INCREF(seq);
3051 it->it_seq = (PyBytesObject *)seq;
3052 _PyObject_GC_TRACK(it);
3053 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054}