blob: 32c5d71ecd1c9d952c0af3b7b62db6901c03b22e [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of
   the ob_sval member plus one byte.  Any allocation for an object holding
   n bytes of data should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE rather than sizeof(PyBytesObject) saves
   3 bytes per allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100189 {
190 int c = va_arg(count, int);
191 if (c < 0 || c > 255) {
192 PyErr_SetString(PyExc_OverflowError,
193 "PyBytes_FromFormatV(): %c format "
194 "expects an integer in range [0; 255]");
195 return NULL;
196 }
197 n++;
198 break;
199 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100279 {
280 int c = va_arg(vargs, int);
281 /* c has been checked for overflow in the first step */
282 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100284 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 case 'd':
286 if (longflag)
287 sprintf(s, "%ld", va_arg(vargs, long));
288 else if (size_tflag)
289 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
290 va_arg(vargs, Py_ssize_t));
291 else
292 sprintf(s, "%d", va_arg(vargs, int));
293 s += strlen(s);
294 break;
295 case 'u':
296 if (longflag)
297 sprintf(s, "%lu",
298 va_arg(vargs, unsigned long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
301 va_arg(vargs, size_t));
302 else
303 sprintf(s, "%u",
304 va_arg(vargs, unsigned int));
305 s += strlen(s);
306 break;
307 case 'i':
308 sprintf(s, "%i", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 'x':
312 sprintf(s, "%x", va_arg(vargs, int));
313 s += strlen(s);
314 break;
315 case 's':
316 p = va_arg(vargs, char*);
317 i = strlen(p);
318 if (n > 0 && i > n)
319 i = n;
320 Py_MEMCPY(s, p, i);
321 s += i;
322 break;
323 case 'p':
324 sprintf(s, "%p", va_arg(vargs, void*));
325 /* %p is ill-defined: ensure leading 0x. */
326 if (s[1] == 'X')
327 s[1] = 'x';
328 else if (s[1] != 'x') {
329 memmove(s+2, s, strlen(s)+1);
330 s[0] = '0';
331 s[1] = 'x';
332 }
333 s += strlen(s);
334 break;
335 case '%':
336 *s++ = '%';
337 break;
338 default:
339 strcpy(s, p);
340 s += strlen(s);
341 goto end;
342 }
343 } else
344 *s++ = *f;
345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346
347 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
349 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000350}
351
352PyObject *
353PyBytes_FromFormat(const char *format, ...)
354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 PyObject* ret;
356 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 ret = PyBytes_FromFormatV(format, vargs);
364 va_end(vargs);
365 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366}
367
368static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000369bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000372}
373
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000374/* Unescape a backslash-escaped string. If unicode is non-zero,
375 the string is a u-literal. If recode_encoding is non-zero,
376 the string is UTF-8 encoded and should be re-encoded in the
377 specified encoding. */
378
379PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 Py_ssize_t len,
381 const char *errors,
382 Py_ssize_t unicode,
383 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 int c;
386 char *p, *buf;
387 const char *end;
388 PyObject *v;
389 Py_ssize_t newlen = recode_encoding ? 4*len:len;
390 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
391 if (v == NULL)
392 return NULL;
393 p = buf = PyBytes_AsString(v);
394 end = s + len;
395 while (s < end) {
396 if (*s != '\\') {
397 non_esc:
398 if (recode_encoding && (*s & 0x80)) {
399 PyObject *u, *w;
400 char *r;
401 const char* t;
402 Py_ssize_t rn;
403 t = s;
404 /* Decode non-ASCII bytes as UTF-8. */
405 while (t < end && (*t & 0x80)) t++;
406 u = PyUnicode_DecodeUTF8(s, t - s, errors);
407 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 /* Recode them in target encoding. */
410 w = PyUnicode_AsEncodedString(
411 u, recode_encoding, errors);
412 Py_DECREF(u);
413 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 /* Append bytes to output buffer. */
416 assert(PyBytes_Check(w));
417 r = PyBytes_AS_STRING(w);
418 rn = PyBytes_GET_SIZE(w);
419 Py_MEMCPY(p, r, rn);
420 p += rn;
421 Py_DECREF(w);
422 s = t;
423 } else {
424 *p++ = *s++;
425 }
426 continue;
427 }
428 s++;
429 if (s==end) {
430 PyErr_SetString(PyExc_ValueError,
431 "Trailing \\ in string");
432 goto failed;
433 }
434 switch (*s++) {
435 /* XXX This assumes ASCII! */
436 case '\n': break;
437 case '\\': *p++ = '\\'; break;
438 case '\'': *p++ = '\''; break;
439 case '\"': *p++ = '\"'; break;
440 case 'b': *p++ = '\b'; break;
441 case 'f': *p++ = '\014'; break; /* FF */
442 case 't': *p++ = '\t'; break;
443 case 'n': *p++ = '\n'; break;
444 case 'r': *p++ = '\r'; break;
445 case 'v': *p++ = '\013'; break; /* VT */
446 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
447 case '0': case '1': case '2': case '3':
448 case '4': case '5': case '6': case '7':
449 c = s[-1] - '0';
450 if (s < end && '0' <= *s && *s <= '7') {
451 c = (c<<3) + *s++ - '0';
452 if (s < end && '0' <= *s && *s <= '7')
453 c = (c<<3) + *s++ - '0';
454 }
455 *p++ = c;
456 break;
457 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000458 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 unsigned int x = 0;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x = 10 + c - 'a';
466 else
467 x = 10 + c - 'A';
468 x = x << 4;
469 c = Py_CHARMASK(*s);
470 s++;
David Malcolm96960882010-11-05 17:23:41 +0000471 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000473 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 x += 10 + c - 'a';
475 else
476 x += 10 + c - 'A';
477 *p++ = x;
478 break;
479 }
480 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200481 PyErr_Format(PyExc_ValueError,
482 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200483 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 goto failed;
485 }
486 if (strcmp(errors, "replace") == 0) {
487 *p++ = '?';
488 } else if (strcmp(errors, "ignore") == 0)
489 /* do nothing */;
490 else {
491 PyErr_Format(PyExc_ValueError,
492 "decoding error; unknown "
493 "error handling code: %.400s",
494 errors);
495 goto failed;
496 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200497 /* skip \x */
498 if (s < end && Py_ISXDIGIT(s[0]))
499 s++; /* and a hexdigit */
500 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 default:
502 *p++ = '\\';
503 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200504 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 UTF-8 bytes may follow. */
506 }
507 }
508 if (p-buf < newlen)
509 _PyBytes_Resize(&v, p - buf);
510 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000511 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 Py_DECREF(v);
513 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516/* -------------------------------------------------------------------- */
517/* object api */
518
519Py_ssize_t
520PyBytes_Size(register PyObject *op)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 if (!PyBytes_Check(op)) {
523 PyErr_Format(PyExc_TypeError,
524 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
525 return -1;
526 }
527 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000528}
529
530char *
531PyBytes_AsString(register PyObject *op)
532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(op)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
536 return NULL;
537 }
538 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539}
540
541int
542PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 register char **s,
544 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000545{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 if (s == NULL) {
547 PyErr_BadInternalCall();
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 if (!PyBytes_Check(obj)) {
552 PyErr_Format(PyExc_TypeError,
553 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
554 return -1;
555 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 *s = PyBytes_AS_STRING(obj);
558 if (len != NULL)
559 *len = PyBytes_GET_SIZE(obj);
560 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
561 PyErr_SetString(PyExc_TypeError,
562 "expected bytes with no null");
563 return -1;
564 }
565 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566}
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
568/* -------------------------------------------------------------------- */
569/* Methods */
570
Eric Smith0923d1d2009-04-16 20:16:10 +0000571#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
573#include "stringlib/fastsearch.h"
574#include "stringlib/count.h"
575#include "stringlib/find.h"
576#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000577#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000578#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000579
Eric Smith0f78bff2009-11-30 01:01:42 +0000580#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000581
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000582PyObject *
583PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000584{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -0400587 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200589 unsigned char quote, *s, *p;
590
591 /* Compute size of output string */
592 squotes = dquotes = 0;
593 newsize = 3; /* b'' */
594 s = (unsigned char*)op->ob_sval;
595 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400596 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200597 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400598 case '\'': squotes++; break;
599 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400601 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200602 default:
603 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400604 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200605 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400606 if (newsize > PY_SSIZE_T_MAX - incr)
607 goto overflow;
608 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 }
610 quote = '\'';
611 if (smartquotes && squotes && !dquotes)
612 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400613 if (squotes && quote == '\'') {
614 if (newsize > PY_SSIZE_T_MAX - squotes)
615 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200618
619 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 if (v == NULL) {
621 return NULL;
622 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200623 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000624
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 *p++ = 'b', *p++ = quote;
626 for (i = 0; i < length; i++) {
627 unsigned char c = op->ob_sval[i];
628 if (c == quote || c == '\\')
629 *p++ = '\\', *p++ = c;
630 else if (c == '\t')
631 *p++ = '\\', *p++ = 't';
632 else if (c == '\n')
633 *p++ = '\\', *p++ = 'n';
634 else if (c == '\r')
635 *p++ = '\\', *p++ = 'r';
636 else if (c < ' ' || c >= 0x7f) {
637 *p++ = '\\';
638 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200639 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
640 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 else
643 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200646 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200647 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400648
649 overflow:
650 PyErr_SetString(PyExc_OverflowError,
651 "bytes object is too large to make repr");
652 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000653}
654
Neal Norwitz6968b052007-02-27 19:02:19 +0000655static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000656bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000659}
660
Neal Norwitz6968b052007-02-27 19:02:19 +0000661static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000662bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000663{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 if (Py_BytesWarningFlag) {
665 if (PyErr_WarnEx(PyExc_BytesWarning,
666 "str() on a bytes instance", 1))
667 return NULL;
668 }
669 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000670}
671
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000673bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000676}
Neal Norwitz6968b052007-02-27 19:02:19 +0000677
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678/* This is also used by PyBytes_Concat() */
679static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000680bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 Py_ssize_t size;
683 Py_buffer va, vb;
684 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 va.len = -1;
687 vb.len = -1;
688 if (_getbuffer(a, &va) < 0 ||
689 _getbuffer(b, &vb) < 0) {
690 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
691 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 /* Optimize end cases */
696 if (va.len == 0 && PyBytes_CheckExact(b)) {
697 result = b;
698 Py_INCREF(result);
699 goto done;
700 }
701 if (vb.len == 0 && PyBytes_CheckExact(a)) {
702 result = a;
703 Py_INCREF(result);
704 goto done;
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 size = va.len + vb.len;
708 if (size < 0) {
709 PyErr_NoMemory();
710 goto done;
711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 result = PyBytes_FromStringAndSize(NULL, size);
714 if (result != NULL) {
715 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
716 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
717 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000718
719 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 if (va.len != -1)
721 PyBuffer_Release(&va);
722 if (vb.len != -1)
723 PyBuffer_Release(&vb);
724 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000725}
Neal Norwitz6968b052007-02-27 19:02:19 +0000726
727static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000728bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000729{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 register Py_ssize_t i;
731 register Py_ssize_t j;
732 register Py_ssize_t size;
733 register PyBytesObject *op;
734 size_t nbytes;
735 if (n < 0)
736 n = 0;
737 /* watch out for overflows: the size can overflow int,
738 * and the # of bytes needed can overflow size_t
739 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000740 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 PyErr_SetString(PyExc_OverflowError,
742 "repeated bytes are too long");
743 return NULL;
744 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000745 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
747 Py_INCREF(a);
748 return (PyObject *)a;
749 }
750 nbytes = (size_t)size;
751 if (nbytes + PyBytesObject_SIZE <= nbytes) {
752 PyErr_SetString(PyExc_OverflowError,
753 "repeated bytes are too long");
754 return NULL;
755 }
756 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
757 if (op == NULL)
758 return PyErr_NoMemory();
759 PyObject_INIT_VAR(op, &PyBytes_Type, size);
760 op->ob_shash = -1;
761 op->ob_sval[size] = '\0';
762 if (Py_SIZE(a) == 1 && n > 0) {
763 memset(op->ob_sval, a->ob_sval[0] , n);
764 return (PyObject *) op;
765 }
766 i = 0;
767 if (i < size) {
768 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
769 i = Py_SIZE(a);
770 }
771 while (i < size) {
772 j = (i <= size-i) ? i : size-i;
773 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
774 i += j;
775 }
776 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000777}
778
Guido van Rossum98297ee2007-11-06 21:34:58 +0000779static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000780bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781{
782 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
783 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000784 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000785 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000786 PyErr_Clear();
787 if (_getbuffer(arg, &varg) < 0)
788 return -1;
789 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
790 varg.buf, varg.len, 0);
791 PyBuffer_Release(&varg);
792 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793 }
794 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000795 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
796 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000797 }
798
Antoine Pitrou0010d372010-08-15 17:12:55 +0000799 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000800}
801
Neal Norwitz6968b052007-02-27 19:02:19 +0000802static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000803bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000804{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 if (i < 0 || i >= Py_SIZE(a)) {
806 PyErr_SetString(PyExc_IndexError, "index out of range");
807 return NULL;
808 }
809 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000810}
811
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000812static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000813bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 int c;
816 Py_ssize_t len_a, len_b;
817 Py_ssize_t min_len;
818 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000820 /* Make sure both arguments are strings. */
821 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
822 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
823 (PyObject_IsInstance((PyObject*)a,
824 (PyObject*)&PyUnicode_Type) ||
825 PyObject_IsInstance((PyObject*)b,
826 (PyObject*)&PyUnicode_Type))) {
827 if (PyErr_WarnEx(PyExc_BytesWarning,
828 "Comparison between bytes and string", 1))
829 return NULL;
830 }
831 result = Py_NotImplemented;
832 goto out;
833 }
834 if (a == b) {
835 switch (op) {
836 case Py_EQ:case Py_LE:case Py_GE:
837 result = Py_True;
838 goto out;
839 case Py_NE:case Py_LT:case Py_GT:
840 result = Py_False;
841 goto out;
842 }
843 }
844 if (op == Py_EQ) {
845 /* Supporting Py_NE here as well does not save
846 much time, since Py_NE is rarely used. */
847 if (Py_SIZE(a) == Py_SIZE(b)
848 && (a->ob_sval[0] == b->ob_sval[0]
849 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
850 result = Py_True;
851 } else {
852 result = Py_False;
853 }
854 goto out;
855 }
856 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
857 min_len = (len_a < len_b) ? len_a : len_b;
858 if (min_len > 0) {
859 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
860 if (c==0)
861 c = memcmp(a->ob_sval, b->ob_sval, min_len);
862 } else
863 c = 0;
864 if (c == 0)
865 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
866 switch (op) {
867 case Py_LT: c = c < 0; break;
868 case Py_LE: c = c <= 0; break;
869 case Py_EQ: assert(0); break; /* unreachable */
870 case Py_NE: c = c != 0; break;
871 case Py_GT: c = c > 0; break;
872 case Py_GE: c = c >= 0; break;
873 default:
874 result = Py_NotImplemented;
875 goto out;
876 }
877 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000878 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 Py_INCREF(result);
880 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000881}
882
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000883static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000884bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000885{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100886 if (a->ob_shash == -1) {
887 /* Can't fail */
888 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
889 }
890 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000891}
892
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000893static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000894bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 if (PyIndex_Check(item)) {
897 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
898 if (i == -1 && PyErr_Occurred())
899 return NULL;
900 if (i < 0)
901 i += PyBytes_GET_SIZE(self);
902 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
903 PyErr_SetString(PyExc_IndexError,
904 "index out of range");
905 return NULL;
906 }
907 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
908 }
909 else if (PySlice_Check(item)) {
910 Py_ssize_t start, stop, step, slicelength, cur, i;
911 char* source_buf;
912 char* result_buf;
913 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000914
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000915 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 PyBytes_GET_SIZE(self),
917 &start, &stop, &step, &slicelength) < 0) {
918 return NULL;
919 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 if (slicelength <= 0) {
922 return PyBytes_FromStringAndSize("", 0);
923 }
924 else if (start == 0 && step == 1 &&
925 slicelength == PyBytes_GET_SIZE(self) &&
926 PyBytes_CheckExact(self)) {
927 Py_INCREF(self);
928 return (PyObject *)self;
929 }
930 else if (step == 1) {
931 return PyBytes_FromStringAndSize(
932 PyBytes_AS_STRING(self) + start,
933 slicelength);
934 }
935 else {
936 source_buf = PyBytes_AS_STRING(self);
937 result = PyBytes_FromStringAndSize(NULL, slicelength);
938 if (result == NULL)
939 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 result_buf = PyBytes_AS_STRING(result);
942 for (cur = start, i = 0; i < slicelength;
943 cur += step, i++) {
944 result_buf[i] = source_buf[cur];
945 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 return result;
948 }
949 }
950 else {
951 PyErr_Format(PyExc_TypeError,
952 "byte indices must be integers, not %.200s",
953 Py_TYPE(item)->tp_name);
954 return NULL;
955 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000956}
957
958static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000959bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000960{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
962 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963}
964
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000965static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 (lenfunc)bytes_length, /*sq_length*/
967 (binaryfunc)bytes_concat, /*sq_concat*/
968 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
969 (ssizeargfunc)bytes_item, /*sq_item*/
970 0, /*sq_slice*/
971 0, /*sq_ass_item*/
972 0, /*sq_ass_slice*/
973 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000974};
975
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000976static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 (lenfunc)bytes_length,
978 (binaryfunc)bytes_subscript,
979 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980};
981
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000982static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 (getbufferproc)bytes_buffer_getbuffer,
984 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000985};
986
987
/* Strip modes; the values double as indexes into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its three
   leading characters ("|O:"). */
#define STRIPNAME(i) (stripformat[(i)] + 3)
996
Neal Norwitz6968b052007-02-27 19:02:19 +0000997PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200998"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000999\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001000Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001001If sep is not specified or is None, B is split on ASCII whitespace\n\
1002characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001003If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001004
1005static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001006bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001007{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001008 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1010 Py_ssize_t maxsplit = -1;
1011 const char *s = PyBytes_AS_STRING(self), *sub;
1012 Py_buffer vsub;
1013 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001014
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001015 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1016 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 return NULL;
1018 if (maxsplit < 0)
1019 maxsplit = PY_SSIZE_T_MAX;
1020 if (subobj == Py_None)
1021 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1022 if (_getbuffer(subobj, &vsub) < 0)
1023 return NULL;
1024 sub = vsub.buf;
1025 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1028 PyBuffer_Release(&vsub);
1029 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001030}
1031
Neal Norwitz6968b052007-02-27 19:02:19 +00001032PyDoc_STRVAR(partition__doc__,
1033"B.partition(sep) -> (head, sep, tail)\n\
1034\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001035Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001036the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001037found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001038
1039static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001040bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 const char *sep;
1043 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 if (PyBytes_Check(sep_obj)) {
1046 sep = PyBytes_AS_STRING(sep_obj);
1047 sep_len = PyBytes_GET_SIZE(sep_obj);
1048 }
1049 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1050 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 return stringlib_partition(
1053 (PyObject*) self,
1054 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1055 sep_obj, sep, sep_len
1056 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001057}
1058
1059PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001060"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001061\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001062Search for the separator sep in B, starting at the end of B,\n\
1063and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001064part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001065bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001066
1067static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001068bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 const char *sep;
1071 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 if (PyBytes_Check(sep_obj)) {
1074 sep = PyBytes_AS_STRING(sep_obj);
1075 sep_len = PyBytes_GET_SIZE(sep_obj);
1076 }
1077 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1078 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 return stringlib_rpartition(
1081 (PyObject*) self,
1082 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1083 sep_obj, sep, sep_len
1084 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001085}
1086
Neal Norwitz6968b052007-02-27 19:02:19 +00001087PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001088"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001089\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001090Return a list of the sections in B, using sep as the delimiter,\n\
1091starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001092If sep is not given, B is split on ASCII whitespace characters\n\
1093(space, tab, return, newline, formfeed, vertical tab).\n\
1094If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001095
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096
Neal Norwitz6968b052007-02-27 19:02:19 +00001097static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001098bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001099{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001100 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1102 Py_ssize_t maxsplit = -1;
1103 const char *s = PyBytes_AS_STRING(self), *sub;
1104 Py_buffer vsub;
1105 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001106
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001107 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1108 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 return NULL;
1110 if (maxsplit < 0)
1111 maxsplit = PY_SSIZE_T_MAX;
1112 if (subobj == Py_None)
1113 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1114 if (_getbuffer(subobj, &vsub) < 0)
1115 return NULL;
1116 sub = vsub.buf;
1117 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1120 PyBuffer_Release(&vsub);
1121 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001122}
1123
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124
1125PyDoc_STRVAR(join__doc__,
1126"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001127\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001128Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1130
Neal Norwitz6968b052007-02-27 19:02:19 +00001131static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001132bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 char *sep = PyBytes_AS_STRING(self);
1135 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1136 PyObject *res = NULL;
1137 char *p;
1138 Py_ssize_t seqlen = 0;
1139 size_t sz = 0;
1140 Py_ssize_t i;
1141 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 seq = PySequence_Fast(orig, "");
1144 if (seq == NULL) {
1145 return NULL;
1146 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 seqlen = PySequence_Size(seq);
1149 if (seqlen == 0) {
1150 Py_DECREF(seq);
1151 return PyBytes_FromString("");
1152 }
1153 if (seqlen == 1) {
1154 item = PySequence_Fast_GET_ITEM(seq, 0);
1155 if (PyBytes_CheckExact(item)) {
1156 Py_INCREF(item);
1157 Py_DECREF(seq);
1158 return item;
1159 }
1160 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 /* There are at least two things to join, or else we have a subclass
1163 * of the builtin types in the sequence.
1164 * Do a pre-pass to figure out the total amount of space we'll
1165 * need (sz), and see whether all argument are bytes.
1166 */
1167 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1168 for (i = 0; i < seqlen; i++) {
1169 const size_t old_sz = sz;
1170 item = PySequence_Fast_GET_ITEM(seq, i);
1171 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1172 PyErr_Format(PyExc_TypeError,
1173 "sequence item %zd: expected bytes,"
1174 " %.80s found",
1175 i, Py_TYPE(item)->tp_name);
1176 Py_DECREF(seq);
1177 return NULL;
1178 }
1179 sz += Py_SIZE(item);
1180 if (i != 0)
1181 sz += seplen;
1182 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1183 PyErr_SetString(PyExc_OverflowError,
1184 "join() result is too long for bytes");
1185 Py_DECREF(seq);
1186 return NULL;
1187 }
1188 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 /* Allocate result space. */
1191 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1192 if (res == NULL) {
1193 Py_DECREF(seq);
1194 return NULL;
1195 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 /* Catenate everything. */
1198 /* I'm not worried about a PyByteArray item growing because there's
1199 nowhere in this function where we release the GIL. */
1200 p = PyBytes_AS_STRING(res);
1201 for (i = 0; i < seqlen; ++i) {
1202 size_t n;
1203 char *q;
1204 if (i) {
1205 Py_MEMCPY(p, sep, seplen);
1206 p += seplen;
1207 }
1208 item = PySequence_Fast_GET_ITEM(seq, i);
1209 n = Py_SIZE(item);
1210 if (PyBytes_Check(item))
1211 q = PyBytes_AS_STRING(item);
1212 else
1213 q = PyByteArray_AS_STRING(item);
1214 Py_MEMCPY(p, q, n);
1215 p += n;
1216 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 Py_DECREF(seq);
1219 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001220}
1221
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222PyObject *
1223_PyBytes_Join(PyObject *sep, PyObject *x)
1224{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 assert(sep != NULL && PyBytes_Check(sep));
1226 assert(x != NULL);
1227 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228}
1229
/* Helper macro to fix up start/end slice values against a length.
 *
 *   - end greater than len is clamped to len;
 *   - a negative end or start has len added and, if still negative,
 *     becomes 0;
 *   - a positive start is left untouched, even if it exceeds len.
 *
 * start and end must be modifiable lvalues.  Every argument may be
 * evaluated more than once, so none of them may have side effects.
 * The body is wrapped in do { } while (0) so that the macro acts as a
 * single statement (safe inside an unbraced if/else); a trailing
 * semicolon is required at the call site.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244
1245Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001246bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001249 char byte;
1250 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 const char *sub;
1252 Py_ssize_t sub_len;
1253 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001254 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255
Antoine Pitrouac65d962011-10-20 23:54:17 +02001256 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1257 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259
Antoine Pitrouac65d962011-10-20 23:54:17 +02001260 if (subobj) {
1261 if (_getbuffer(subobj, &subbuf) < 0)
1262 return -2;
1263
1264 sub = subbuf.buf;
1265 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001267 else {
1268 sub = &byte;
1269 sub_len = 1;
1270 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001273 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1275 sub, sub_len, start, end);
1276 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001277 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1279 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001280
1281 if (subobj)
1282 PyBuffer_Release(&subbuf);
1283
1284 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001285}
1286
1287
1288PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001289"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001290\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001291Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001292such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001294\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295Return -1 on failure.");
1296
Neal Norwitz6968b052007-02-27 19:02:19 +00001297static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001298bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 Py_ssize_t result = bytes_find_internal(self, args, +1);
1301 if (result == -2)
1302 return NULL;
1303 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001304}
1305
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
1307PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001308"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001309\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310Like B.find() but raise ValueError when the substring is not found.");
1311
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001312static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001313bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 Py_ssize_t result = bytes_find_internal(self, args, +1);
1316 if (result == -2)
1317 return NULL;
1318 if (result == -1) {
1319 PyErr_SetString(PyExc_ValueError,
1320 "substring not found");
1321 return NULL;
1322 }
1323 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001324}
1325
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326
1327PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001328"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001329\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001331such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001333\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334Return -1 on failure.");
1335
Neal Norwitz6968b052007-02-27 19:02:19 +00001336static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001337bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 Py_ssize_t result = bytes_find_internal(self, args, -1);
1340 if (result == -2)
1341 return NULL;
1342 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001343}
1344
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001345
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001346PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001347"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348\n\
1349Like B.rfind() but raise ValueError when the substring is not found.");
1350
1351static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001352bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 Py_ssize_t result = bytes_find_internal(self, args, -1);
1355 if (result == -2)
1356 return NULL;
1357 if (result == -1) {
1358 PyErr_SetString(PyExc_ValueError,
1359 "substring not found");
1360 return NULL;
1361 }
1362 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001363}
1364
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
1366Py_LOCAL_INLINE(PyObject *)
1367do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 Py_buffer vsep;
1370 char *s = PyBytes_AS_STRING(self);
1371 Py_ssize_t len = PyBytes_GET_SIZE(self);
1372 char *sep;
1373 Py_ssize_t seplen;
1374 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (_getbuffer(sepobj, &vsep) < 0)
1377 return NULL;
1378 sep = vsep.buf;
1379 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 i = 0;
1382 if (striptype != RIGHTSTRIP) {
1383 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1384 i++;
1385 }
1386 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 j = len;
1389 if (striptype != LEFTSTRIP) {
1390 do {
1391 j--;
1392 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1393 j++;
1394 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1399 Py_INCREF(self);
1400 return (PyObject*)self;
1401 }
1402 else
1403 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001404}
1405
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
1407Py_LOCAL_INLINE(PyObject *)
1408do_strip(PyBytesObject *self, int striptype)
1409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 char *s = PyBytes_AS_STRING(self);
1411 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 i = 0;
1414 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001415 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 i++;
1417 }
1418 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 j = len;
1421 if (striptype != LEFTSTRIP) {
1422 do {
1423 j--;
David Malcolm96960882010-11-05 17:23:41 +00001424 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 j++;
1426 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1429 Py_INCREF(self);
1430 return (PyObject*)self;
1431 }
1432 else
1433 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434}
1435
1436
1437Py_LOCAL_INLINE(PyObject *)
1438do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1443 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 if (sep != NULL && sep != Py_None) {
1446 return do_xstrip(self, striptype, sep);
1447 }
1448 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449}
1450
1451
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001453"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001455Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001456If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001458bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 if (PyTuple_GET_SIZE(args) == 0)
1461 return do_strip(self, BOTHSTRIP); /* Common case */
1462 else
1463 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464}
1465
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001466
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001468"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001470Strip leading bytes contained in the argument.\n\
1471If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001472static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001473bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 if (PyTuple_GET_SIZE(args) == 0)
1476 return do_strip(self, LEFTSTRIP); /* Common case */
1477 else
1478 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001479}
1480
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001481
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001482PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001484\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001485Strip trailing bytes contained in the argument.\n\
1486If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001487static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001489{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 if (PyTuple_GET_SIZE(args) == 0)
1491 return do_strip(self, RIGHTSTRIP); /* Common case */
1492 else
1493 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001494}
Neal Norwitz6968b052007-02-27 19:02:19 +00001495
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496
1497PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001498"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001499\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001500Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001501string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502as in slice notation.");
1503
1504static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001505bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 PyObject *sub_obj;
1508 const char *str = PyBytes_AS_STRING(self), *sub;
1509 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001510 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouac65d962011-10-20 23:54:17 +02001513 Py_buffer vsub;
1514 PyObject *count_obj;
1515
1516 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1517 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001519
Antoine Pitrouac65d962011-10-20 23:54:17 +02001520 if (sub_obj) {
1521 if (_getbuffer(sub_obj, &vsub) < 0)
1522 return NULL;
1523
1524 sub = vsub.buf;
1525 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001527 else {
1528 sub = &byte;
1529 sub_len = 1;
1530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001533
Antoine Pitrouac65d962011-10-20 23:54:17 +02001534 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1536 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001537
1538 if (sub_obj)
1539 PyBuffer_Release(&vsub);
1540
1541 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001542}
1543
1544
1545PyDoc_STRVAR(translate__doc__,
1546"B.translate(table[, deletechars]) -> bytes\n\
1547\n\
1548Return a copy of B, where all characters occurring in the\n\
1549optional argument deletechars are removed, and the remaining\n\
1550characters have been mapped through the given translation\n\
1551table, which must be a bytes object of length 256.");
1552
1553static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001554bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 register char *input, *output;
1557 const char *table;
1558 register Py_ssize_t i, c, changed = 0;
1559 PyObject *input_obj = (PyObject*)self;
1560 const char *output_start, *del_table=NULL;
1561 Py_ssize_t inlen, tablen, dellen = 0;
1562 PyObject *result;
1563 int trans_table[256];
1564 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1567 &tableobj, &delobj))
1568 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 if (PyBytes_Check(tableobj)) {
1571 table = PyBytes_AS_STRING(tableobj);
1572 tablen = PyBytes_GET_SIZE(tableobj);
1573 }
1574 else if (tableobj == Py_None) {
1575 table = NULL;
1576 tablen = 256;
1577 }
1578 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1579 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 if (tablen != 256) {
1582 PyErr_SetString(PyExc_ValueError,
1583 "translation table must be 256 characters long");
1584 return NULL;
1585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 if (delobj != NULL) {
1588 if (PyBytes_Check(delobj)) {
1589 del_table = PyBytes_AS_STRING(delobj);
1590 dellen = PyBytes_GET_SIZE(delobj);
1591 }
1592 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1593 return NULL;
1594 }
1595 else {
1596 del_table = NULL;
1597 dellen = 0;
1598 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 inlen = PyBytes_GET_SIZE(input_obj);
1601 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1602 if (result == NULL)
1603 return NULL;
1604 output_start = output = PyBytes_AsString(result);
1605 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 if (dellen == 0 && table != NULL) {
1608 /* If no deletions are required, use faster code */
1609 for (i = inlen; --i >= 0; ) {
1610 c = Py_CHARMASK(*input++);
1611 if (Py_CHARMASK((*output++ = table[c])) != c)
1612 changed = 1;
1613 }
1614 if (changed || !PyBytes_CheckExact(input_obj))
1615 return result;
1616 Py_DECREF(result);
1617 Py_INCREF(input_obj);
1618 return input_obj;
1619 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 if (table == NULL) {
1622 for (i = 0; i < 256; i++)
1623 trans_table[i] = Py_CHARMASK(i);
1624 } else {
1625 for (i = 0; i < 256; i++)
1626 trans_table[i] = Py_CHARMASK(table[i]);
1627 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 for (i = 0; i < dellen; i++)
1630 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 for (i = inlen; --i >= 0; ) {
1633 c = Py_CHARMASK(*input++);
1634 if (trans_table[c] != -1)
1635 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1636 continue;
1637 changed = 1;
1638 }
1639 if (!changed && PyBytes_CheckExact(input_obj)) {
1640 Py_DECREF(result);
1641 Py_INCREF(input_obj);
1642 return input_obj;
1643 }
1644 /* Fix the size of the resulting string */
1645 if (inlen > 0)
1646 _PyBytes_Resize(&result, output - output_start);
1647 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648}
1649
1650
/* Thin wrapper: the first parameter is not used, and the argument tuple
   is forwarded unchanged to _Py_bytes_maketrans(), whose result is
   returned as-is. */
static PyObject *
bytes_maketrans(PyObject *null, PyObject *args)
{
    return _Py_bytes_maketrans(args);
}
1656
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657/* find and count characters and substrings */
1658
/* findchar(target, target_len, c): pointer to the first byte equal to c
   within the first target_len bytes of target, or NULL when there is
   none.  This is memchr() with the result cast to char *. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1661
1662/* String ops must return a string. */
1663/* If the object is subclass of string, create a copy */
1664Py_LOCAL(PyBytesObject *)
1665return_self(PyBytesObject *self)
1666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 if (PyBytes_CheckExact(self)) {
1668 Py_INCREF(self);
1669 return self;
1670 }
1671 return (PyBytesObject *)PyBytes_FromStringAndSize(
1672 PyBytes_AS_STRING(self),
1673 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674}
1675
1676Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001677countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 Py_ssize_t count=0;
1680 const char *start=target;
1681 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 while ( (start=findchar(start, end-start, c)) != NULL ) {
1684 count++;
1685 if (count >= maxcount)
1686 break;
1687 start += 1;
1688 }
1689 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690}
1691
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
1693/* Algorithms for different cases of string replacement */
1694
1695/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1696Py_LOCAL(PyBytesObject *)
1697replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 const char *to_s, Py_ssize_t to_len,
1699 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 char *self_s, *result_s;
1702 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001703 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001708 /* 1 at the end plus 1 after every character;
1709 count = min(maxcount, self_len + 1) */
1710 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001712 else
1713 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1714 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 /* Check for overflow */
1717 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001718 assert(count > 0);
1719 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 PyErr_SetString(PyExc_OverflowError,
1721 "replacement bytes are too long");
1722 return NULL;
1723 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001724 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 if (! (result = (PyBytesObject *)
1727 PyBytes_FromStringAndSize(NULL, result_len)) )
1728 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 self_s = PyBytes_AS_STRING(self);
1731 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 /* Lay the first one down (guaranteed this will occur) */
1736 Py_MEMCPY(result_s, to_s, to_len);
1737 result_s += to_len;
1738 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 for (i=0; i<count; i++) {
1741 *result_s++ = *self_s++;
1742 Py_MEMCPY(result_s, to_s, to_len);
1743 result_s += to_len;
1744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 /* Copy the rest of the original string */
1747 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750}
1751
1752/* Special case for deleting a single character */
1753/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1754Py_LOCAL(PyBytesObject *)
1755replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 char *self_s, *result_s;
1759 char *start, *next, *end;
1760 Py_ssize_t self_len, result_len;
1761 Py_ssize_t count;
1762 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 self_len = PyBytes_GET_SIZE(self);
1765 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 count = countchar(self_s, self_len, from_c, maxcount);
1768 if (count == 0) {
1769 return return_self(self);
1770 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 result_len = self_len - count; /* from_len == 1 */
1773 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 if ( (result = (PyBytesObject *)
1776 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1777 return NULL;
1778 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 start = self_s;
1781 end = self_s + self_len;
1782 while (count-- > 0) {
1783 next = findchar(start, end-start, from_c);
1784 if (next == NULL)
1785 break;
1786 Py_MEMCPY(result_s, start, next-start);
1787 result_s += (next-start);
1788 start = next+1;
1789 }
1790 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793}
1794
1795/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1796
1797Py_LOCAL(PyBytesObject *)
1798replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 const char *from_s, Py_ssize_t from_len,
1800 Py_ssize_t maxcount) {
1801 char *self_s, *result_s;
1802 char *start, *next, *end;
1803 Py_ssize_t self_len, result_len;
1804 Py_ssize_t count, offset;
1805 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 self_len = PyBytes_GET_SIZE(self);
1808 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 count = stringlib_count(self_s, self_len,
1811 from_s, from_len,
1812 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 if (count == 0) {
1815 /* no matches */
1816 return return_self(self);
1817 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 result_len = self_len - (count * from_len);
1820 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 if ( (result = (PyBytesObject *)
1823 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1824 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 start = self_s;
1829 end = self_s + self_len;
1830 while (count-- > 0) {
1831 offset = stringlib_find(start, end-start,
1832 from_s, from_len,
1833 0);
1834 if (offset == -1)
1835 break;
1836 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 result_s += (next-start);
1841 start = next+from_len;
1842 }
1843 Py_MEMCPY(result_s, start, end-start);
1844 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845}
1846
1847/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1848Py_LOCAL(PyBytesObject *)
1849replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 char from_c, char to_c,
1851 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 char *self_s, *result_s, *start, *end, *next;
1854 Py_ssize_t self_len;
1855 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* The result string will be the same size */
1858 self_s = PyBytes_AS_STRING(self);
1859 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 if (next == NULL) {
1864 /* No matches; return the original string */
1865 return return_self(self);
1866 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 /* Need to make a new string */
1869 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1870 if (result == NULL)
1871 return NULL;
1872 result_s = PyBytes_AS_STRING(result);
1873 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 /* change everything in-place, starting with this one */
1876 start = result_s + (next-self_s);
1877 *start = to_c;
1878 start++;
1879 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 while (--maxcount > 0) {
1882 next = findchar(start, end-start, from_c);
1883 if (next == NULL)
1884 break;
1885 *next = to_c;
1886 start = next+1;
1887 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890}
1891
1892/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1893Py_LOCAL(PyBytesObject *)
1894replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 const char *from_s, Py_ssize_t from_len,
1896 const char *to_s, Py_ssize_t to_len,
1897 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 char *result_s, *start, *end;
1900 char *self_s;
1901 Py_ssize_t self_len, offset;
1902 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 self_s = PyBytes_AS_STRING(self);
1907 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 offset = stringlib_find(self_s, self_len,
1910 from_s, from_len,
1911 0);
1912 if (offset == -1) {
1913 /* No matches; return the original string */
1914 return return_self(self);
1915 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 /* Need to make a new string */
1918 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1919 if (result == NULL)
1920 return NULL;
1921 result_s = PyBytes_AS_STRING(result);
1922 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 /* change everything in-place, starting with this one */
1925 start = result_s + offset;
1926 Py_MEMCPY(start, to_s, from_len);
1927 start += from_len;
1928 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 while ( --maxcount > 0) {
1931 offset = stringlib_find(start, end-start,
1932 from_s, from_len,
1933 0);
1934 if (offset==-1)
1935 break;
1936 Py_MEMCPY(start+offset, to_s, from_len);
1937 start += offset+from_len;
1938 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941}
1942
1943/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1944Py_LOCAL(PyBytesObject *)
1945replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 char from_c,
1947 const char *to_s, Py_ssize_t to_len,
1948 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 char *self_s, *result_s;
1951 char *start, *next, *end;
1952 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001953 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 self_s = PyBytes_AS_STRING(self);
1957 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 count = countchar(self_s, self_len, from_c, maxcount);
1960 if (count == 0) {
1961 /* no matches, return unchanged */
1962 return return_self(self);
1963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 /* use the difference between current and new, hence the "-1" */
1966 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001967 assert(count > 0);
1968 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 PyErr_SetString(PyExc_OverflowError,
1970 "replacement bytes are too long");
1971 return NULL;
1972 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001973 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 if ( (result = (PyBytesObject *)
1976 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1977 return NULL;
1978 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 start = self_s;
1981 end = self_s + self_len;
1982 while (count-- > 0) {
1983 next = findchar(start, end-start, from_c);
1984 if (next == NULL)
1985 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 if (next == start) {
1988 /* replace with the 'to' */
1989 Py_MEMCPY(result_s, to_s, to_len);
1990 result_s += to_len;
1991 start += 1;
1992 } else {
1993 /* copy the unchanged old then the 'to' */
1994 Py_MEMCPY(result_s, start, next-start);
1995 result_s += (next-start);
1996 Py_MEMCPY(result_s, to_s, to_len);
1997 result_s += to_len;
1998 start = next+1;
1999 }
2000 }
2001 /* Copy the remainder of the remaining string */
2002 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005}
2006
2007/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2008Py_LOCAL(PyBytesObject *)
2009replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 const char *from_s, Py_ssize_t from_len,
2011 const char *to_s, Py_ssize_t to_len,
2012 Py_ssize_t maxcount) {
2013 char *self_s, *result_s;
2014 char *start, *next, *end;
2015 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002016 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 self_s = PyBytes_AS_STRING(self);
2020 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 count = stringlib_count(self_s, self_len,
2023 from_s, from_len,
2024 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 if (count == 0) {
2027 /* no matches, return unchanged */
2028 return return_self(self);
2029 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 /* Check for overflow */
2032 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002033 assert(count > 0);
2034 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 PyErr_SetString(PyExc_OverflowError,
2036 "replacement bytes are too long");
2037 return NULL;
2038 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002039 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 if ( (result = (PyBytesObject *)
2042 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2043 return NULL;
2044 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 start = self_s;
2047 end = self_s + self_len;
2048 while (count-- > 0) {
2049 offset = stringlib_find(start, end-start,
2050 from_s, from_len,
2051 0);
2052 if (offset == -1)
2053 break;
2054 next = start+offset;
2055 if (next == start) {
2056 /* replace with the 'to' */
2057 Py_MEMCPY(result_s, to_s, to_len);
2058 result_s += to_len;
2059 start += from_len;
2060 } else {
2061 /* copy the unchanged old then the 'to' */
2062 Py_MEMCPY(result_s, start, next-start);
2063 result_s += (next-start);
2064 Py_MEMCPY(result_s, to_s, to_len);
2065 result_s += to_len;
2066 start = next+from_len;
2067 }
2068 }
2069 /* Copy the remainder of the remaining string */
2070 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073}
2074
2075
2076Py_LOCAL(PyBytesObject *)
2077replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 const char *from_s, Py_ssize_t from_len,
2079 const char *to_s, Py_ssize_t to_len,
2080 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 if (maxcount < 0) {
2083 maxcount = PY_SSIZE_T_MAX;
2084 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2085 /* nothing to do; return the original string */
2086 return return_self(self);
2087 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 if (maxcount == 0 ||
2090 (from_len == 0 && to_len == 0)) {
2091 /* nothing to do; return the original string */
2092 return return_self(self);
2093 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 if (from_len == 0) {
2098 /* insert the 'to' string everywhere. */
2099 /* >>> "Python".replace("", ".") */
2100 /* '.P.y.t.h.o.n.' */
2101 return replace_interleave(self, to_s, to_len, maxcount);
2102 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2105 /* point for an empty self string to generate a non-empty string */
2106 /* Special case so the remaining code always gets a non-empty string */
2107 if (PyBytes_GET_SIZE(self) == 0) {
2108 return return_self(self);
2109 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 if (to_len == 0) {
2112 /* delete all occurrences of 'from' string */
2113 if (from_len == 1) {
2114 return replace_delete_single_character(
2115 self, from_s[0], maxcount);
2116 } else {
2117 return replace_delete_substring(self, from_s,
2118 from_len, maxcount);
2119 }
2120 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002122 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 if (from_len == to_len) {
2125 if (from_len == 1) {
2126 return replace_single_character_in_place(
2127 self,
2128 from_s[0],
2129 to_s[0],
2130 maxcount);
2131 } else {
2132 return replace_substring_in_place(
2133 self, from_s, from_len, to_s, to_len,
2134 maxcount);
2135 }
2136 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 /* Otherwise use the more generic algorithms */
2139 if (from_len == 1) {
2140 return replace_single_character(self, from_s[0],
2141 to_s, to_len, maxcount);
2142 } else {
2143 /* len('from')>=2, len('to')>=1 */
2144 return replace_substring(self, from_s, from_len, to_s, to_len,
2145 maxcount);
2146 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147}
2148
2149PyDoc_STRVAR(replace__doc__,
2150"B.replace(old, new[, count]) -> bytes\n\
2151\n\
2152Return a copy of B with all occurrences of subsection\n\
2153old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002154given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
2156static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002157bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 Py_ssize_t count = -1;
2160 PyObject *from, *to;
2161 const char *from_s, *to_s;
2162 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2165 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 if (PyBytes_Check(from)) {
2168 from_s = PyBytes_AS_STRING(from);
2169 from_len = PyBytes_GET_SIZE(from);
2170 }
2171 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2172 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 if (PyBytes_Check(to)) {
2175 to_s = PyBytes_AS_STRING(to);
2176 to_len = PyBytes_GET_SIZE(to);
2177 }
2178 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2179 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 return (PyObject *)replace((PyBytesObject *) self,
2182 from_s, from_len,
2183 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184}
2185
2186/** End DALKE **/
2187
2188/* Matches the end (direction >= 0) or start (direction < 0) of self
2189 * against substr, using the start and end arguments. Returns
2190 * -1 on error, 0 if not found and 1 if found.
2191 */
2192Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002193_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 Py_ssize_t len = PyBytes_GET_SIZE(self);
2197 Py_ssize_t slen;
2198 const char* sub;
2199 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002201 if (PyBytes_Check(substr)) {
2202 sub = PyBytes_AS_STRING(substr);
2203 slen = PyBytes_GET_SIZE(substr);
2204 }
2205 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2206 return -1;
2207 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 if (direction < 0) {
2212 /* startswith */
2213 if (start+slen > len)
2214 return 0;
2215 } else {
2216 /* endswith */
2217 if (end-start < slen || start > len)
2218 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 if (end-slen > start)
2221 start = end - slen;
2222 }
2223 if (end-start >= slen)
2224 return ! memcmp(str+start, sub, slen);
2225 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226}
2227
2228
2229PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002230"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231\n\
2232Return True if B starts with the specified prefix, False otherwise.\n\
2233With optional start, test B beginning at that position.\n\
2234With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002235prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
2237static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002238bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002240 Py_ssize_t start = 0;
2241 Py_ssize_t end = PY_SSIZE_T_MAX;
2242 PyObject *subobj;
2243 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244
Jesus Ceaac451502011-04-20 17:09:23 +02002245 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002246 return NULL;
2247 if (PyTuple_Check(subobj)) {
2248 Py_ssize_t i;
2249 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2250 result = _bytes_tailmatch(self,
2251 PyTuple_GET_ITEM(subobj, i),
2252 start, end, -1);
2253 if (result == -1)
2254 return NULL;
2255 else if (result) {
2256 Py_RETURN_TRUE;
2257 }
2258 }
2259 Py_RETURN_FALSE;
2260 }
2261 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002262 if (result == -1) {
2263 if (PyErr_ExceptionMatches(PyExc_TypeError))
2264 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2265 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002267 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 else
2269 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270}
2271
2272
2273PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002274"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275\n\
2276Return True if B ends with the specified suffix, False otherwise.\n\
2277With optional start, test B beginning at that position.\n\
2278With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002279suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280
2281static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002282bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 Py_ssize_t start = 0;
2285 Py_ssize_t end = PY_SSIZE_T_MAX;
2286 PyObject *subobj;
2287 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288
Jesus Ceaac451502011-04-20 17:09:23 +02002289 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 return NULL;
2291 if (PyTuple_Check(subobj)) {
2292 Py_ssize_t i;
2293 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2294 result = _bytes_tailmatch(self,
2295 PyTuple_GET_ITEM(subobj, i),
2296 start, end, +1);
2297 if (result == -1)
2298 return NULL;
2299 else if (result) {
2300 Py_RETURN_TRUE;
2301 }
2302 }
2303 Py_RETURN_FALSE;
2304 }
2305 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002306 if (result == -1) {
2307 if (PyErr_ExceptionMatches(PyExc_TypeError))
2308 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2309 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002311 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 else
2313 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002314}
2315
2316
2317PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002318"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002319\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002320Decode B using the codec registered for encoding. Default encoding\n\
2321is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002322handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2323a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002324as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002325able to handle UnicodeDecodeErrors.");
2326
2327static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002328bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002329{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 const char *encoding = NULL;
2331 const char *errors = NULL;
2332 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2335 return NULL;
2336 if (encoding == NULL)
2337 encoding = PyUnicode_GetDefaultEncoding();
2338 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002339}
2340
Guido van Rossum20188312006-05-05 15:15:40 +00002341
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002342PyDoc_STRVAR(splitlines__doc__,
2343"B.splitlines([keepends]) -> list of lines\n\
2344\n\
2345Return a list of the lines in B, breaking at line boundaries.\n\
2346Line breaks are not included in the resulting list unless keepends\n\
2347is given and true.");
2348
2349static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002350bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002351{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002352 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002353 int keepends = 0;
2354
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002355 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2356 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002357 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002358
2359 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002360 (PyObject*) self, PyBytes_AS_STRING(self),
2361 PyBytes_GET_SIZE(self), keepends
2362 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002363}
2364
2365
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002366PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002367"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002368\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002369Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002370Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002371Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002372
2373static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002374hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 if (c >= 128)
2377 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002378 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 return c - '0';
2380 else {
David Malcolm96960882010-11-05 17:23:41 +00002381 if (Py_ISUPPER(c))
2382 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 if (c >= 'a' && c <= 'f')
2384 return c - 'a' + 10;
2385 }
2386 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002387}
2388
2389static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002390bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 PyObject *newstring, *hexobj;
2393 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 Py_ssize_t hexlen, byteslen, i, j;
2395 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002396 void *data;
2397 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2400 return NULL;
2401 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002402 if (PyUnicode_READY(hexobj))
2403 return NULL;
2404 kind = PyUnicode_KIND(hexobj);
2405 data = PyUnicode_DATA(hexobj);
2406 hexlen = PyUnicode_GET_LENGTH(hexobj);
2407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 byteslen = hexlen/2; /* This overestimates if there are spaces */
2409 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2410 if (!newstring)
2411 return NULL;
2412 buf = PyBytes_AS_STRING(newstring);
2413 for (i = j = 0; i < hexlen; i += 2) {
2414 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002415 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 i++;
2417 if (i >= hexlen)
2418 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002419 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2420 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 if (top == -1 || bot == -1) {
2422 PyErr_Format(PyExc_ValueError,
2423 "non-hexadecimal number found in "
2424 "fromhex() arg at position %zd", i);
2425 goto error;
2426 }
2427 buf[j++] = (top << 4) + bot;
2428 }
2429 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2430 goto error;
2431 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002432
2433 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002434 Py_XDECREF(newstring);
2435 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002436}
2437
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002438PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002439"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002440
2441static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002442bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002444 Py_ssize_t res;
2445 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2446 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002447}
2448
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002449
2450static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002451bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002452{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002454}
2455
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002456
2457static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002458bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2460 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2461 _Py_capitalize__doc__},
2462 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2463 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2464 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2465 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2466 endswith__doc__},
2467 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2468 expandtabs__doc__},
2469 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2470 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2471 fromhex_doc},
2472 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2473 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2474 _Py_isalnum__doc__},
2475 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2476 _Py_isalpha__doc__},
2477 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2478 _Py_isdigit__doc__},
2479 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2480 _Py_islower__doc__},
2481 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2482 _Py_isspace__doc__},
2483 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2484 _Py_istitle__doc__},
2485 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2486 _Py_isupper__doc__},
2487 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2488 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2489 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2490 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2491 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2492 _Py_maketrans__doc__},
2493 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2494 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2495 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2496 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2497 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2498 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2499 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002500 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002502 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002503 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 splitlines__doc__},
2505 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2506 startswith__doc__},
2507 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2508 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2509 _Py_swapcase__doc__},
2510 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2511 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2512 translate__doc__},
2513 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2514 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2515 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2516 sizeof__doc__},
2517 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002518};
2519
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520static PyObject *
2521str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2522
2523static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002524bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 PyObject *x = NULL;
2527 const char *encoding = NULL;
2528 const char *errors = NULL;
2529 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002530 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 Py_ssize_t size;
2532 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002533 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (type != &PyBytes_Type)
2536 return str_subtype_new(type, args, kwds);
2537 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2538 &encoding, &errors))
2539 return NULL;
2540 if (x == NULL) {
2541 if (encoding != NULL || errors != NULL) {
2542 PyErr_SetString(PyExc_TypeError,
2543 "encoding or errors without sequence "
2544 "argument");
2545 return NULL;
2546 }
2547 return PyBytes_FromString("");
2548 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 if (PyUnicode_Check(x)) {
2551 /* Encode via the codec registry */
2552 if (encoding == NULL) {
2553 PyErr_SetString(PyExc_TypeError,
2554 "string argument without an encoding");
2555 return NULL;
2556 }
2557 new = PyUnicode_AsEncodedString(x, encoding, errors);
2558 if (new == NULL)
2559 return NULL;
2560 assert(PyBytes_Check(new));
2561 return new;
2562 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002563
2564 /* We'd like to call PyObject_Bytes here, but we need to check for an
2565 integer argument before deferring to PyBytes_FromObject, something
2566 PyObject_Bytes doesn't do. */
2567 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2568 if (func != NULL) {
2569 new = PyObject_CallFunctionObjArgs(func, NULL);
2570 Py_DECREF(func);
2571 if (new == NULL)
2572 return NULL;
2573 if (!PyBytes_Check(new)) {
2574 PyErr_Format(PyExc_TypeError,
2575 "__bytes__ returned non-bytes (type %.200s)",
2576 Py_TYPE(new)->tp_name);
2577 Py_DECREF(new);
2578 return NULL;
2579 }
2580 return new;
2581 }
2582 else if (PyErr_Occurred())
2583 return NULL;
2584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 /* Is it an integer? */
2586 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2587 if (size == -1 && PyErr_Occurred()) {
2588 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2589 return NULL;
2590 PyErr_Clear();
2591 }
2592 else if (size < 0) {
2593 PyErr_SetString(PyExc_ValueError, "negative count");
2594 return NULL;
2595 }
2596 else {
2597 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002598 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002599 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002600 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 return new;
2603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 /* If it's not unicode, there can't be encoding or errors */
2606 if (encoding != NULL || errors != NULL) {
2607 PyErr_SetString(PyExc_TypeError,
2608 "encoding or errors without a string argument");
2609 return NULL;
2610 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002611
2612 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002613}
2614
2615PyObject *
2616PyBytes_FromObject(PyObject *x)
2617{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002618 PyObject *new, *it;
2619 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 if (x == NULL) {
2622 PyErr_BadInternalCall();
2623 return NULL;
2624 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002625
2626 if (PyBytes_CheckExact(x)) {
2627 Py_INCREF(x);
2628 return x;
2629 }
2630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002631 /* Use the modern buffer interface */
2632 if (PyObject_CheckBuffer(x)) {
2633 Py_buffer view;
2634 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2635 return NULL;
2636 new = PyBytes_FromStringAndSize(NULL, view.len);
2637 if (!new)
2638 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2640 &view, view.len, 'C') < 0)
2641 goto fail;
2642 PyBuffer_Release(&view);
2643 return new;
2644 fail:
2645 Py_XDECREF(new);
2646 PyBuffer_Release(&view);
2647 return NULL;
2648 }
2649 if (PyUnicode_Check(x)) {
2650 PyErr_SetString(PyExc_TypeError,
2651 "cannot convert unicode object to bytes");
2652 return NULL;
2653 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 if (PyList_CheckExact(x)) {
2656 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2657 if (new == NULL)
2658 return NULL;
2659 for (i = 0; i < Py_SIZE(x); i++) {
2660 Py_ssize_t value = PyNumber_AsSsize_t(
2661 PyList_GET_ITEM(x, i), PyExc_ValueError);
2662 if (value == -1 && PyErr_Occurred()) {
2663 Py_DECREF(new);
2664 return NULL;
2665 }
2666 if (value < 0 || value >= 256) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "bytes must be in range(0, 256)");
2669 Py_DECREF(new);
2670 return NULL;
2671 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002672 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 }
2674 return new;
2675 }
2676 if (PyTuple_CheckExact(x)) {
2677 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2678 if (new == NULL)
2679 return NULL;
2680 for (i = 0; i < Py_SIZE(x); i++) {
2681 Py_ssize_t value = PyNumber_AsSsize_t(
2682 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2683 if (value == -1 && PyErr_Occurred()) {
2684 Py_DECREF(new);
2685 return NULL;
2686 }
2687 if (value < 0 || value >= 256) {
2688 PyErr_SetString(PyExc_ValueError,
2689 "bytes must be in range(0, 256)");
2690 Py_DECREF(new);
2691 return NULL;
2692 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002693 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 }
2695 return new;
2696 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* For iterator version, create a string object and resize as needed */
2699 size = _PyObject_LengthHint(x, 64);
2700 if (size == -1 && PyErr_Occurred())
2701 return NULL;
2702 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2703 returning a shared empty bytes string. This required because we
2704 want to call _PyBytes_Resize() the returned object, which we can
2705 only do on bytes objects with refcount == 1. */
2706 size += 1;
2707 new = PyBytes_FromStringAndSize(NULL, size);
2708 if (new == NULL)
2709 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 /* Get the iterator */
2712 it = PyObject_GetIter(x);
2713 if (it == NULL)
2714 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 /* Run the iterator to exhaustion */
2717 for (i = 0; ; i++) {
2718 PyObject *item;
2719 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 /* Get the next item */
2722 item = PyIter_Next(it);
2723 if (item == NULL) {
2724 if (PyErr_Occurred())
2725 goto error;
2726 break;
2727 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002729 /* Interpret it as an int (__index__) */
2730 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2731 Py_DECREF(item);
2732 if (value == -1 && PyErr_Occurred())
2733 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 /* Range check */
2736 if (value < 0 || value >= 256) {
2737 PyErr_SetString(PyExc_ValueError,
2738 "bytes must be in range(0, 256)");
2739 goto error;
2740 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 /* Append the byte */
2743 if (i >= size) {
2744 size = 2 * size + 1;
2745 if (_PyBytes_Resize(&new, size) < 0)
2746 goto error;
2747 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002748 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002749 }
2750 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 /* Clean up and return success */
2753 Py_DECREF(it);
2754 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755
2756 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 /* Error handling when new != NULL */
2758 Py_XDECREF(it);
2759 Py_DECREF(new);
2760 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761}
2762
2763static PyObject *
2764str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2765{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 PyObject *tmp, *pnew;
2767 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 assert(PyType_IsSubtype(type, &PyBytes_Type));
2770 tmp = bytes_new(&PyBytes_Type, args, kwds);
2771 if (tmp == NULL)
2772 return NULL;
2773 assert(PyBytes_CheckExact(tmp));
2774 n = PyBytes_GET_SIZE(tmp);
2775 pnew = type->tp_alloc(type, n);
2776 if (pnew != NULL) {
2777 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2778 PyBytes_AS_STRING(tmp), n+1);
2779 ((PyBytesObject *)pnew)->ob_shash =
2780 ((PyBytesObject *)tmp)->ob_shash;
2781 }
2782 Py_DECREF(tmp);
2783 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784}
2785
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002786PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002787"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002789bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002790bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2791bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002792\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002794 - an iterable yielding integers in range(256)\n\
2795 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002796 - any object implementing the buffer API.\n\
2797 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002798
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002799static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2803 "bytes",
2804 PyBytesObject_SIZE,
2805 sizeof(char),
2806 bytes_dealloc, /* tp_dealloc */
2807 0, /* tp_print */
2808 0, /* tp_getattr */
2809 0, /* tp_setattr */
2810 0, /* tp_reserved */
2811 (reprfunc)bytes_repr, /* tp_repr */
2812 0, /* tp_as_number */
2813 &bytes_as_sequence, /* tp_as_sequence */
2814 &bytes_as_mapping, /* tp_as_mapping */
2815 (hashfunc)bytes_hash, /* tp_hash */
2816 0, /* tp_call */
2817 bytes_str, /* tp_str */
2818 PyObject_GenericGetAttr, /* tp_getattro */
2819 0, /* tp_setattro */
2820 &bytes_as_buffer, /* tp_as_buffer */
2821 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2822 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2823 bytes_doc, /* tp_doc */
2824 0, /* tp_traverse */
2825 0, /* tp_clear */
2826 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2827 0, /* tp_weaklistoffset */
2828 bytes_iter, /* tp_iter */
2829 0, /* tp_iternext */
2830 bytes_methods, /* tp_methods */
2831 0, /* tp_members */
2832 0, /* tp_getset */
2833 &PyBaseObject_Type, /* tp_base */
2834 0, /* tp_dict */
2835 0, /* tp_descr_get */
2836 0, /* tp_descr_set */
2837 0, /* tp_dictoffset */
2838 0, /* tp_init */
2839 0, /* tp_alloc */
2840 bytes_new, /* tp_new */
2841 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002842};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002843
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844void
2845PyBytes_Concat(register PyObject **pv, register PyObject *w)
2846{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 register PyObject *v;
2848 assert(pv != NULL);
2849 if (*pv == NULL)
2850 return;
2851 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002852 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 return;
2854 }
2855 v = bytes_concat(*pv, w);
2856 Py_DECREF(*pv);
2857 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858}
2859
2860void
2861PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 PyBytes_Concat(pv, w);
2864 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865}
2866
2867
2868/* The following function breaks the notion that strings are immutable:
2869 it changes the size of a string. We get away with this only if there
2870 is only one module referencing the object. You can also think of it
2871 as creating a new string object and destroying the old one, only
2872 more efficiently. In any case, don't use this if the string may
2873 already be known to some other part of the code...
2874 Note that if there's not enough memory to resize the string, the original
2875 string object at *pv is deallocated, *pv is set to NULL, an "out of
2876 memory" exception is set, and -1 is returned. Else (on success) 0 is
2877 returned, and the value in *pv may or may not be the same as on input.
2878 As always, an extra byte is allocated for a trailing \0 byte (newsize
2879 does *not* include that), and a trailing \0 byte is stored.
2880*/
2881
2882int
2883_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 register PyObject *v;
2886 register PyBytesObject *sv;
2887 v = *pv;
2888 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2889 *pv = 0;
2890 Py_DECREF(v);
2891 PyErr_BadInternalCall();
2892 return -1;
2893 }
2894 /* XXX UNREF/NEWREF interface should be more symmetrical */
2895 _Py_DEC_REFTOTAL;
2896 _Py_ForgetReference(v);
2897 *pv = (PyObject *)
2898 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2899 if (*pv == NULL) {
2900 PyObject_Del(v);
2901 PyErr_NoMemory();
2902 return -1;
2903 }
2904 _Py_NewReference(*pv);
2905 sv = (PyBytesObject *) *pv;
2906 Py_SIZE(sv) = newsize;
2907 sv->ob_sval[newsize] = '\0';
2908 sv->ob_shash = -1; /* invalidate cached hash value */
2909 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910}
2911
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912void
2913PyBytes_Fini(void)
2914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002916 for (i = 0; i < UCHAR_MAX + 1; i++)
2917 Py_CLEAR(characters[i]);
2918 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919}
2920
Benjamin Peterson4116f362008-05-27 00:36:20 +00002921/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002922
2923typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 PyObject_HEAD
2925 Py_ssize_t it_index;
2926 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002928
2929static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002931{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002932 _PyObject_GC_UNTRACK(it);
2933 Py_XDECREF(it->it_seq);
2934 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002935}
2936
2937static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002939{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 Py_VISIT(it->it_seq);
2941 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002942}
2943
2944static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002946{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002947 PyBytesObject *seq;
2948 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 assert(it != NULL);
2951 seq = it->it_seq;
2952 if (seq == NULL)
2953 return NULL;
2954 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002956 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2957 item = PyLong_FromLong(
2958 (unsigned char)seq->ob_sval[it->it_index]);
2959 if (item != NULL)
2960 ++it->it_index;
2961 return item;
2962 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 Py_DECREF(seq);
2965 it->it_seq = NULL;
2966 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002967}
2968
2969static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002971{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 Py_ssize_t len = 0;
2973 if (it->it_seq)
2974 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2975 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002976}
2977
2978PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002979 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002980
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002981static PyObject *
2982striter_reduce(striterobject *it)
2983{
2984 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002985 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002986 it->it_seq, it->it_index);
2987 } else {
2988 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2989 if (u == NULL)
2990 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002991 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002992 }
2993}
2994
2995PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2996
2997static PyObject *
2998striter_setstate(striterobject *it, PyObject *state)
2999{
3000 Py_ssize_t index = PyLong_AsSsize_t(state);
3001 if (index == -1 && PyErr_Occurred())
3002 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003003 if (it->it_seq != NULL) {
3004 if (index < 0)
3005 index = 0;
3006 else if (index > PyBytes_GET_SIZE(it->it_seq))
3007 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3008 it->it_index = index;
3009 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003010 Py_RETURN_NONE;
3011}
3012
3013PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3014
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3017 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003018 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3019 reduce_doc},
3020 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3021 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023};
3024
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3027 "bytes_iterator", /* tp_name */
3028 sizeof(striterobject), /* tp_basicsize */
3029 0, /* tp_itemsize */
3030 /* methods */
3031 (destructor)striter_dealloc, /* tp_dealloc */
3032 0, /* tp_print */
3033 0, /* tp_getattr */
3034 0, /* tp_setattr */
3035 0, /* tp_reserved */
3036 0, /* tp_repr */
3037 0, /* tp_as_number */
3038 0, /* tp_as_sequence */
3039 0, /* tp_as_mapping */
3040 0, /* tp_hash */
3041 0, /* tp_call */
3042 0, /* tp_str */
3043 PyObject_GenericGetAttr, /* tp_getattro */
3044 0, /* tp_setattro */
3045 0, /* tp_as_buffer */
3046 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3047 0, /* tp_doc */
3048 (traverseproc)striter_traverse, /* tp_traverse */
3049 0, /* tp_clear */
3050 0, /* tp_richcompare */
3051 0, /* tp_weaklistoffset */
3052 PyObject_SelfIter, /* tp_iter */
3053 (iternextfunc)striter_next, /* tp_iternext */
3054 striter_methods, /* tp_methods */
3055 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056};
3057
3058static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003059bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 if (!PyBytes_Check(seq)) {
3064 PyErr_BadInternalCall();
3065 return NULL;
3066 }
3067 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3068 if (it == NULL)
3069 return NULL;
3070 it->it_index = 0;
3071 Py_INCREF(seq);
3072 it->it_seq = (PyBytesObject *)seq;
3073 _PyObject_GC_TRACK(it);
3074 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003075}