blob: 9dcb74e8fdb264abbed9a6363c1c30ede4aaf1d3 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of the
   ob_sval member plus one byte for the terminating null.  Any memory
   allocation for a string of length n should request
   PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
47 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100189 {
190 int c = va_arg(count, int);
191 if (c < 0 || c > 255) {
192 PyErr_SetString(PyExc_OverflowError,
193 "PyBytes_FromFormatV(): %c format "
194 "expects an integer in range [0; 255]");
195 return NULL;
196 }
197 n++;
198 break;
199 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100279 {
280 int c = va_arg(vargs, int);
281 /* c has been checked for overflow in the first step */
282 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100284 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 case 'd':
286 if (longflag)
287 sprintf(s, "%ld", va_arg(vargs, long));
288 else if (size_tflag)
289 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
290 va_arg(vargs, Py_ssize_t));
291 else
292 sprintf(s, "%d", va_arg(vargs, int));
293 s += strlen(s);
294 break;
295 case 'u':
296 if (longflag)
297 sprintf(s, "%lu",
298 va_arg(vargs, unsigned long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
301 va_arg(vargs, size_t));
302 else
303 sprintf(s, "%u",
304 va_arg(vargs, unsigned int));
305 s += strlen(s);
306 break;
307 case 'i':
308 sprintf(s, "%i", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 'x':
312 sprintf(s, "%x", va_arg(vargs, int));
313 s += strlen(s);
314 break;
315 case 's':
316 p = va_arg(vargs, char*);
317 i = strlen(p);
318 if (n > 0 && i > n)
319 i = n;
320 Py_MEMCPY(s, p, i);
321 s += i;
322 break;
323 case 'p':
324 sprintf(s, "%p", va_arg(vargs, void*));
325 /* %p is ill-defined: ensure leading 0x. */
326 if (s[1] == 'X')
327 s[1] = 'x';
328 else if (s[1] != 'x') {
329 memmove(s+2, s, strlen(s)+1);
330 s[0] = '0';
331 s[1] = 'x';
332 }
333 s += strlen(s);
334 break;
335 case '%':
336 *s++ = '%';
337 break;
338 default:
339 strcpy(s, p);
340 s += strlen(s);
341 goto end;
342 }
343 } else
344 *s++ = *f;
345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346
347 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
349 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000350}
351
352PyObject *
353PyBytes_FromFormat(const char *format, ...)
354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 PyObject* ret;
356 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000362#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 ret = PyBytes_FromFormatV(format, vargs);
364 va_end(vargs);
365 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366}
367
368static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000369bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000372}
373
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000374/* Unescape a backslash-escaped string. If unicode is non-zero,
375 the string is a u-literal. If recode_encoding is non-zero,
376 the string is UTF-8 encoded and should be re-encoded in the
377 specified encoding. */
378
379PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 Py_ssize_t len,
381 const char *errors,
382 Py_ssize_t unicode,
383 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 int c;
386 char *p, *buf;
387 const char *end;
388 PyObject *v;
389 Py_ssize_t newlen = recode_encoding ? 4*len:len;
390 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
391 if (v == NULL)
392 return NULL;
393 p = buf = PyBytes_AsString(v);
394 end = s + len;
395 while (s < end) {
396 if (*s != '\\') {
397 non_esc:
398 if (recode_encoding && (*s & 0x80)) {
399 PyObject *u, *w;
400 char *r;
401 const char* t;
402 Py_ssize_t rn;
403 t = s;
404 /* Decode non-ASCII bytes as UTF-8. */
405 while (t < end && (*t & 0x80)) t++;
406 u = PyUnicode_DecodeUTF8(s, t - s, errors);
407 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 /* Recode them in target encoding. */
410 w = PyUnicode_AsEncodedString(
411 u, recode_encoding, errors);
412 Py_DECREF(u);
413 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 /* Append bytes to output buffer. */
416 assert(PyBytes_Check(w));
417 r = PyBytes_AS_STRING(w);
418 rn = PyBytes_GET_SIZE(w);
419 Py_MEMCPY(p, r, rn);
420 p += rn;
421 Py_DECREF(w);
422 s = t;
423 } else {
424 *p++ = *s++;
425 }
426 continue;
427 }
428 s++;
429 if (s==end) {
430 PyErr_SetString(PyExc_ValueError,
431 "Trailing \\ in string");
432 goto failed;
433 }
434 switch (*s++) {
435 /* XXX This assumes ASCII! */
436 case '\n': break;
437 case '\\': *p++ = '\\'; break;
438 case '\'': *p++ = '\''; break;
439 case '\"': *p++ = '\"'; break;
440 case 'b': *p++ = '\b'; break;
441 case 'f': *p++ = '\014'; break; /* FF */
442 case 't': *p++ = '\t'; break;
443 case 'n': *p++ = '\n'; break;
444 case 'r': *p++ = '\r'; break;
445 case 'v': *p++ = '\013'; break; /* VT */
446 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
447 case '0': case '1': case '2': case '3':
448 case '4': case '5': case '6': case '7':
449 c = s[-1] - '0';
450 if (s < end && '0' <= *s && *s <= '7') {
451 c = (c<<3) + *s++ - '0';
452 if (s < end && '0' <= *s && *s <= '7')
453 c = (c<<3) + *s++ - '0';
454 }
455 *p++ = c;
456 break;
457 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000458 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 unsigned int x = 0;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x = 10 + c - 'a';
466 else
467 x = 10 + c - 'A';
468 x = x << 4;
469 c = Py_CHARMASK(*s);
470 s++;
David Malcolm96960882010-11-05 17:23:41 +0000471 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000473 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 x += 10 + c - 'a';
475 else
476 x += 10 + c - 'A';
477 *p++ = x;
478 break;
479 }
480 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200481 PyErr_Format(PyExc_ValueError,
482 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200483 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 goto failed;
485 }
486 if (strcmp(errors, "replace") == 0) {
487 *p++ = '?';
488 } else if (strcmp(errors, "ignore") == 0)
489 /* do nothing */;
490 else {
491 PyErr_Format(PyExc_ValueError,
492 "decoding error; unknown "
493 "error handling code: %.400s",
494 errors);
495 goto failed;
496 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200497 /* skip \x */
498 if (s < end && Py_ISXDIGIT(s[0]))
499 s++; /* and a hexdigit */
500 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 default:
502 *p++ = '\\';
503 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200504 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 UTF-8 bytes may follow. */
506 }
507 }
508 if (p-buf < newlen)
509 _PyBytes_Resize(&v, p - buf);
510 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000511 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 Py_DECREF(v);
513 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516/* -------------------------------------------------------------------- */
517/* object api */
518
519Py_ssize_t
520PyBytes_Size(register PyObject *op)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 if (!PyBytes_Check(op)) {
523 PyErr_Format(PyExc_TypeError,
524 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
525 return -1;
526 }
527 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000528}
529
530char *
531PyBytes_AsString(register PyObject *op)
532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(op)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
536 return NULL;
537 }
538 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539}
540
541int
542PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 register char **s,
544 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000545{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 if (s == NULL) {
547 PyErr_BadInternalCall();
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 if (!PyBytes_Check(obj)) {
552 PyErr_Format(PyExc_TypeError,
553 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
554 return -1;
555 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 *s = PyBytes_AS_STRING(obj);
558 if (len != NULL)
559 *len = PyBytes_GET_SIZE(obj);
560 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
561 PyErr_SetString(PyExc_TypeError,
562 "expected bytes with no null");
563 return -1;
564 }
565 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566}
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
568/* -------------------------------------------------------------------- */
569/* Methods */
570
Eric Smith0923d1d2009-04-16 20:16:10 +0000571#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
573#include "stringlib/fastsearch.h"
574#include "stringlib/count.h"
575#include "stringlib/find.h"
576#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000577#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000578#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000579
Eric Smith0f78bff2009-11-30 01:01:42 +0000580#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000581
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000582PyObject *
583PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000584{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 Py_ssize_t i, length = Py_SIZE(op);
587 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200589 unsigned char quote, *s, *p;
590
591 /* Compute size of output string */
592 squotes = dquotes = 0;
593 newsize = 3; /* b'' */
594 s = (unsigned char*)op->ob_sval;
595 for (i = 0; i < length; i++) {
596 switch(s[i]) {
597 case '\'': squotes++; newsize++; break;
598 case '"': dquotes++; newsize++; break;
599 case '\\': case '\t': case '\n': case '\r':
600 newsize += 2; break; /* \C */
601 default:
602 if (s[i] < ' ' || s[i] >= 0x7f)
603 newsize += 4; /* \xHH */
604 else
605 newsize++;
606 }
607 }
608 quote = '\'';
609 if (smartquotes && squotes && !dquotes)
610 quote = '"';
611 if (squotes && quote == '\'')
612 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200613
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200614 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000615 PyErr_SetString(PyExc_OverflowError,
616 "bytes object is too large to make repr");
617 return NULL;
618 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200619
620 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 if (v == NULL) {
622 return NULL;
623 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000625
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 *p++ = 'b', *p++ = quote;
627 for (i = 0; i < length; i++) {
628 unsigned char c = op->ob_sval[i];
629 if (c == quote || c == '\\')
630 *p++ = '\\', *p++ = c;
631 else if (c == '\t')
632 *p++ = '\\', *p++ = 't';
633 else if (c == '\n')
634 *p++ = '\\', *p++ = 'n';
635 else if (c == '\r')
636 *p++ = '\\', *p++ = 'r';
637 else if (c < ' ' || c >= 0x7f) {
638 *p++ = '\\';
639 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200640 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
641 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200643 else
644 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200646 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200647 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000649}
650
Neal Norwitz6968b052007-02-27 19:02:19 +0000651static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000652bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000653{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000655}
656
Neal Norwitz6968b052007-02-27 19:02:19 +0000657static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000658bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 if (Py_BytesWarningFlag) {
661 if (PyErr_WarnEx(PyExc_BytesWarning,
662 "str() on a bytes instance", 1))
663 return NULL;
664 }
665 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000666}
667
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000669bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672}
Neal Norwitz6968b052007-02-27 19:02:19 +0000673
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674/* This is also used by PyBytes_Concat() */
675static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000676bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 Py_ssize_t size;
679 Py_buffer va, vb;
680 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 va.len = -1;
683 vb.len = -1;
684 if (_getbuffer(a, &va) < 0 ||
685 _getbuffer(b, &vb) < 0) {
686 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
687 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 /* Optimize end cases */
692 if (va.len == 0 && PyBytes_CheckExact(b)) {
693 result = b;
694 Py_INCREF(result);
695 goto done;
696 }
697 if (vb.len == 0 && PyBytes_CheckExact(a)) {
698 result = a;
699 Py_INCREF(result);
700 goto done;
701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 size = va.len + vb.len;
704 if (size < 0) {
705 PyErr_NoMemory();
706 goto done;
707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 result = PyBytes_FromStringAndSize(NULL, size);
710 if (result != NULL) {
711 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
712 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000714
715 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 if (va.len != -1)
717 PyBuffer_Release(&va);
718 if (vb.len != -1)
719 PyBuffer_Release(&vb);
720 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721}
Neal Norwitz6968b052007-02-27 19:02:19 +0000722
723static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000724bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000725{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 register Py_ssize_t i;
727 register Py_ssize_t j;
728 register Py_ssize_t size;
729 register PyBytesObject *op;
730 size_t nbytes;
731 if (n < 0)
732 n = 0;
733 /* watch out for overflows: the size can overflow int,
734 * and the # of bytes needed can overflow size_t
735 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000736 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 PyErr_SetString(PyExc_OverflowError,
738 "repeated bytes are too long");
739 return NULL;
740 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000741 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
743 Py_INCREF(a);
744 return (PyObject *)a;
745 }
746 nbytes = (size_t)size;
747 if (nbytes + PyBytesObject_SIZE <= nbytes) {
748 PyErr_SetString(PyExc_OverflowError,
749 "repeated bytes are too long");
750 return NULL;
751 }
752 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
753 if (op == NULL)
754 return PyErr_NoMemory();
755 PyObject_INIT_VAR(op, &PyBytes_Type, size);
756 op->ob_shash = -1;
757 op->ob_sval[size] = '\0';
758 if (Py_SIZE(a) == 1 && n > 0) {
759 memset(op->ob_sval, a->ob_sval[0] , n);
760 return (PyObject *) op;
761 }
762 i = 0;
763 if (i < size) {
764 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
765 i = Py_SIZE(a);
766 }
767 while (i < size) {
768 j = (i <= size-i) ? i : size-i;
769 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
770 i += j;
771 }
772 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000773}
774
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000776bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777{
778 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
779 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000780 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000781 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000782 PyErr_Clear();
783 if (_getbuffer(arg, &varg) < 0)
784 return -1;
785 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
786 varg.buf, varg.len, 0);
787 PyBuffer_Release(&varg);
788 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789 }
790 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000791 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
792 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793 }
794
Antoine Pitrou0010d372010-08-15 17:12:55 +0000795 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796}
797
Neal Norwitz6968b052007-02-27 19:02:19 +0000798static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000799bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000800{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 if (i < 0 || i >= Py_SIZE(a)) {
802 PyErr_SetString(PyExc_IndexError, "index out of range");
803 return NULL;
804 }
805 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000806}
807
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000809bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 int c;
812 Py_ssize_t len_a, len_b;
813 Py_ssize_t min_len;
814 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 /* Make sure both arguments are strings. */
817 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
818 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
819 (PyObject_IsInstance((PyObject*)a,
820 (PyObject*)&PyUnicode_Type) ||
821 PyObject_IsInstance((PyObject*)b,
822 (PyObject*)&PyUnicode_Type))) {
823 if (PyErr_WarnEx(PyExc_BytesWarning,
824 "Comparison between bytes and string", 1))
825 return NULL;
826 }
827 result = Py_NotImplemented;
828 goto out;
829 }
830 if (a == b) {
831 switch (op) {
832 case Py_EQ:case Py_LE:case Py_GE:
833 result = Py_True;
834 goto out;
835 case Py_NE:case Py_LT:case Py_GT:
836 result = Py_False;
837 goto out;
838 }
839 }
840 if (op == Py_EQ) {
841 /* Supporting Py_NE here as well does not save
842 much time, since Py_NE is rarely used. */
843 if (Py_SIZE(a) == Py_SIZE(b)
844 && (a->ob_sval[0] == b->ob_sval[0]
845 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
846 result = Py_True;
847 } else {
848 result = Py_False;
849 }
850 goto out;
851 }
852 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
853 min_len = (len_a < len_b) ? len_a : len_b;
854 if (min_len > 0) {
855 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
856 if (c==0)
857 c = memcmp(a->ob_sval, b->ob_sval, min_len);
858 } else
859 c = 0;
860 if (c == 0)
861 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
862 switch (op) {
863 case Py_LT: c = c < 0; break;
864 case Py_LE: c = c <= 0; break;
865 case Py_EQ: assert(0); break; /* unreachable */
866 case Py_NE: c = c != 0; break;
867 case Py_GT: c = c > 0; break;
868 case Py_GE: c = c >= 0; break;
869 default:
870 result = Py_NotImplemented;
871 goto out;
872 }
873 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000874 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 Py_INCREF(result);
876 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877}
878
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000879static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000880bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000881{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100882 if (a->ob_shash == -1) {
883 /* Can't fail */
884 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
885 }
886 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000887}
888
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000889static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000890bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000891{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 if (PyIndex_Check(item)) {
893 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
894 if (i == -1 && PyErr_Occurred())
895 return NULL;
896 if (i < 0)
897 i += PyBytes_GET_SIZE(self);
898 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
899 PyErr_SetString(PyExc_IndexError,
900 "index out of range");
901 return NULL;
902 }
903 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
904 }
905 else if (PySlice_Check(item)) {
906 Py_ssize_t start, stop, step, slicelength, cur, i;
907 char* source_buf;
908 char* result_buf;
909 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000910
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000911 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 PyBytes_GET_SIZE(self),
913 &start, &stop, &step, &slicelength) < 0) {
914 return NULL;
915 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 if (slicelength <= 0) {
918 return PyBytes_FromStringAndSize("", 0);
919 }
920 else if (start == 0 && step == 1 &&
921 slicelength == PyBytes_GET_SIZE(self) &&
922 PyBytes_CheckExact(self)) {
923 Py_INCREF(self);
924 return (PyObject *)self;
925 }
926 else if (step == 1) {
927 return PyBytes_FromStringAndSize(
928 PyBytes_AS_STRING(self) + start,
929 slicelength);
930 }
931 else {
932 source_buf = PyBytes_AS_STRING(self);
933 result = PyBytes_FromStringAndSize(NULL, slicelength);
934 if (result == NULL)
935 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 result_buf = PyBytes_AS_STRING(result);
938 for (cur = start, i = 0; i < slicelength;
939 cur += step, i++) {
940 result_buf[i] = source_buf[cur];
941 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 return result;
944 }
945 }
946 else {
947 PyErr_Format(PyExc_TypeError,
948 "byte indices must be integers, not %.200s",
949 Py_TYPE(item)->tp_name);
950 return NULL;
951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000952}
953
954static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000955bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000956{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
958 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959}
960
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000961static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 (lenfunc)bytes_length, /*sq_length*/
963 (binaryfunc)bytes_concat, /*sq_concat*/
964 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
965 (ssizeargfunc)bytes_item, /*sq_item*/
966 0, /*sq_slice*/
967 0, /*sq_ass_item*/
968 0, /*sq_ass_slice*/
969 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000970};
971
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000972static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 (lenfunc)bytes_length,
974 (binaryfunc)bytes_subscript,
975 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000976};
977
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000978static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000979 (getbufferproc)bytes_buffer_getbuffer,
980 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000981};
982
983
/* Strip modes shared by the strip()/lstrip()/rstrip() helpers. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its 3-char "|O:"
   prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
992
Neal Norwitz6968b052007-02-27 19:02:19 +0000993PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200994"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000995\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000996Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997If sep is not specified or is None, B is split on ASCII whitespace\n\
998characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000999If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001000
1001static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001002bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001003{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001004 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1006 Py_ssize_t maxsplit = -1;
1007 const char *s = PyBytes_AS_STRING(self), *sub;
1008 Py_buffer vsub;
1009 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001010
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001011 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1012 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 return NULL;
1014 if (maxsplit < 0)
1015 maxsplit = PY_SSIZE_T_MAX;
1016 if (subobj == Py_None)
1017 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1018 if (_getbuffer(subobj, &vsub) < 0)
1019 return NULL;
1020 sub = vsub.buf;
1021 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1024 PyBuffer_Release(&vsub);
1025 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001026}
1027
Neal Norwitz6968b052007-02-27 19:02:19 +00001028PyDoc_STRVAR(partition__doc__,
1029"B.partition(sep) -> (head, sep, tail)\n\
1030\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001031Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001032the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001033found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001034
1035static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001036bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 const char *sep;
1039 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 if (PyBytes_Check(sep_obj)) {
1042 sep = PyBytes_AS_STRING(sep_obj);
1043 sep_len = PyBytes_GET_SIZE(sep_obj);
1044 }
1045 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1046 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 return stringlib_partition(
1049 (PyObject*) self,
1050 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1051 sep_obj, sep, sep_len
1052 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001053}
1054
1055PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001056"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001057\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001058Search for the separator sep in B, starting at the end of B,\n\
1059and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001060part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001061bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
1063static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001064bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 const char *sep;
1067 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 if (PyBytes_Check(sep_obj)) {
1070 sep = PyBytes_AS_STRING(sep_obj);
1071 sep_len = PyBytes_GET_SIZE(sep_obj);
1072 }
1073 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1074 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 return stringlib_rpartition(
1077 (PyObject*) self,
1078 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1079 sep_obj, sep, sep_len
1080 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001081}
1082
Neal Norwitz6968b052007-02-27 19:02:19 +00001083PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001084"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001085\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001086Return a list of the sections in B, using sep as the delimiter,\n\
1087starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001088If sep is not given, B is split on ASCII whitespace characters\n\
1089(space, tab, return, newline, formfeed, vertical tab).\n\
1090If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001091
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001092
Neal Norwitz6968b052007-02-27 19:02:19 +00001093static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001094bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001095{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001096 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1098 Py_ssize_t maxsplit = -1;
1099 const char *s = PyBytes_AS_STRING(self), *sub;
1100 Py_buffer vsub;
1101 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001102
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001103 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1104 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 return NULL;
1106 if (maxsplit < 0)
1107 maxsplit = PY_SSIZE_T_MAX;
1108 if (subobj == Py_None)
1109 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1110 if (_getbuffer(subobj, &vsub) < 0)
1111 return NULL;
1112 sub = vsub.buf;
1113 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1116 PyBuffer_Release(&vsub);
1117 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001118}
1119
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001120
1121PyDoc_STRVAR(join__doc__,
1122"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001123\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001124Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001125Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1126
Neal Norwitz6968b052007-02-27 19:02:19 +00001127static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001128bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001129{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 char *sep = PyBytes_AS_STRING(self);
1131 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1132 PyObject *res = NULL;
1133 char *p;
1134 Py_ssize_t seqlen = 0;
1135 size_t sz = 0;
1136 Py_ssize_t i;
1137 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 seq = PySequence_Fast(orig, "");
1140 if (seq == NULL) {
1141 return NULL;
1142 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 seqlen = PySequence_Size(seq);
1145 if (seqlen == 0) {
1146 Py_DECREF(seq);
1147 return PyBytes_FromString("");
1148 }
1149 if (seqlen == 1) {
1150 item = PySequence_Fast_GET_ITEM(seq, 0);
1151 if (PyBytes_CheckExact(item)) {
1152 Py_INCREF(item);
1153 Py_DECREF(seq);
1154 return item;
1155 }
1156 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 /* There are at least two things to join, or else we have a subclass
1159 * of the builtin types in the sequence.
1160 * Do a pre-pass to figure out the total amount of space we'll
1161 * need (sz), and see whether all argument are bytes.
1162 */
1163 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1164 for (i = 0; i < seqlen; i++) {
1165 const size_t old_sz = sz;
1166 item = PySequence_Fast_GET_ITEM(seq, i);
1167 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1168 PyErr_Format(PyExc_TypeError,
1169 "sequence item %zd: expected bytes,"
1170 " %.80s found",
1171 i, Py_TYPE(item)->tp_name);
1172 Py_DECREF(seq);
1173 return NULL;
1174 }
1175 sz += Py_SIZE(item);
1176 if (i != 0)
1177 sz += seplen;
1178 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1179 PyErr_SetString(PyExc_OverflowError,
1180 "join() result is too long for bytes");
1181 Py_DECREF(seq);
1182 return NULL;
1183 }
1184 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 /* Allocate result space. */
1187 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1188 if (res == NULL) {
1189 Py_DECREF(seq);
1190 return NULL;
1191 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* Catenate everything. */
1194 /* I'm not worried about a PyByteArray item growing because there's
1195 nowhere in this function where we release the GIL. */
1196 p = PyBytes_AS_STRING(res);
1197 for (i = 0; i < seqlen; ++i) {
1198 size_t n;
1199 char *q;
1200 if (i) {
1201 Py_MEMCPY(p, sep, seplen);
1202 p += seplen;
1203 }
1204 item = PySequence_Fast_GET_ITEM(seq, i);
1205 n = Py_SIZE(item);
1206 if (PyBytes_Check(item))
1207 q = PyBytes_AS_STRING(item);
1208 else
1209 q = PyByteArray_AS_STRING(item);
1210 Py_MEMCPY(p, q, n);
1211 p += n;
1212 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_DECREF(seq);
1215 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001216}
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218PyObject *
1219_PyBytes_Join(PyObject *sep, PyObject *x)
1220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 assert(sep != NULL && PyBytes_Check(sep));
1222 assert(x != NULL);
1223 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224}
1225
/* helper macro to fixup start/end slice values.

   start and end must be modifiable lvalues; len is the sequence length and
   is evaluated more than once.  end is clamped to len, negative values are
   taken relative to len and then clamped to 0.  start is NOT clamped
   against len, so it may end up greater than end.

   Wrapped in do { } while (0) so that "ADJUST_INDICES(...);" behaves as a
   single statement (e.g. as the body of an if/else); arguments are
   parenthesized so that expression arguments expand safely. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240
1241Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001245 char byte;
1246 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 const char *sub;
1248 Py_ssize_t sub_len;
1249 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001250 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251
Antoine Pitrouac65d962011-10-20 23:54:17 +02001252 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1253 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255
Antoine Pitrouac65d962011-10-20 23:54:17 +02001256 if (subobj) {
1257 if (_getbuffer(subobj, &subbuf) < 0)
1258 return -2;
1259
1260 sub = subbuf.buf;
1261 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001263 else {
1264 sub = &byte;
1265 sub_len = 1;
1266 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001269 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1271 sub, sub_len, start, end);
1272 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001273 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1275 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001276
1277 if (subobj)
1278 PyBuffer_Release(&subbuf);
1279
1280 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001281}
1282
1283
1284PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001285"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001286\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001287Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001288such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001290\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291Return -1 on failure.");
1292
Neal Norwitz6968b052007-02-27 19:02:19 +00001293static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001294bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001295{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 Py_ssize_t result = bytes_find_internal(self, args, +1);
1297 if (result == -2)
1298 return NULL;
1299 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001300}
1301
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
1303PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001304"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001305\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306Like B.find() but raise ValueError when the substring is not found.");
1307
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001308static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001309bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 Py_ssize_t result = bytes_find_internal(self, args, +1);
1312 if (result == -2)
1313 return NULL;
1314 if (result == -1) {
1315 PyErr_SetString(PyExc_ValueError,
1316 "substring not found");
1317 return NULL;
1318 }
1319 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001320}
1321
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001322
1323PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001324"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001325\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001327such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001329\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330Return -1 on failure.");
1331
Neal Norwitz6968b052007-02-27 19:02:19 +00001332static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001333bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 Py_ssize_t result = bytes_find_internal(self, args, -1);
1336 if (result == -2)
1337 return NULL;
1338 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001339}
1340
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001343"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001344\n\
1345Like B.rfind() but raise ValueError when the substring is not found.");
1346
1347static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001348bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 Py_ssize_t result = bytes_find_internal(self, args, -1);
1351 if (result == -2)
1352 return NULL;
1353 if (result == -1) {
1354 PyErr_SetString(PyExc_ValueError,
1355 "substring not found");
1356 return NULL;
1357 }
1358 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001359}
1360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361
1362Py_LOCAL_INLINE(PyObject *)
1363do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 Py_buffer vsep;
1366 char *s = PyBytes_AS_STRING(self);
1367 Py_ssize_t len = PyBytes_GET_SIZE(self);
1368 char *sep;
1369 Py_ssize_t seplen;
1370 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (_getbuffer(sepobj, &vsep) < 0)
1373 return NULL;
1374 sep = vsep.buf;
1375 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 i = 0;
1378 if (striptype != RIGHTSTRIP) {
1379 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1380 i++;
1381 }
1382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 j = len;
1385 if (striptype != LEFTSTRIP) {
1386 do {
1387 j--;
1388 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1389 j++;
1390 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1395 Py_INCREF(self);
1396 return (PyObject*)self;
1397 }
1398 else
1399 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001400}
1401
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
1403Py_LOCAL_INLINE(PyObject *)
1404do_strip(PyBytesObject *self, int striptype)
1405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 char *s = PyBytes_AS_STRING(self);
1407 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 i = 0;
1410 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001411 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 i++;
1413 }
1414 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 j = len;
1417 if (striptype != LEFTSTRIP) {
1418 do {
1419 j--;
David Malcolm96960882010-11-05 17:23:41 +00001420 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 j++;
1422 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1425 Py_INCREF(self);
1426 return (PyObject*)self;
1427 }
1428 else
1429 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430}
1431
1432
1433Py_LOCAL_INLINE(PyObject *)
1434do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1435{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1439 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 if (sep != NULL && sep != Py_None) {
1442 return do_xstrip(self, striptype, sep);
1443 }
1444 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445}
1446
1447
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001450\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001451Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001452If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001454bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 if (PyTuple_GET_SIZE(args) == 0)
1457 return do_strip(self, BOTHSTRIP); /* Common case */
1458 else
1459 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460}
1461
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001462
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001464"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001466Strip leading bytes contained in the argument.\n\
1467If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001469bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (PyTuple_GET_SIZE(args) == 0)
1472 return do_strip(self, LEFTSTRIP); /* Common case */
1473 else
1474 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001475}
1476
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001478PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001480\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001481Strip trailing bytes contained in the argument.\n\
1482If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001483static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001484bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001485{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 if (PyTuple_GET_SIZE(args) == 0)
1487 return do_strip(self, RIGHTSTRIP); /* Common case */
1488 else
1489 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001490}
Neal Norwitz6968b052007-02-27 19:02:19 +00001491
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
1493PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001494"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001495\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001497string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001498as in slice notation.");
1499
1500static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001501bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 PyObject *sub_obj;
1504 const char *str = PyBytes_AS_STRING(self), *sub;
1505 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001506 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
Antoine Pitrouac65d962011-10-20 23:54:17 +02001509 Py_buffer vsub;
1510 PyObject *count_obj;
1511
1512 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1513 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515
Antoine Pitrouac65d962011-10-20 23:54:17 +02001516 if (sub_obj) {
1517 if (_getbuffer(sub_obj, &vsub) < 0)
1518 return NULL;
1519
1520 sub = vsub.buf;
1521 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001523 else {
1524 sub = &byte;
1525 sub_len = 1;
1526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529
Antoine Pitrouac65d962011-10-20 23:54:17 +02001530 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1532 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001533
1534 if (sub_obj)
1535 PyBuffer_Release(&vsub);
1536
1537 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538}
1539
1540
1541PyDoc_STRVAR(translate__doc__,
1542"B.translate(table[, deletechars]) -> bytes\n\
1543\n\
1544Return a copy of B, where all characters occurring in the\n\
1545optional argument deletechars are removed, and the remaining\n\
1546characters have been mapped through the given translation\n\
1547table, which must be a bytes object of length 256.");
1548
1549static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001550bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 register char *input, *output;
1553 const char *table;
1554 register Py_ssize_t i, c, changed = 0;
1555 PyObject *input_obj = (PyObject*)self;
1556 const char *output_start, *del_table=NULL;
1557 Py_ssize_t inlen, tablen, dellen = 0;
1558 PyObject *result;
1559 int trans_table[256];
1560 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1563 &tableobj, &delobj))
1564 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 if (PyBytes_Check(tableobj)) {
1567 table = PyBytes_AS_STRING(tableobj);
1568 tablen = PyBytes_GET_SIZE(tableobj);
1569 }
1570 else if (tableobj == Py_None) {
1571 table = NULL;
1572 tablen = 256;
1573 }
1574 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1575 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 if (tablen != 256) {
1578 PyErr_SetString(PyExc_ValueError,
1579 "translation table must be 256 characters long");
1580 return NULL;
1581 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 if (delobj != NULL) {
1584 if (PyBytes_Check(delobj)) {
1585 del_table = PyBytes_AS_STRING(delobj);
1586 dellen = PyBytes_GET_SIZE(delobj);
1587 }
1588 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1589 return NULL;
1590 }
1591 else {
1592 del_table = NULL;
1593 dellen = 0;
1594 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 inlen = PyBytes_GET_SIZE(input_obj);
1597 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1598 if (result == NULL)
1599 return NULL;
1600 output_start = output = PyBytes_AsString(result);
1601 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 if (dellen == 0 && table != NULL) {
1604 /* If no deletions are required, use faster code */
1605 for (i = inlen; --i >= 0; ) {
1606 c = Py_CHARMASK(*input++);
1607 if (Py_CHARMASK((*output++ = table[c])) != c)
1608 changed = 1;
1609 }
1610 if (changed || !PyBytes_CheckExact(input_obj))
1611 return result;
1612 Py_DECREF(result);
1613 Py_INCREF(input_obj);
1614 return input_obj;
1615 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 if (table == NULL) {
1618 for (i = 0; i < 256; i++)
1619 trans_table[i] = Py_CHARMASK(i);
1620 } else {
1621 for (i = 0; i < 256; i++)
1622 trans_table[i] = Py_CHARMASK(table[i]);
1623 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 for (i = 0; i < dellen; i++)
1626 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 for (i = inlen; --i >= 0; ) {
1629 c = Py_CHARMASK(*input++);
1630 if (trans_table[c] != -1)
1631 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1632 continue;
1633 changed = 1;
1634 }
1635 if (!changed && PyBytes_CheckExact(input_obj)) {
1636 Py_DECREF(result);
1637 Py_INCREF(input_obj);
1638 return input_obj;
1639 }
1640 /* Fix the size of the resulting string */
1641 if (inlen > 0)
1642 _PyBytes_Resize(&result, output - output_start);
1643 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001644}
1645
1646
Georg Brandlabc38772009-04-12 15:51:51 +00001647static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001648bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001651}
1652
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653/* find and count characters and substrings */
1654
/* findchar(target, target_len, c): memchr() wrapper yielding a char *
   to the first byte equal to c within the first target_len bytes of
   target, or NULL when there is none. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1657
1658/* String ops must return a string. */
1659/* If the object is subclass of string, create a copy */
1660Py_LOCAL(PyBytesObject *)
1661return_self(PyBytesObject *self)
1662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 if (PyBytes_CheckExact(self)) {
1664 Py_INCREF(self);
1665 return self;
1666 }
1667 return (PyBytesObject *)PyBytes_FromStringAndSize(
1668 PyBytes_AS_STRING(self),
1669 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670}
1671
1672Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001673countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 Py_ssize_t count=0;
1676 const char *start=target;
1677 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 while ( (start=findchar(start, end-start, c)) != NULL ) {
1680 count++;
1681 if (count >= maxcount)
1682 break;
1683 start += 1;
1684 }
1685 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686}
1687
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
1689/* Algorithms for different cases of string replacement */
1690
1691/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1692Py_LOCAL(PyBytesObject *)
1693replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 const char *to_s, Py_ssize_t to_len,
1695 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 char *self_s, *result_s;
1698 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001699 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001704 /* 1 at the end plus 1 after every character;
1705 count = min(maxcount, self_len + 1) */
1706 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001708 else
1709 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1710 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 /* Check for overflow */
1713 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001714 assert(count > 0);
1715 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 PyErr_SetString(PyExc_OverflowError,
1717 "replacement bytes are too long");
1718 return NULL;
1719 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001720 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 if (! (result = (PyBytesObject *)
1723 PyBytes_FromStringAndSize(NULL, result_len)) )
1724 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 self_s = PyBytes_AS_STRING(self);
1727 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 /* Lay the first one down (guaranteed this will occur) */
1732 Py_MEMCPY(result_s, to_s, to_len);
1733 result_s += to_len;
1734 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 for (i=0; i<count; i++) {
1737 *result_s++ = *self_s++;
1738 Py_MEMCPY(result_s, to_s, to_len);
1739 result_s += to_len;
1740 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 /* Copy the rest of the original string */
1743 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746}
1747
1748/* Special case for deleting a single character */
1749/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1750Py_LOCAL(PyBytesObject *)
1751replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 char *self_s, *result_s;
1755 char *start, *next, *end;
1756 Py_ssize_t self_len, result_len;
1757 Py_ssize_t count;
1758 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 self_len = PyBytes_GET_SIZE(self);
1761 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 count = countchar(self_s, self_len, from_c, maxcount);
1764 if (count == 0) {
1765 return return_self(self);
1766 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 result_len = self_len - count; /* from_len == 1 */
1769 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 if ( (result = (PyBytesObject *)
1772 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1773 return NULL;
1774 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 start = self_s;
1777 end = self_s + self_len;
1778 while (count-- > 0) {
1779 next = findchar(start, end-start, from_c);
1780 if (next == NULL)
1781 break;
1782 Py_MEMCPY(result_s, start, next-start);
1783 result_s += (next-start);
1784 start = next+1;
1785 }
1786 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789}
1790
1791/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1792
1793Py_LOCAL(PyBytesObject *)
1794replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 const char *from_s, Py_ssize_t from_len,
1796 Py_ssize_t maxcount) {
1797 char *self_s, *result_s;
1798 char *start, *next, *end;
1799 Py_ssize_t self_len, result_len;
1800 Py_ssize_t count, offset;
1801 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 self_len = PyBytes_GET_SIZE(self);
1804 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 count = stringlib_count(self_s, self_len,
1807 from_s, from_len,
1808 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 if (count == 0) {
1811 /* no matches */
1812 return return_self(self);
1813 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 result_len = self_len - (count * from_len);
1816 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 if ( (result = (PyBytesObject *)
1819 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1820 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 start = self_s;
1825 end = self_s + self_len;
1826 while (count-- > 0) {
1827 offset = stringlib_find(start, end-start,
1828 from_s, from_len,
1829 0);
1830 if (offset == -1)
1831 break;
1832 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 result_s += (next-start);
1837 start = next+from_len;
1838 }
1839 Py_MEMCPY(result_s, start, end-start);
1840 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841}
1842
1843/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1844Py_LOCAL(PyBytesObject *)
1845replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 char from_c, char to_c,
1847 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 char *self_s, *result_s, *start, *end, *next;
1850 Py_ssize_t self_len;
1851 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 /* The result string will be the same size */
1854 self_s = PyBytes_AS_STRING(self);
1855 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 if (next == NULL) {
1860 /* No matches; return the original string */
1861 return return_self(self);
1862 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 /* Need to make a new string */
1865 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1866 if (result == NULL)
1867 return NULL;
1868 result_s = PyBytes_AS_STRING(result);
1869 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 /* change everything in-place, starting with this one */
1872 start = result_s + (next-self_s);
1873 *start = to_c;
1874 start++;
1875 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 while (--maxcount > 0) {
1878 next = findchar(start, end-start, from_c);
1879 if (next == NULL)
1880 break;
1881 *next = to_c;
1882 start = next+1;
1883 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886}
1887
1888/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1889Py_LOCAL(PyBytesObject *)
1890replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 const char *from_s, Py_ssize_t from_len,
1892 const char *to_s, Py_ssize_t to_len,
1893 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 char *result_s, *start, *end;
1896 char *self_s;
1897 Py_ssize_t self_len, offset;
1898 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 self_s = PyBytes_AS_STRING(self);
1903 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 offset = stringlib_find(self_s, self_len,
1906 from_s, from_len,
1907 0);
1908 if (offset == -1) {
1909 /* No matches; return the original string */
1910 return return_self(self);
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 /* Need to make a new string */
1914 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1915 if (result == NULL)
1916 return NULL;
1917 result_s = PyBytes_AS_STRING(result);
1918 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 /* change everything in-place, starting with this one */
1921 start = result_s + offset;
1922 Py_MEMCPY(start, to_s, from_len);
1923 start += from_len;
1924 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 while ( --maxcount > 0) {
1927 offset = stringlib_find(start, end-start,
1928 from_s, from_len,
1929 0);
1930 if (offset==-1)
1931 break;
1932 Py_MEMCPY(start+offset, to_s, from_len);
1933 start += offset+from_len;
1934 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937}
1938
1939/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1940Py_LOCAL(PyBytesObject *)
1941replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 char from_c,
1943 const char *to_s, Py_ssize_t to_len,
1944 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 char *self_s, *result_s;
1947 char *start, *next, *end;
1948 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001949 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 self_s = PyBytes_AS_STRING(self);
1953 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 count = countchar(self_s, self_len, from_c, maxcount);
1956 if (count == 0) {
1957 /* no matches, return unchanged */
1958 return return_self(self);
1959 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 /* use the difference between current and new, hence the "-1" */
1962 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001963 assert(count > 0);
1964 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 PyErr_SetString(PyExc_OverflowError,
1966 "replacement bytes are too long");
1967 return NULL;
1968 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001969 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 if ( (result = (PyBytesObject *)
1972 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1973 return NULL;
1974 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001976 start = self_s;
1977 end = self_s + self_len;
1978 while (count-- > 0) {
1979 next = findchar(start, end-start, from_c);
1980 if (next == NULL)
1981 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 if (next == start) {
1984 /* replace with the 'to' */
1985 Py_MEMCPY(result_s, to_s, to_len);
1986 result_s += to_len;
1987 start += 1;
1988 } else {
1989 /* copy the unchanged old then the 'to' */
1990 Py_MEMCPY(result_s, start, next-start);
1991 result_s += (next-start);
1992 Py_MEMCPY(result_s, to_s, to_len);
1993 result_s += to_len;
1994 start = next+1;
1995 }
1996 }
1997 /* Copy the remainder of the remaining string */
1998 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001}
2002
2003/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2004Py_LOCAL(PyBytesObject *)
2005replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 const char *from_s, Py_ssize_t from_len,
2007 const char *to_s, Py_ssize_t to_len,
2008 Py_ssize_t maxcount) {
2009 char *self_s, *result_s;
2010 char *start, *next, *end;
2011 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002012 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002015 self_s = PyBytes_AS_STRING(self);
2016 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 count = stringlib_count(self_s, self_len,
2019 from_s, from_len,
2020 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 if (count == 0) {
2023 /* no matches, return unchanged */
2024 return return_self(self);
2025 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 /* Check for overflow */
2028 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002029 assert(count > 0);
2030 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 PyErr_SetString(PyExc_OverflowError,
2032 "replacement bytes are too long");
2033 return NULL;
2034 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002035 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 if ( (result = (PyBytesObject *)
2038 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2039 return NULL;
2040 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002042 start = self_s;
2043 end = self_s + self_len;
2044 while (count-- > 0) {
2045 offset = stringlib_find(start, end-start,
2046 from_s, from_len,
2047 0);
2048 if (offset == -1)
2049 break;
2050 next = start+offset;
2051 if (next == start) {
2052 /* replace with the 'to' */
2053 Py_MEMCPY(result_s, to_s, to_len);
2054 result_s += to_len;
2055 start += from_len;
2056 } else {
2057 /* copy the unchanged old then the 'to' */
2058 Py_MEMCPY(result_s, start, next-start);
2059 result_s += (next-start);
2060 Py_MEMCPY(result_s, to_s, to_len);
2061 result_s += to_len;
2062 start = next+from_len;
2063 }
2064 }
2065 /* Copy the remainder of the remaining string */
2066 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069}
2070
2071
2072Py_LOCAL(PyBytesObject *)
2073replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 const char *from_s, Py_ssize_t from_len,
2075 const char *to_s, Py_ssize_t to_len,
2076 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 if (maxcount < 0) {
2079 maxcount = PY_SSIZE_T_MAX;
2080 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2081 /* nothing to do; return the original string */
2082 return return_self(self);
2083 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 if (maxcount == 0 ||
2086 (from_len == 0 && to_len == 0)) {
2087 /* nothing to do; return the original string */
2088 return return_self(self);
2089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 if (from_len == 0) {
2094 /* insert the 'to' string everywhere. */
2095 /* >>> "Python".replace("", ".") */
2096 /* '.P.y.t.h.o.n.' */
2097 return replace_interleave(self, to_s, to_len, maxcount);
2098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2101 /* point for an empty self string to generate a non-empty string */
2102 /* Special case so the remaining code always gets a non-empty string */
2103 if (PyBytes_GET_SIZE(self) == 0) {
2104 return return_self(self);
2105 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 if (to_len == 0) {
2108 /* delete all occurrences of 'from' string */
2109 if (from_len == 1) {
2110 return replace_delete_single_character(
2111 self, from_s[0], maxcount);
2112 } else {
2113 return replace_delete_substring(self, from_s,
2114 from_len, maxcount);
2115 }
2116 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 if (from_len == to_len) {
2121 if (from_len == 1) {
2122 return replace_single_character_in_place(
2123 self,
2124 from_s[0],
2125 to_s[0],
2126 maxcount);
2127 } else {
2128 return replace_substring_in_place(
2129 self, from_s, from_len, to_s, to_len,
2130 maxcount);
2131 }
2132 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 /* Otherwise use the more generic algorithms */
2135 if (from_len == 1) {
2136 return replace_single_character(self, from_s[0],
2137 to_s, to_len, maxcount);
2138 } else {
2139 /* len('from')>=2, len('to')>=1 */
2140 return replace_substring(self, from_s, from_len, to_s, to_len,
2141 maxcount);
2142 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143}
2144
2145PyDoc_STRVAR(replace__doc__,
2146"B.replace(old, new[, count]) -> bytes\n\
2147\n\
2148Return a copy of B with all occurrences of subsection\n\
2149old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002150given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
2152static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002153bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 Py_ssize_t count = -1;
2156 PyObject *from, *to;
2157 const char *from_s, *to_s;
2158 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2161 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 if (PyBytes_Check(from)) {
2164 from_s = PyBytes_AS_STRING(from);
2165 from_len = PyBytes_GET_SIZE(from);
2166 }
2167 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2168 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 if (PyBytes_Check(to)) {
2171 to_s = PyBytes_AS_STRING(to);
2172 to_len = PyBytes_GET_SIZE(to);
2173 }
2174 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2175 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 return (PyObject *)replace((PyBytesObject *) self,
2178 from_s, from_len,
2179 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180}
2181
2182/** End DALKE **/
2183
2184/* Matches the end (direction >= 0) or start (direction < 0) of self
2185 * against substr, using the start and end arguments. Returns
2186 * -1 on error, 0 if not found and 1 if found.
2187 */
2188Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002189_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 Py_ssize_t len = PyBytes_GET_SIZE(self);
2193 Py_ssize_t slen;
2194 const char* sub;
2195 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 if (PyBytes_Check(substr)) {
2198 sub = PyBytes_AS_STRING(substr);
2199 slen = PyBytes_GET_SIZE(substr);
2200 }
2201 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2202 return -1;
2203 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 if (direction < 0) {
2208 /* startswith */
2209 if (start+slen > len)
2210 return 0;
2211 } else {
2212 /* endswith */
2213 if (end-start < slen || start > len)
2214 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 if (end-slen > start)
2217 start = end - slen;
2218 }
2219 if (end-start >= slen)
2220 return ! memcmp(str+start, sub, slen);
2221 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222}
2223
2224
2225PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002226"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227\n\
2228Return True if B starts with the specified prefix, False otherwise.\n\
2229With optional start, test B beginning at that position.\n\
2230With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002231prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002232
2233static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002234bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002236 Py_ssize_t start = 0;
2237 Py_ssize_t end = PY_SSIZE_T_MAX;
2238 PyObject *subobj;
2239 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240
Jesus Ceaac451502011-04-20 17:09:23 +02002241 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002242 return NULL;
2243 if (PyTuple_Check(subobj)) {
2244 Py_ssize_t i;
2245 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2246 result = _bytes_tailmatch(self,
2247 PyTuple_GET_ITEM(subobj, i),
2248 start, end, -1);
2249 if (result == -1)
2250 return NULL;
2251 else if (result) {
2252 Py_RETURN_TRUE;
2253 }
2254 }
2255 Py_RETURN_FALSE;
2256 }
2257 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002258 if (result == -1) {
2259 if (PyErr_ExceptionMatches(PyExc_TypeError))
2260 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2261 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002262 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002263 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 else
2265 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266}
2267
2268
2269PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002270"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002271\n\
2272Return True if B ends with the specified suffix, False otherwise.\n\
2273With optional start, test B beginning at that position.\n\
2274With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002275suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276
2277static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002278bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 Py_ssize_t start = 0;
2281 Py_ssize_t end = PY_SSIZE_T_MAX;
2282 PyObject *subobj;
2283 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284
Jesus Ceaac451502011-04-20 17:09:23 +02002285 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 return NULL;
2287 if (PyTuple_Check(subobj)) {
2288 Py_ssize_t i;
2289 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2290 result = _bytes_tailmatch(self,
2291 PyTuple_GET_ITEM(subobj, i),
2292 start, end, +1);
2293 if (result == -1)
2294 return NULL;
2295 else if (result) {
2296 Py_RETURN_TRUE;
2297 }
2298 }
2299 Py_RETURN_FALSE;
2300 }
2301 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002302 if (result == -1) {
2303 if (PyErr_ExceptionMatches(PyExc_TypeError))
2304 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2305 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002306 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002307 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 else
2309 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002310}
2311
2312
2313PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002314"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002315\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002316Decode B using the codec registered for encoding. Default encoding\n\
2317is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002318handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2319a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002321able to handle UnicodeDecodeErrors.");
2322
2323static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002324bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002325{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 const char *encoding = NULL;
2327 const char *errors = NULL;
2328 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2331 return NULL;
2332 if (encoding == NULL)
2333 encoding = PyUnicode_GetDefaultEncoding();
2334 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002335}
2336
Guido van Rossum20188312006-05-05 15:15:40 +00002337
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002338PyDoc_STRVAR(splitlines__doc__,
2339"B.splitlines([keepends]) -> list of lines\n\
2340\n\
2341Return a list of the lines in B, breaking at line boundaries.\n\
2342Line breaks are not included in the resulting list unless keepends\n\
2343is given and true.");
2344
2345static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002346bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002347{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002348 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002349 int keepends = 0;
2350
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002351 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2352 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002353 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002354
2355 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002356 (PyObject*) self, PyBytes_AS_STRING(self),
2357 PyBytes_GET_SIZE(self), keepends
2358 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002359}
2360
2361
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002362PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002365Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002366Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002367Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002368
2369static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002370hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 if (c >= 128)
2373 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002374 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 return c - '0';
2376 else {
David Malcolm96960882010-11-05 17:23:41 +00002377 if (Py_ISUPPER(c))
2378 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 if (c >= 'a' && c <= 'f')
2380 return c - 'a' + 10;
2381 }
2382 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002383}
2384
2385static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002386bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 PyObject *newstring, *hexobj;
2389 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 Py_ssize_t hexlen, byteslen, i, j;
2391 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002392 void *data;
2393 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2396 return NULL;
2397 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002398 if (PyUnicode_READY(hexobj))
2399 return NULL;
2400 kind = PyUnicode_KIND(hexobj);
2401 data = PyUnicode_DATA(hexobj);
2402 hexlen = PyUnicode_GET_LENGTH(hexobj);
2403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 byteslen = hexlen/2; /* This overestimates if there are spaces */
2405 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2406 if (!newstring)
2407 return NULL;
2408 buf = PyBytes_AS_STRING(newstring);
2409 for (i = j = 0; i < hexlen; i += 2) {
2410 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002411 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 i++;
2413 if (i >= hexlen)
2414 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002415 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2416 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 if (top == -1 || bot == -1) {
2418 PyErr_Format(PyExc_ValueError,
2419 "non-hexadecimal number found in "
2420 "fromhex() arg at position %zd", i);
2421 goto error;
2422 }
2423 buf[j++] = (top << 4) + bot;
2424 }
2425 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2426 goto error;
2427 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002428
2429 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002430 Py_XDECREF(newstring);
2431 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002432}
2433
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002434PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002435"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002436
2437static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002438bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 Py_ssize_t res;
2441 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2442 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002443}
2444
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002445
2446static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002447bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002448{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002450}
2451
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002452
2453static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002454bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2456 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2457 _Py_capitalize__doc__},
2458 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2459 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2460 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2461 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2462 endswith__doc__},
2463 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2464 expandtabs__doc__},
2465 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2466 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2467 fromhex_doc},
2468 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2469 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2470 _Py_isalnum__doc__},
2471 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2472 _Py_isalpha__doc__},
2473 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2474 _Py_isdigit__doc__},
2475 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2476 _Py_islower__doc__},
2477 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2478 _Py_isspace__doc__},
2479 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2480 _Py_istitle__doc__},
2481 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2482 _Py_isupper__doc__},
2483 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2484 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2485 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2486 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2487 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2488 _Py_maketrans__doc__},
2489 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2490 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2491 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2492 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2493 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2494 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2495 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002496 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002498 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002499 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 splitlines__doc__},
2501 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2502 startswith__doc__},
2503 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2504 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2505 _Py_swapcase__doc__},
2506 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2507 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2508 translate__doc__},
2509 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2510 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2511 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2512 sizeof__doc__},
2513 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002514};
2515
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516static PyObject *
2517str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2518
2519static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002520bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 PyObject *x = NULL;
2523 const char *encoding = NULL;
2524 const char *errors = NULL;
2525 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002526 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 Py_ssize_t size;
2528 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002529 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 if (type != &PyBytes_Type)
2532 return str_subtype_new(type, args, kwds);
2533 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2534 &encoding, &errors))
2535 return NULL;
2536 if (x == NULL) {
2537 if (encoding != NULL || errors != NULL) {
2538 PyErr_SetString(PyExc_TypeError,
2539 "encoding or errors without sequence "
2540 "argument");
2541 return NULL;
2542 }
2543 return PyBytes_FromString("");
2544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 if (PyUnicode_Check(x)) {
2547 /* Encode via the codec registry */
2548 if (encoding == NULL) {
2549 PyErr_SetString(PyExc_TypeError,
2550 "string argument without an encoding");
2551 return NULL;
2552 }
2553 new = PyUnicode_AsEncodedString(x, encoding, errors);
2554 if (new == NULL)
2555 return NULL;
2556 assert(PyBytes_Check(new));
2557 return new;
2558 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002559
2560 /* We'd like to call PyObject_Bytes here, but we need to check for an
2561 integer argument before deferring to PyBytes_FromObject, something
2562 PyObject_Bytes doesn't do. */
2563 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2564 if (func != NULL) {
2565 new = PyObject_CallFunctionObjArgs(func, NULL);
2566 Py_DECREF(func);
2567 if (new == NULL)
2568 return NULL;
2569 if (!PyBytes_Check(new)) {
2570 PyErr_Format(PyExc_TypeError,
2571 "__bytes__ returned non-bytes (type %.200s)",
2572 Py_TYPE(new)->tp_name);
2573 Py_DECREF(new);
2574 return NULL;
2575 }
2576 return new;
2577 }
2578 else if (PyErr_Occurred())
2579 return NULL;
2580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 /* Is it an integer? */
2582 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2583 if (size == -1 && PyErr_Occurred()) {
2584 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2585 return NULL;
2586 PyErr_Clear();
2587 }
2588 else if (size < 0) {
2589 PyErr_SetString(PyExc_ValueError, "negative count");
2590 return NULL;
2591 }
2592 else {
2593 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002594 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002595 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002596 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 return new;
2599 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 /* If it's not unicode, there can't be encoding or errors */
2602 if (encoding != NULL || errors != NULL) {
2603 PyErr_SetString(PyExc_TypeError,
2604 "encoding or errors without a string argument");
2605 return NULL;
2606 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002607
2608 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002609}
2610
2611PyObject *
2612PyBytes_FromObject(PyObject *x)
2613{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 PyObject *new, *it;
2615 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 if (x == NULL) {
2618 PyErr_BadInternalCall();
2619 return NULL;
2620 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002621
2622 if (PyBytes_CheckExact(x)) {
2623 Py_INCREF(x);
2624 return x;
2625 }
2626
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 /* Use the modern buffer interface */
2628 if (PyObject_CheckBuffer(x)) {
2629 Py_buffer view;
2630 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2631 return NULL;
2632 new = PyBytes_FromStringAndSize(NULL, view.len);
2633 if (!new)
2634 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002635 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2636 &view, view.len, 'C') < 0)
2637 goto fail;
2638 PyBuffer_Release(&view);
2639 return new;
2640 fail:
2641 Py_XDECREF(new);
2642 PyBuffer_Release(&view);
2643 return NULL;
2644 }
2645 if (PyUnicode_Check(x)) {
2646 PyErr_SetString(PyExc_TypeError,
2647 "cannot convert unicode object to bytes");
2648 return NULL;
2649 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 if (PyList_CheckExact(x)) {
2652 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2653 if (new == NULL)
2654 return NULL;
2655 for (i = 0; i < Py_SIZE(x); i++) {
2656 Py_ssize_t value = PyNumber_AsSsize_t(
2657 PyList_GET_ITEM(x, i), PyExc_ValueError);
2658 if (value == -1 && PyErr_Occurred()) {
2659 Py_DECREF(new);
2660 return NULL;
2661 }
2662 if (value < 0 || value >= 256) {
2663 PyErr_SetString(PyExc_ValueError,
2664 "bytes must be in range(0, 256)");
2665 Py_DECREF(new);
2666 return NULL;
2667 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002668 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 }
2670 return new;
2671 }
2672 if (PyTuple_CheckExact(x)) {
2673 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2674 if (new == NULL)
2675 return NULL;
2676 for (i = 0; i < Py_SIZE(x); i++) {
2677 Py_ssize_t value = PyNumber_AsSsize_t(
2678 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2679 if (value == -1 && PyErr_Occurred()) {
2680 Py_DECREF(new);
2681 return NULL;
2682 }
2683 if (value < 0 || value >= 256) {
2684 PyErr_SetString(PyExc_ValueError,
2685 "bytes must be in range(0, 256)");
2686 Py_DECREF(new);
2687 return NULL;
2688 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002689 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 }
2691 return new;
2692 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 /* For iterator version, create a string object and resize as needed */
2695 size = _PyObject_LengthHint(x, 64);
2696 if (size == -1 && PyErr_Occurred())
2697 return NULL;
2698 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2699 returning a shared empty bytes string. This required because we
2700 want to call _PyBytes_Resize() the returned object, which we can
2701 only do on bytes objects with refcount == 1. */
2702 size += 1;
2703 new = PyBytes_FromStringAndSize(NULL, size);
2704 if (new == NULL)
2705 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 /* Get the iterator */
2708 it = PyObject_GetIter(x);
2709 if (it == NULL)
2710 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 /* Run the iterator to exhaustion */
2713 for (i = 0; ; i++) {
2714 PyObject *item;
2715 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 /* Get the next item */
2718 item = PyIter_Next(it);
2719 if (item == NULL) {
2720 if (PyErr_Occurred())
2721 goto error;
2722 break;
2723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 /* Interpret it as an int (__index__) */
2726 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2727 Py_DECREF(item);
2728 if (value == -1 && PyErr_Occurred())
2729 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 /* Range check */
2732 if (value < 0 || value >= 256) {
2733 PyErr_SetString(PyExc_ValueError,
2734 "bytes must be in range(0, 256)");
2735 goto error;
2736 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 /* Append the byte */
2739 if (i >= size) {
2740 size = 2 * size + 1;
2741 if (_PyBytes_Resize(&new, size) < 0)
2742 goto error;
2743 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002744 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 }
2746 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 /* Clean up and return success */
2749 Py_DECREF(it);
2750 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751
2752 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002753 /* Error handling when new != NULL */
2754 Py_XDECREF(it);
2755 Py_DECREF(new);
2756 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757}
2758
2759static PyObject *
2760str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2761{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 PyObject *tmp, *pnew;
2763 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002765 assert(PyType_IsSubtype(type, &PyBytes_Type));
2766 tmp = bytes_new(&PyBytes_Type, args, kwds);
2767 if (tmp == NULL)
2768 return NULL;
2769 assert(PyBytes_CheckExact(tmp));
2770 n = PyBytes_GET_SIZE(tmp);
2771 pnew = type->tp_alloc(type, n);
2772 if (pnew != NULL) {
2773 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2774 PyBytes_AS_STRING(tmp), n+1);
2775 ((PyBytesObject *)pnew)->ob_shash =
2776 ((PyBytesObject *)tmp)->ob_shash;
2777 }
2778 Py_DECREF(tmp);
2779 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002780}
2781
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002782PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002783"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002785bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002786bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2787bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002788\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002790 - an iterable yielding integers in range(256)\n\
2791 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002792 - any object implementing the buffer API.\n\
2793 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002794
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002795static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002796
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2799 "bytes",
2800 PyBytesObject_SIZE,
2801 sizeof(char),
2802 bytes_dealloc, /* tp_dealloc */
2803 0, /* tp_print */
2804 0, /* tp_getattr */
2805 0, /* tp_setattr */
2806 0, /* tp_reserved */
2807 (reprfunc)bytes_repr, /* tp_repr */
2808 0, /* tp_as_number */
2809 &bytes_as_sequence, /* tp_as_sequence */
2810 &bytes_as_mapping, /* tp_as_mapping */
2811 (hashfunc)bytes_hash, /* tp_hash */
2812 0, /* tp_call */
2813 bytes_str, /* tp_str */
2814 PyObject_GenericGetAttr, /* tp_getattro */
2815 0, /* tp_setattro */
2816 &bytes_as_buffer, /* tp_as_buffer */
2817 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2818 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2819 bytes_doc, /* tp_doc */
2820 0, /* tp_traverse */
2821 0, /* tp_clear */
2822 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2823 0, /* tp_weaklistoffset */
2824 bytes_iter, /* tp_iter */
2825 0, /* tp_iternext */
2826 bytes_methods, /* tp_methods */
2827 0, /* tp_members */
2828 0, /* tp_getset */
2829 &PyBaseObject_Type, /* tp_base */
2830 0, /* tp_dict */
2831 0, /* tp_descr_get */
2832 0, /* tp_descr_set */
2833 0, /* tp_dictoffset */
2834 0, /* tp_init */
2835 0, /* tp_alloc */
2836 bytes_new, /* tp_new */
2837 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002838};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002839
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840void
2841PyBytes_Concat(register PyObject **pv, register PyObject *w)
2842{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 register PyObject *v;
2844 assert(pv != NULL);
2845 if (*pv == NULL)
2846 return;
2847 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002848 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 return;
2850 }
2851 v = bytes_concat(*pv, w);
2852 Py_DECREF(*pv);
2853 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854}
2855
2856void
2857PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 PyBytes_Concat(pv, w);
2860 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861}
2862
2863
2864/* The following function breaks the notion that strings are immutable:
2865 it changes the size of a string. We get away with this only if there
2866 is only one module referencing the object. You can also think of it
2867 as creating a new string object and destroying the old one, only
2868 more efficiently. In any case, don't use this if the string may
2869 already be known to some other part of the code...
2870 Note that if there's not enough memory to resize the string, the original
2871 string object at *pv is deallocated, *pv is set to NULL, an "out of
2872 memory" exception is set, and -1 is returned. Else (on success) 0 is
2873 returned, and the value in *pv may or may not be the same as on input.
2874 As always, an extra byte is allocated for a trailing \0 byte (newsize
2875 does *not* include that), and a trailing \0 byte is stored.
2876*/
2877
2878int
2879_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 register PyObject *v;
2882 register PyBytesObject *sv;
2883 v = *pv;
2884 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2885 *pv = 0;
2886 Py_DECREF(v);
2887 PyErr_BadInternalCall();
2888 return -1;
2889 }
2890 /* XXX UNREF/NEWREF interface should be more symmetrical */
2891 _Py_DEC_REFTOTAL;
2892 _Py_ForgetReference(v);
2893 *pv = (PyObject *)
2894 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2895 if (*pv == NULL) {
2896 PyObject_Del(v);
2897 PyErr_NoMemory();
2898 return -1;
2899 }
2900 _Py_NewReference(*pv);
2901 sv = (PyBytesObject *) *pv;
2902 Py_SIZE(sv) = newsize;
2903 sv->ob_sval[newsize] = '\0';
2904 sv->ob_shash = -1; /* invalidate cached hash value */
2905 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906}
2907
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002908void
2909PyBytes_Fini(void)
2910{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002912 for (i = 0; i < UCHAR_MAX + 1; i++)
2913 Py_CLEAR(characters[i]);
2914 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915}
2916
Benjamin Peterson4116f362008-05-27 00:36:20 +00002917/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002918
2919typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 PyObject_HEAD
2921 Py_ssize_t it_index;
2922 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002923} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002924
2925static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 _PyObject_GC_UNTRACK(it);
2929 Py_XDECREF(it->it_seq);
2930 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002931}
2932
2933static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 Py_VISIT(it->it_seq);
2937 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002938}
2939
2940static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002941striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002942{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002943 PyBytesObject *seq;
2944 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 assert(it != NULL);
2947 seq = it->it_seq;
2948 if (seq == NULL)
2949 return NULL;
2950 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2953 item = PyLong_FromLong(
2954 (unsigned char)seq->ob_sval[it->it_index]);
2955 if (item != NULL)
2956 ++it->it_index;
2957 return item;
2958 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 Py_DECREF(seq);
2961 it->it_seq = NULL;
2962 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002963}
2964
2965static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002967{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002968 Py_ssize_t len = 0;
2969 if (it->it_seq)
2970 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2971 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002972}
2973
2974PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002976
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002977static PyObject *
2978striter_reduce(striterobject *it)
2979{
2980 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002981 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002982 it->it_seq, it->it_index);
2983 } else {
2984 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2985 if (u == NULL)
2986 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002987 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002988 }
2989}
2990
2991PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2992
2993static PyObject *
2994striter_setstate(striterobject *it, PyObject *state)
2995{
2996 Py_ssize_t index = PyLong_AsSsize_t(state);
2997 if (index == -1 && PyErr_Occurred())
2998 return NULL;
2999 if (index < 0)
3000 index = 0;
3001 it->it_index = index;
3002 Py_RETURN_NONE;
3003}
3004
3005PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3006
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3009 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003010 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3011 reduce_doc},
3012 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3013 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003014 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015};
3016
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3019 "bytes_iterator", /* tp_name */
3020 sizeof(striterobject), /* tp_basicsize */
3021 0, /* tp_itemsize */
3022 /* methods */
3023 (destructor)striter_dealloc, /* tp_dealloc */
3024 0, /* tp_print */
3025 0, /* tp_getattr */
3026 0, /* tp_setattr */
3027 0, /* tp_reserved */
3028 0, /* tp_repr */
3029 0, /* tp_as_number */
3030 0, /* tp_as_sequence */
3031 0, /* tp_as_mapping */
3032 0, /* tp_hash */
3033 0, /* tp_call */
3034 0, /* tp_str */
3035 PyObject_GenericGetAttr, /* tp_getattro */
3036 0, /* tp_setattro */
3037 0, /* tp_as_buffer */
3038 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3039 0, /* tp_doc */
3040 (traverseproc)striter_traverse, /* tp_traverse */
3041 0, /* tp_clear */
3042 0, /* tp_richcompare */
3043 0, /* tp_weaklistoffset */
3044 PyObject_SelfIter, /* tp_iter */
3045 (iternextfunc)striter_next, /* tp_iternext */
3046 striter_methods, /* tp_methods */
3047 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048};
3049
3050static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003051bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003053 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 if (!PyBytes_Check(seq)) {
3056 PyErr_BadInternalCall();
3057 return NULL;
3058 }
3059 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3060 if (it == NULL)
3061 return NULL;
3062 it->it_index = 0;
3063 Py_INCREF(seq);
3064 it->it_seq = (PyBytesObject *)seq;
3065 _PyObject_GC_TRACK(it);
3066 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003067}