blob: 5e17107a8af89a94ff3eddfca41d0922aa81d031 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200483 /* skip \x */
484 if (s < end && Py_ISXDIGIT(s[0]))
485 s++; /* and a hexdigit */
486 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 default:
488 *p++ = '\\';
489 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200490 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 UTF-8 bytes may follow. */
492 }
493 }
494 if (p-buf < newlen)
495 _PyBytes_Resize(&v, p - buf);
496 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000497 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 Py_DECREF(v);
499 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000500}
501
502/* -------------------------------------------------------------------- */
503/* object api */
504
505Py_ssize_t
506PyBytes_Size(register PyObject *op)
507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyBytes_Check(op)) {
509 PyErr_Format(PyExc_TypeError,
510 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
511 return -1;
512 }
513 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516char *
517PyBytes_AsString(register PyObject *op)
518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyBytes_Check(op)) {
520 PyErr_Format(PyExc_TypeError,
521 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
522 return NULL;
523 }
524 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000525}
526
527int
528PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 register char **s,
530 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (s == NULL) {
533 PyErr_BadInternalCall();
534 return -1;
535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (!PyBytes_Check(obj)) {
538 PyErr_Format(PyExc_TypeError,
539 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
540 return -1;
541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 *s = PyBytes_AS_STRING(obj);
544 if (len != NULL)
545 *len = PyBytes_GET_SIZE(obj);
546 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
547 PyErr_SetString(PyExc_TypeError,
548 "expected bytes with no null");
549 return -1;
550 }
551 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552}
Neal Norwitz6968b052007-02-27 19:02:19 +0000553
554/* -------------------------------------------------------------------- */
555/* Methods */
556
Eric Smith0923d1d2009-04-16 20:16:10 +0000557#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559#include "stringlib/fastsearch.h"
560#include "stringlib/count.h"
561#include "stringlib/find.h"
562#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000563#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000564#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000565
Eric Smith0f78bff2009-11-30 01:01:42 +0000566#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568PyObject *
569PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200572 Py_ssize_t i, length = Py_SIZE(op);
573 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200575 unsigned char quote, *s, *p;
576
577 /* Compute size of output string */
578 squotes = dquotes = 0;
579 newsize = 3; /* b'' */
580 s = (unsigned char*)op->ob_sval;
581 for (i = 0; i < length; i++) {
582 switch(s[i]) {
583 case '\'': squotes++; newsize++; break;
584 case '"': dquotes++; newsize++; break;
585 case '\\': case '\t': case '\n': case '\r':
586 newsize += 2; break; /* \C */
587 default:
588 if (s[i] < ' ' || s[i] >= 0x7f)
589 newsize += 4; /* \xHH */
590 else
591 newsize++;
592 }
593 }
594 quote = '\'';
595 if (smartquotes && squotes && !dquotes)
596 quote = '"';
597 if (squotes && quote == '\'')
598 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200599
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200600 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyErr_SetString(PyExc_OverflowError,
602 "bytes object is too large to make repr");
603 return NULL;
604 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200605
606 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 if (v == NULL) {
608 return NULL;
609 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000611
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 *p++ = 'b', *p++ = quote;
613 for (i = 0; i < length; i++) {
614 unsigned char c = op->ob_sval[i];
615 if (c == quote || c == '\\')
616 *p++ = '\\', *p++ = c;
617 else if (c == '\t')
618 *p++ = '\\', *p++ = 't';
619 else if (c == '\n')
620 *p++ = '\\', *p++ = 'n';
621 else if (c == '\r')
622 *p++ = '\\', *p++ = 'r';
623 else if (c < ' ' || c >= 0x7f) {
624 *p++ = '\\';
625 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200626 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
627 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 else
630 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200632 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200633 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200634 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000635}
636
Neal Norwitz6968b052007-02-27 19:02:19 +0000637static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000638bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000639{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 if (Py_BytesWarningFlag) {
647 if (PyErr_WarnEx(PyExc_BytesWarning,
648 "str() on a bytes instance", 1))
649 return NULL;
650 }
651 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000652}
653
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000655bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658}
Neal Norwitz6968b052007-02-27 19:02:19 +0000659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660/* This is also used by PyBytes_Concat() */
661static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000662bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000663{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 Py_ssize_t size;
665 Py_buffer va, vb;
666 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 va.len = -1;
669 vb.len = -1;
670 if (_getbuffer(a, &va) < 0 ||
671 _getbuffer(b, &vb) < 0) {
672 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
673 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
674 goto done;
675 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 /* Optimize end cases */
678 if (va.len == 0 && PyBytes_CheckExact(b)) {
679 result = b;
680 Py_INCREF(result);
681 goto done;
682 }
683 if (vb.len == 0 && PyBytes_CheckExact(a)) {
684 result = a;
685 Py_INCREF(result);
686 goto done;
687 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 size = va.len + vb.len;
690 if (size < 0) {
691 PyErr_NoMemory();
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 result = PyBytes_FromStringAndSize(NULL, size);
696 if (result != NULL) {
697 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
698 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
701 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000702 if (va.len != -1)
703 PyBuffer_Release(&va);
704 if (vb.len != -1)
705 PyBuffer_Release(&vb);
706 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000707}
Neal Norwitz6968b052007-02-27 19:02:19 +0000708
709static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000710bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000711{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 register Py_ssize_t i;
713 register Py_ssize_t j;
714 register Py_ssize_t size;
715 register PyBytesObject *op;
716 size_t nbytes;
717 if (n < 0)
718 n = 0;
719 /* watch out for overflows: the size can overflow int,
720 * and the # of bytes needed can overflow size_t
721 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000722 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 PyErr_SetString(PyExc_OverflowError,
724 "repeated bytes are too long");
725 return NULL;
726 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000727 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
729 Py_INCREF(a);
730 return (PyObject *)a;
731 }
732 nbytes = (size_t)size;
733 if (nbytes + PyBytesObject_SIZE <= nbytes) {
734 PyErr_SetString(PyExc_OverflowError,
735 "repeated bytes are too long");
736 return NULL;
737 }
738 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
739 if (op == NULL)
740 return PyErr_NoMemory();
741 PyObject_INIT_VAR(op, &PyBytes_Type, size);
742 op->ob_shash = -1;
743 op->ob_sval[size] = '\0';
744 if (Py_SIZE(a) == 1 && n > 0) {
745 memset(op->ob_sval, a->ob_sval[0] , n);
746 return (PyObject *) op;
747 }
748 i = 0;
749 if (i < size) {
750 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
751 i = Py_SIZE(a);
752 }
753 while (i < size) {
754 j = (i <= size-i) ? i : size-i;
755 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
756 i += j;
757 }
758 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000759}
760
Guido van Rossum98297ee2007-11-06 21:34:58 +0000761static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000762bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000763{
764 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
765 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000766 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000767 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000768 PyErr_Clear();
769 if (_getbuffer(arg, &varg) < 0)
770 return -1;
771 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
772 varg.buf, varg.len, 0);
773 PyBuffer_Release(&varg);
774 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775 }
776 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000777 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
778 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000779 }
780
Antoine Pitrou0010d372010-08-15 17:12:55 +0000781 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000782}
783
Neal Norwitz6968b052007-02-27 19:02:19 +0000784static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000785bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000786{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 if (i < 0 || i >= Py_SIZE(a)) {
788 PyErr_SetString(PyExc_IndexError, "index out of range");
789 return NULL;
790 }
791 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000792}
793
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000794static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000795bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 int c;
798 Py_ssize_t len_a, len_b;
799 Py_ssize_t min_len;
800 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 /* Make sure both arguments are strings. */
803 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
804 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
805 (PyObject_IsInstance((PyObject*)a,
806 (PyObject*)&PyUnicode_Type) ||
807 PyObject_IsInstance((PyObject*)b,
808 (PyObject*)&PyUnicode_Type))) {
809 if (PyErr_WarnEx(PyExc_BytesWarning,
810 "Comparison between bytes and string", 1))
811 return NULL;
812 }
813 result = Py_NotImplemented;
814 goto out;
815 }
816 if (a == b) {
817 switch (op) {
818 case Py_EQ:case Py_LE:case Py_GE:
819 result = Py_True;
820 goto out;
821 case Py_NE:case Py_LT:case Py_GT:
822 result = Py_False;
823 goto out;
824 }
825 }
826 if (op == Py_EQ) {
827 /* Supporting Py_NE here as well does not save
828 much time, since Py_NE is rarely used. */
829 if (Py_SIZE(a) == Py_SIZE(b)
830 && (a->ob_sval[0] == b->ob_sval[0]
831 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
832 result = Py_True;
833 } else {
834 result = Py_False;
835 }
836 goto out;
837 }
838 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
839 min_len = (len_a < len_b) ? len_a : len_b;
840 if (min_len > 0) {
841 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
842 if (c==0)
843 c = memcmp(a->ob_sval, b->ob_sval, min_len);
844 } else
845 c = 0;
846 if (c == 0)
847 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
848 switch (op) {
849 case Py_LT: c = c < 0; break;
850 case Py_LE: c = c <= 0; break;
851 case Py_EQ: assert(0); break; /* unreachable */
852 case Py_NE: c = c != 0; break;
853 case Py_GT: c = c > 0; break;
854 case Py_GE: c = c >= 0; break;
855 default:
856 result = Py_NotImplemented;
857 goto out;
858 }
859 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000860 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 Py_INCREF(result);
862 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000863}
864
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000865static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000866bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000867{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100868 if (a->ob_shash == -1) {
869 /* Can't fail */
870 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
871 }
872 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873}
874
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000875static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000876bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (PyIndex_Check(item)) {
879 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
880 if (i == -1 && PyErr_Occurred())
881 return NULL;
882 if (i < 0)
883 i += PyBytes_GET_SIZE(self);
884 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
885 PyErr_SetString(PyExc_IndexError,
886 "index out of range");
887 return NULL;
888 }
889 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
890 }
891 else if (PySlice_Check(item)) {
892 Py_ssize_t start, stop, step, slicelength, cur, i;
893 char* source_buf;
894 char* result_buf;
895 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000896
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000897 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 PyBytes_GET_SIZE(self),
899 &start, &stop, &step, &slicelength) < 0) {
900 return NULL;
901 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (slicelength <= 0) {
904 return PyBytes_FromStringAndSize("", 0);
905 }
906 else if (start == 0 && step == 1 &&
907 slicelength == PyBytes_GET_SIZE(self) &&
908 PyBytes_CheckExact(self)) {
909 Py_INCREF(self);
910 return (PyObject *)self;
911 }
912 else if (step == 1) {
913 return PyBytes_FromStringAndSize(
914 PyBytes_AS_STRING(self) + start,
915 slicelength);
916 }
917 else {
918 source_buf = PyBytes_AS_STRING(self);
919 result = PyBytes_FromStringAndSize(NULL, slicelength);
920 if (result == NULL)
921 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 result_buf = PyBytes_AS_STRING(result);
924 for (cur = start, i = 0; i < slicelength;
925 cur += step, i++) {
926 result_buf[i] = source_buf[cur];
927 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 return result;
930 }
931 }
932 else {
933 PyErr_Format(PyExc_TypeError,
934 "byte indices must be integers, not %.200s",
935 Py_TYPE(item)->tp_name);
936 return NULL;
937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000938}
939
940static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000941bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000942{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
944 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945}
946
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000947static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 (lenfunc)bytes_length, /*sq_length*/
949 (binaryfunc)bytes_concat, /*sq_concat*/
950 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
951 (ssizeargfunc)bytes_item, /*sq_item*/
952 0, /*sq_slice*/
953 0, /*sq_ass_item*/
954 0, /*sq_ass_slice*/
955 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000956};
957
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000958static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 (lenfunc)bytes_length,
960 (binaryfunc)bytes_subscript,
961 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (getbufferproc)bytes_buffer_getbuffer,
966 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000967};
968
969
/* Strip modes.  Each value doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by strip mode */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
978
Neal Norwitz6968b052007-02-27 19:02:19 +0000979PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200980"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000981\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000982Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000983If sep is not specified or is None, B is split on ASCII whitespace\n\
984characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000985If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000986
987static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200988bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000989{
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200990 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
992 Py_ssize_t maxsplit = -1;
993 const char *s = PyBytes_AS_STRING(self), *sub;
994 Py_buffer vsub;
995 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +0000996
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200997 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
998 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 return NULL;
1000 if (maxsplit < 0)
1001 maxsplit = PY_SSIZE_T_MAX;
1002 if (subobj == Py_None)
1003 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1004 if (_getbuffer(subobj, &vsub) < 0)
1005 return NULL;
1006 sub = vsub.buf;
1007 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1010 PyBuffer_Release(&vsub);
1011 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001012}
1013
Neal Norwitz6968b052007-02-27 19:02:19 +00001014PyDoc_STRVAR(partition__doc__,
1015"B.partition(sep) -> (head, sep, tail)\n\
1016\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001017Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001018the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001019found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001020
1021static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001022bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 const char *sep;
1025 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 if (PyBytes_Check(sep_obj)) {
1028 sep = PyBytes_AS_STRING(sep_obj);
1029 sep_len = PyBytes_GET_SIZE(sep_obj);
1030 }
1031 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1032 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 return stringlib_partition(
1035 (PyObject*) self,
1036 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1037 sep_obj, sep, sep_len
1038 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001039}
1040
1041PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001042"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001043\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001044Search for the separator sep in B, starting at the end of B,\n\
1045and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001046part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001047bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001050bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 const char *sep;
1053 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 if (PyBytes_Check(sep_obj)) {
1056 sep = PyBytes_AS_STRING(sep_obj);
1057 sep_len = PyBytes_GET_SIZE(sep_obj);
1058 }
1059 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1060 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 return stringlib_rpartition(
1063 (PyObject*) self,
1064 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1065 sep_obj, sep, sep_len
1066 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001067}
1068
Neal Norwitz6968b052007-02-27 19:02:19 +00001069PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001070"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001071\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001072Return a list of the sections in B, using sep as the delimiter,\n\
1073starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001074If sep is not given, B is split on ASCII whitespace characters\n\
1075(space, tab, return, newline, formfeed, vertical tab).\n\
1076If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001078
Neal Norwitz6968b052007-02-27 19:02:19 +00001079static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001080bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001081{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001082 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1084 Py_ssize_t maxsplit = -1;
1085 const char *s = PyBytes_AS_STRING(self), *sub;
1086 Py_buffer vsub;
1087 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001089 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1090 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 return NULL;
1092 if (maxsplit < 0)
1093 maxsplit = PY_SSIZE_T_MAX;
1094 if (subobj == Py_None)
1095 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1096 if (_getbuffer(subobj, &vsub) < 0)
1097 return NULL;
1098 sub = vsub.buf;
1099 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1102 PyBuffer_Release(&vsub);
1103 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001104}
1105
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106
1107PyDoc_STRVAR(join__doc__,
1108"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001109\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001110Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001111Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1112
Neal Norwitz6968b052007-02-27 19:02:19 +00001113static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001114bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001115{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 char *sep = PyBytes_AS_STRING(self);
1117 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1118 PyObject *res = NULL;
1119 char *p;
1120 Py_ssize_t seqlen = 0;
1121 size_t sz = 0;
1122 Py_ssize_t i;
1123 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 seq = PySequence_Fast(orig, "");
1126 if (seq == NULL) {
1127 return NULL;
1128 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 seqlen = PySequence_Size(seq);
1131 if (seqlen == 0) {
1132 Py_DECREF(seq);
1133 return PyBytes_FromString("");
1134 }
1135 if (seqlen == 1) {
1136 item = PySequence_Fast_GET_ITEM(seq, 0);
1137 if (PyBytes_CheckExact(item)) {
1138 Py_INCREF(item);
1139 Py_DECREF(seq);
1140 return item;
1141 }
1142 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 /* There are at least two things to join, or else we have a subclass
1145 * of the builtin types in the sequence.
1146 * Do a pre-pass to figure out the total amount of space we'll
1147 * need (sz), and see whether all argument are bytes.
1148 */
1149 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1150 for (i = 0; i < seqlen; i++) {
1151 const size_t old_sz = sz;
1152 item = PySequence_Fast_GET_ITEM(seq, i);
1153 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "sequence item %zd: expected bytes,"
1156 " %.80s found",
1157 i, Py_TYPE(item)->tp_name);
1158 Py_DECREF(seq);
1159 return NULL;
1160 }
1161 sz += Py_SIZE(item);
1162 if (i != 0)
1163 sz += seplen;
1164 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1165 PyErr_SetString(PyExc_OverflowError,
1166 "join() result is too long for bytes");
1167 Py_DECREF(seq);
1168 return NULL;
1169 }
1170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 /* Allocate result space. */
1173 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1174 if (res == NULL) {
1175 Py_DECREF(seq);
1176 return NULL;
1177 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 /* Catenate everything. */
1180 /* I'm not worried about a PyByteArray item growing because there's
1181 nowhere in this function where we release the GIL. */
1182 p = PyBytes_AS_STRING(res);
1183 for (i = 0; i < seqlen; ++i) {
1184 size_t n;
1185 char *q;
1186 if (i) {
1187 Py_MEMCPY(p, sep, seplen);
1188 p += seplen;
1189 }
1190 item = PySequence_Fast_GET_ITEM(seq, i);
1191 n = Py_SIZE(item);
1192 if (PyBytes_Check(item))
1193 q = PyBytes_AS_STRING(item);
1194 else
1195 q = PyByteArray_AS_STRING(item);
1196 Py_MEMCPY(p, q, n);
1197 p += n;
1198 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 Py_DECREF(seq);
1201 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001202}
1203
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204PyObject *
1205_PyBytes_Join(PyObject *sep, PyObject *x)
1206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 assert(sep != NULL && PyBytes_Check(sep));
1208 assert(x != NULL);
1209 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210}
1211
/* Helper macro to fix up start/end slice values against a length:
   - end greater than len is capped at len;
   - a negative end or start has len added to it and, if still
     negative, becomes 0.
   start and end must be modifiable lvalues.  Every argument may be
   evaluated more than once, so pass expressions without side effects.
   The body is wrapped in do { } while (0) so the macro expands to a
   single statement and is safe in an unbraced if/else; invoke it with
   a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001226
1227Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001228bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001231 char byte;
1232 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 const char *sub;
1234 Py_ssize_t sub_len;
1235 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001236 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237
Antoine Pitrouac65d962011-10-20 23:54:17 +02001238 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1239 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241
Antoine Pitrouac65d962011-10-20 23:54:17 +02001242 if (subobj) {
1243 if (_getbuffer(subobj, &subbuf) < 0)
1244 return -2;
1245
1246 sub = subbuf.buf;
1247 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001249 else {
1250 sub = &byte;
1251 sub_len = 1;
1252 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001255 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1257 sub, sub_len, start, end);
1258 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001259 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1261 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001262
1263 if (subobj)
1264 PyBuffer_Release(&subbuf);
1265
1266 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267}
1268
1269
1270PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001271"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001272\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001273Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001274such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001276\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277Return -1 on failure.");
1278
Neal Norwitz6968b052007-02-27 19:02:19 +00001279static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001280bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001281{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 Py_ssize_t result = bytes_find_internal(self, args, +1);
1283 if (result == -2)
1284 return NULL;
1285 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001286}
1287
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288
1289PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001290"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001291\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292Like B.find() but raise ValueError when the substring is not found.");
1293
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001294static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001295bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001296{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 Py_ssize_t result = bytes_find_internal(self, args, +1);
1298 if (result == -2)
1299 return NULL;
1300 if (result == -1) {
1301 PyErr_SetString(PyExc_ValueError,
1302 "substring not found");
1303 return NULL;
1304 }
1305 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001306}
1307
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
1309PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001310"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001311\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001313such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001315\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316Return -1 on failure.");
1317
Neal Norwitz6968b052007-02-27 19:02:19 +00001318static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001319bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 Py_ssize_t result = bytes_find_internal(self, args, -1);
1322 if (result == -2)
1323 return NULL;
1324 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001325}
1326
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001327
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001329"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330\n\
1331Like B.rfind() but raise ValueError when the substring is not found.");
1332
1333static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001334bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_ssize_t result = bytes_find_internal(self, args, -1);
1337 if (result == -2)
1338 return NULL;
1339 if (result == -1) {
1340 PyErr_SetString(PyExc_ValueError,
1341 "substring not found");
1342 return NULL;
1343 }
1344 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001345}
1346
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347
1348Py_LOCAL_INLINE(PyObject *)
1349do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 Py_buffer vsep;
1352 char *s = PyBytes_AS_STRING(self);
1353 Py_ssize_t len = PyBytes_GET_SIZE(self);
1354 char *sep;
1355 Py_ssize_t seplen;
1356 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (_getbuffer(sepobj, &vsep) < 0)
1359 return NULL;
1360 sep = vsep.buf;
1361 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 i = 0;
1364 if (striptype != RIGHTSTRIP) {
1365 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1366 i++;
1367 }
1368 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 j = len;
1371 if (striptype != LEFTSTRIP) {
1372 do {
1373 j--;
1374 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1375 j++;
1376 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1381 Py_INCREF(self);
1382 return (PyObject*)self;
1383 }
1384 else
1385 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001386}
1387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
1389Py_LOCAL_INLINE(PyObject *)
1390do_strip(PyBytesObject *self, int striptype)
1391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 char *s = PyBytes_AS_STRING(self);
1393 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 i = 0;
1396 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001397 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 i++;
1399 }
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 j = len;
1403 if (striptype != LEFTSTRIP) {
1404 do {
1405 j--;
David Malcolm96960882010-11-05 17:23:41 +00001406 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 j++;
1408 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1411 Py_INCREF(self);
1412 return (PyObject*)self;
1413 }
1414 else
1415 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416}
1417
1418
1419Py_LOCAL_INLINE(PyObject *)
1420do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1421{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1425 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (sep != NULL && sep != Py_None) {
1428 return do_xstrip(self, striptype, sep);
1429 }
1430 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431}
1432
1433
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001436\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001437Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001438If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001439static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001440bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (PyTuple_GET_SIZE(args) == 0)
1443 return do_strip(self, BOTHSTRIP); /* Common case */
1444 else
1445 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001446}
1447
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001452Strip leading bytes contained in the argument.\n\
1453If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001455bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 if (PyTuple_GET_SIZE(args) == 0)
1458 return do_strip(self, LEFTSTRIP); /* Common case */
1459 else
1460 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461}
1462
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001467Strip trailing bytes contained in the argument.\n\
1468If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001470bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 if (PyTuple_GET_SIZE(args) == 0)
1473 return do_strip(self, RIGHTSTRIP); /* Common case */
1474 else
1475 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476}
Neal Norwitz6968b052007-02-27 19:02:19 +00001477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478
1479PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001480"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001481\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001483string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484as in slice notation.");
1485
1486static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001487bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 PyObject *sub_obj;
1490 const char *str = PyBytes_AS_STRING(self), *sub;
1491 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001492 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494
Antoine Pitrouac65d962011-10-20 23:54:17 +02001495 Py_buffer vsub;
1496 PyObject *count_obj;
1497
1498 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1499 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501
Antoine Pitrouac65d962011-10-20 23:54:17 +02001502 if (sub_obj) {
1503 if (_getbuffer(sub_obj, &vsub) < 0)
1504 return NULL;
1505
1506 sub = vsub.buf;
1507 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001509 else {
1510 sub = &byte;
1511 sub_len = 1;
1512 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515
Antoine Pitrouac65d962011-10-20 23:54:17 +02001516 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001517 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1518 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001519
1520 if (sub_obj)
1521 PyBuffer_Release(&vsub);
1522
1523 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524}
1525
1526
1527PyDoc_STRVAR(translate__doc__,
1528"B.translate(table[, deletechars]) -> bytes\n\
1529\n\
1530Return a copy of B, where all characters occurring in the\n\
1531optional argument deletechars are removed, and the remaining\n\
1532characters have been mapped through the given translation\n\
1533table, which must be a bytes object of length 256.");
1534
1535static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001536bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 register char *input, *output;
1539 const char *table;
1540 register Py_ssize_t i, c, changed = 0;
1541 PyObject *input_obj = (PyObject*)self;
1542 const char *output_start, *del_table=NULL;
1543 Py_ssize_t inlen, tablen, dellen = 0;
1544 PyObject *result;
1545 int trans_table[256];
1546 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1549 &tableobj, &delobj))
1550 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (PyBytes_Check(tableobj)) {
1553 table = PyBytes_AS_STRING(tableobj);
1554 tablen = PyBytes_GET_SIZE(tableobj);
1555 }
1556 else if (tableobj == Py_None) {
1557 table = NULL;
1558 tablen = 256;
1559 }
1560 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1561 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 if (tablen != 256) {
1564 PyErr_SetString(PyExc_ValueError,
1565 "translation table must be 256 characters long");
1566 return NULL;
1567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 if (delobj != NULL) {
1570 if (PyBytes_Check(delobj)) {
1571 del_table = PyBytes_AS_STRING(delobj);
1572 dellen = PyBytes_GET_SIZE(delobj);
1573 }
1574 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1575 return NULL;
1576 }
1577 else {
1578 del_table = NULL;
1579 dellen = 0;
1580 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 inlen = PyBytes_GET_SIZE(input_obj);
1583 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1584 if (result == NULL)
1585 return NULL;
1586 output_start = output = PyBytes_AsString(result);
1587 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 if (dellen == 0 && table != NULL) {
1590 /* If no deletions are required, use faster code */
1591 for (i = inlen; --i >= 0; ) {
1592 c = Py_CHARMASK(*input++);
1593 if (Py_CHARMASK((*output++ = table[c])) != c)
1594 changed = 1;
1595 }
1596 if (changed || !PyBytes_CheckExact(input_obj))
1597 return result;
1598 Py_DECREF(result);
1599 Py_INCREF(input_obj);
1600 return input_obj;
1601 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 if (table == NULL) {
1604 for (i = 0; i < 256; i++)
1605 trans_table[i] = Py_CHARMASK(i);
1606 } else {
1607 for (i = 0; i < 256; i++)
1608 trans_table[i] = Py_CHARMASK(table[i]);
1609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 for (i = 0; i < dellen; i++)
1612 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 for (i = inlen; --i >= 0; ) {
1615 c = Py_CHARMASK(*input++);
1616 if (trans_table[c] != -1)
1617 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1618 continue;
1619 changed = 1;
1620 }
1621 if (!changed && PyBytes_CheckExact(input_obj)) {
1622 Py_DECREF(result);
1623 Py_INCREF(input_obj);
1624 return input_obj;
1625 }
1626 /* Fix the size of the resulting string */
1627 if (inlen > 0)
1628 _PyBytes_Resize(&result, output - output_start);
1629 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630}
1631
1632
Georg Brandlabc38772009-04-12 15:51:51 +00001633static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001634bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001637}
1638
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639/* find and count characters and substrings */
1640
/* Find the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when
   there is none.  Every parameter is parenthesized so that expression
   arguments expand safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1643
1644/* String ops must return a string. */
1645/* If the object is subclass of string, create a copy */
1646Py_LOCAL(PyBytesObject *)
1647return_self(PyBytesObject *self)
1648{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 if (PyBytes_CheckExact(self)) {
1650 Py_INCREF(self);
1651 return self;
1652 }
1653 return (PyBytesObject *)PyBytes_FromStringAndSize(
1654 PyBytes_AS_STRING(self),
1655 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656}
1657
1658Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001659countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 Py_ssize_t count=0;
1662 const char *start=target;
1663 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 while ( (start=findchar(start, end-start, c)) != NULL ) {
1666 count++;
1667 if (count >= maxcount)
1668 break;
1669 start += 1;
1670 }
1671 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672}
1673
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
1675/* Algorithms for different cases of string replacement */
1676
1677/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1678Py_LOCAL(PyBytesObject *)
1679replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 const char *to_s, Py_ssize_t to_len,
1681 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 char *self_s, *result_s;
1684 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001685 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001690 /* 1 at the end plus 1 after every character;
1691 count = min(maxcount, self_len + 1) */
1692 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001694 else
1695 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1696 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 /* Check for overflow */
1699 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001700 assert(count > 0);
1701 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 PyErr_SetString(PyExc_OverflowError,
1703 "replacement bytes are too long");
1704 return NULL;
1705 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001706 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 if (! (result = (PyBytesObject *)
1709 PyBytes_FromStringAndSize(NULL, result_len)) )
1710 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 self_s = PyBytes_AS_STRING(self);
1713 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 /* Lay the first one down (guaranteed this will occur) */
1718 Py_MEMCPY(result_s, to_s, to_len);
1719 result_s += to_len;
1720 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 for (i=0; i<count; i++) {
1723 *result_s++ = *self_s++;
1724 Py_MEMCPY(result_s, to_s, to_len);
1725 result_s += to_len;
1726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 /* Copy the rest of the original string */
1729 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732}
1733
1734/* Special case for deleting a single character */
1735/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1736Py_LOCAL(PyBytesObject *)
1737replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 char *self_s, *result_s;
1741 char *start, *next, *end;
1742 Py_ssize_t self_len, result_len;
1743 Py_ssize_t count;
1744 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 self_len = PyBytes_GET_SIZE(self);
1747 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 count = countchar(self_s, self_len, from_c, maxcount);
1750 if (count == 0) {
1751 return return_self(self);
1752 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 result_len = self_len - count; /* from_len == 1 */
1755 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 if ( (result = (PyBytesObject *)
1758 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1759 return NULL;
1760 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 start = self_s;
1763 end = self_s + self_len;
1764 while (count-- > 0) {
1765 next = findchar(start, end-start, from_c);
1766 if (next == NULL)
1767 break;
1768 Py_MEMCPY(result_s, start, next-start);
1769 result_s += (next-start);
1770 start = next+1;
1771 }
1772 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775}
1776
1777/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1778
1779Py_LOCAL(PyBytesObject *)
1780replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 const char *from_s, Py_ssize_t from_len,
1782 Py_ssize_t maxcount) {
1783 char *self_s, *result_s;
1784 char *start, *next, *end;
1785 Py_ssize_t self_len, result_len;
1786 Py_ssize_t count, offset;
1787 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 self_len = PyBytes_GET_SIZE(self);
1790 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 count = stringlib_count(self_s, self_len,
1793 from_s, from_len,
1794 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 if (count == 0) {
1797 /* no matches */
1798 return return_self(self);
1799 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 result_len = self_len - (count * from_len);
1802 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 if ( (result = (PyBytesObject *)
1805 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1806 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 start = self_s;
1811 end = self_s + self_len;
1812 while (count-- > 0) {
1813 offset = stringlib_find(start, end-start,
1814 from_s, from_len,
1815 0);
1816 if (offset == -1)
1817 break;
1818 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 result_s += (next-start);
1823 start = next+from_len;
1824 }
1825 Py_MEMCPY(result_s, start, end-start);
1826 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827}
1828
1829/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1830Py_LOCAL(PyBytesObject *)
1831replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 char from_c, char to_c,
1833 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 char *self_s, *result_s, *start, *end, *next;
1836 Py_ssize_t self_len;
1837 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 /* The result string will be the same size */
1840 self_s = PyBytes_AS_STRING(self);
1841 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 if (next == NULL) {
1846 /* No matches; return the original string */
1847 return return_self(self);
1848 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 /* Need to make a new string */
1851 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1852 if (result == NULL)
1853 return NULL;
1854 result_s = PyBytes_AS_STRING(result);
1855 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* change everything in-place, starting with this one */
1858 start = result_s + (next-self_s);
1859 *start = to_c;
1860 start++;
1861 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 while (--maxcount > 0) {
1864 next = findchar(start, end-start, from_c);
1865 if (next == NULL)
1866 break;
1867 *next = to_c;
1868 start = next+1;
1869 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872}
1873
1874/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1875Py_LOCAL(PyBytesObject *)
1876replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 const char *from_s, Py_ssize_t from_len,
1878 const char *to_s, Py_ssize_t to_len,
1879 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 char *result_s, *start, *end;
1882 char *self_s;
1883 Py_ssize_t self_len, offset;
1884 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 self_s = PyBytes_AS_STRING(self);
1889 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 offset = stringlib_find(self_s, self_len,
1892 from_s, from_len,
1893 0);
1894 if (offset == -1) {
1895 /* No matches; return the original string */
1896 return return_self(self);
1897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 /* Need to make a new string */
1900 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1901 if (result == NULL)
1902 return NULL;
1903 result_s = PyBytes_AS_STRING(result);
1904 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 /* change everything in-place, starting with this one */
1907 start = result_s + offset;
1908 Py_MEMCPY(start, to_s, from_len);
1909 start += from_len;
1910 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 while ( --maxcount > 0) {
1913 offset = stringlib_find(start, end-start,
1914 from_s, from_len,
1915 0);
1916 if (offset==-1)
1917 break;
1918 Py_MEMCPY(start+offset, to_s, from_len);
1919 start += offset+from_len;
1920 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923}
1924
1925/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1926Py_LOCAL(PyBytesObject *)
1927replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 char from_c,
1929 const char *to_s, Py_ssize_t to_len,
1930 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 char *self_s, *result_s;
1933 char *start, *next, *end;
1934 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001935 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 self_s = PyBytes_AS_STRING(self);
1939 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 count = countchar(self_s, self_len, from_c, maxcount);
1942 if (count == 0) {
1943 /* no matches, return unchanged */
1944 return return_self(self);
1945 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 /* use the difference between current and new, hence the "-1" */
1948 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001949 assert(count > 0);
1950 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 PyErr_SetString(PyExc_OverflowError,
1952 "replacement bytes are too long");
1953 return NULL;
1954 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001955 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 if ( (result = (PyBytesObject *)
1958 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1959 return NULL;
1960 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 start = self_s;
1963 end = self_s + self_len;
1964 while (count-- > 0) {
1965 next = findchar(start, end-start, from_c);
1966 if (next == NULL)
1967 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 if (next == start) {
1970 /* replace with the 'to' */
1971 Py_MEMCPY(result_s, to_s, to_len);
1972 result_s += to_len;
1973 start += 1;
1974 } else {
1975 /* copy the unchanged old then the 'to' */
1976 Py_MEMCPY(result_s, start, next-start);
1977 result_s += (next-start);
1978 Py_MEMCPY(result_s, to_s, to_len);
1979 result_s += to_len;
1980 start = next+1;
1981 }
1982 }
1983 /* Copy the remainder of the remaining string */
1984 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987}
1988
1989/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1990Py_LOCAL(PyBytesObject *)
1991replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 const char *from_s, Py_ssize_t from_len,
1993 const char *to_s, Py_ssize_t to_len,
1994 Py_ssize_t maxcount) {
1995 char *self_s, *result_s;
1996 char *start, *next, *end;
1997 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001998 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 self_s = PyBytes_AS_STRING(self);
2002 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 count = stringlib_count(self_s, self_len,
2005 from_s, from_len,
2006 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 if (count == 0) {
2009 /* no matches, return unchanged */
2010 return return_self(self);
2011 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 /* Check for overflow */
2014 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002015 assert(count > 0);
2016 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 PyErr_SetString(PyExc_OverflowError,
2018 "replacement bytes are too long");
2019 return NULL;
2020 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002021 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 if ( (result = (PyBytesObject *)
2024 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2025 return NULL;
2026 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 start = self_s;
2029 end = self_s + self_len;
2030 while (count-- > 0) {
2031 offset = stringlib_find(start, end-start,
2032 from_s, from_len,
2033 0);
2034 if (offset == -1)
2035 break;
2036 next = start+offset;
2037 if (next == start) {
2038 /* replace with the 'to' */
2039 Py_MEMCPY(result_s, to_s, to_len);
2040 result_s += to_len;
2041 start += from_len;
2042 } else {
2043 /* copy the unchanged old then the 'to' */
2044 Py_MEMCPY(result_s, start, next-start);
2045 result_s += (next-start);
2046 Py_MEMCPY(result_s, to_s, to_len);
2047 result_s += to_len;
2048 start = next+from_len;
2049 }
2050 }
2051 /* Copy the remainder of the remaining string */
2052 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055}
2056
2057
2058Py_LOCAL(PyBytesObject *)
2059replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 const char *from_s, Py_ssize_t from_len,
2061 const char *to_s, Py_ssize_t to_len,
2062 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 if (maxcount < 0) {
2065 maxcount = PY_SSIZE_T_MAX;
2066 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2067 /* nothing to do; return the original string */
2068 return return_self(self);
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 if (maxcount == 0 ||
2072 (from_len == 0 && to_len == 0)) {
2073 /* nothing to do; return the original string */
2074 return return_self(self);
2075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 if (from_len == 0) {
2080 /* insert the 'to' string everywhere. */
2081 /* >>> "Python".replace("", ".") */
2082 /* '.P.y.t.h.o.n.' */
2083 return replace_interleave(self, to_s, to_len, maxcount);
2084 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2087 /* point for an empty self string to generate a non-empty string */
2088 /* Special case so the remaining code always gets a non-empty string */
2089 if (PyBytes_GET_SIZE(self) == 0) {
2090 return return_self(self);
2091 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 if (to_len == 0) {
2094 /* delete all occurrences of 'from' string */
2095 if (from_len == 1) {
2096 return replace_delete_single_character(
2097 self, from_s[0], maxcount);
2098 } else {
2099 return replace_delete_substring(self, from_s,
2100 from_len, maxcount);
2101 }
2102 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 if (from_len == to_len) {
2107 if (from_len == 1) {
2108 return replace_single_character_in_place(
2109 self,
2110 from_s[0],
2111 to_s[0],
2112 maxcount);
2113 } else {
2114 return replace_substring_in_place(
2115 self, from_s, from_len, to_s, to_len,
2116 maxcount);
2117 }
2118 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 /* Otherwise use the more generic algorithms */
2121 if (from_len == 1) {
2122 return replace_single_character(self, from_s[0],
2123 to_s, to_len, maxcount);
2124 } else {
2125 /* len('from')>=2, len('to')>=1 */
2126 return replace_substring(self, from_s, from_len, to_s, to_len,
2127 maxcount);
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131PyDoc_STRVAR(replace__doc__,
2132"B.replace(old, new[, count]) -> bytes\n\
2133\n\
2134Return a copy of B with all occurrences of subsection\n\
2135old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002136given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
2138static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002139bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 Py_ssize_t count = -1;
2142 PyObject *from, *to;
2143 const char *from_s, *to_s;
2144 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2147 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 if (PyBytes_Check(from)) {
2150 from_s = PyBytes_AS_STRING(from);
2151 from_len = PyBytes_GET_SIZE(from);
2152 }
2153 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2154 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 if (PyBytes_Check(to)) {
2157 to_s = PyBytes_AS_STRING(to);
2158 to_len = PyBytes_GET_SIZE(to);
2159 }
2160 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2161 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 return (PyObject *)replace((PyBytesObject *) self,
2164 from_s, from_len,
2165 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166}
2167
2168/** End DALKE **/
2169
2170/* Matches the end (direction >= 0) or start (direction < 0) of self
2171 * against substr, using the start and end arguments. Returns
2172 * -1 on error, 0 if not found and 1 if found.
2173 */
2174Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002175_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 Py_ssize_t len = PyBytes_GET_SIZE(self);
2179 Py_ssize_t slen;
2180 const char* sub;
2181 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 if (PyBytes_Check(substr)) {
2184 sub = PyBytes_AS_STRING(substr);
2185 slen = PyBytes_GET_SIZE(substr);
2186 }
2187 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2188 return -1;
2189 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 if (direction < 0) {
2194 /* startswith */
2195 if (start+slen > len)
2196 return 0;
2197 } else {
2198 /* endswith */
2199 if (end-start < slen || start > len)
2200 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002202 if (end-slen > start)
2203 start = end - slen;
2204 }
2205 if (end-start >= slen)
2206 return ! memcmp(str+start, sub, slen);
2207 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208}
2209
2210
2211PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002212"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213\n\
2214Return True if B starts with the specified prefix, False otherwise.\n\
2215With optional start, test B beginning at that position.\n\
2216With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002217prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
2219static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002220bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002222 Py_ssize_t start = 0;
2223 Py_ssize_t end = PY_SSIZE_T_MAX;
2224 PyObject *subobj;
2225 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Jesus Ceaac451502011-04-20 17:09:23 +02002227 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 return NULL;
2229 if (PyTuple_Check(subobj)) {
2230 Py_ssize_t i;
2231 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2232 result = _bytes_tailmatch(self,
2233 PyTuple_GET_ITEM(subobj, i),
2234 start, end, -1);
2235 if (result == -1)
2236 return NULL;
2237 else if (result) {
2238 Py_RETURN_TRUE;
2239 }
2240 }
2241 Py_RETURN_FALSE;
2242 }
2243 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002244 if (result == -1) {
2245 if (PyErr_ExceptionMatches(PyExc_TypeError))
2246 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2247 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002249 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 else
2251 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252}
2253
2254
/* Docstring for bytes.endswith(); referenced from the "endswith" entry of
   the bytes_methods table. */
PyDoc_STRVAR(endswith__doc__,
"B.endswith(suffix[, start[, end]]) -> bool\n\
\n\
Return True if B ends with the specified suffix, False otherwise.\n\
With optional start, test B beginning at that position.\n\
With optional end, stop comparing B at that position.\n\
suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262
2263static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002264bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 Py_ssize_t start = 0;
2267 Py_ssize_t end = PY_SSIZE_T_MAX;
2268 PyObject *subobj;
2269 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Jesus Ceaac451502011-04-20 17:09:23 +02002271 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002272 return NULL;
2273 if (PyTuple_Check(subobj)) {
2274 Py_ssize_t i;
2275 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2276 result = _bytes_tailmatch(self,
2277 PyTuple_GET_ITEM(subobj, i),
2278 start, end, +1);
2279 if (result == -1)
2280 return NULL;
2281 else if (result) {
2282 Py_RETURN_TRUE;
2283 }
2284 }
2285 Py_RETURN_FALSE;
2286 }
2287 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002288 if (result == -1) {
2289 if (PyErr_ExceptionMatches(PyExc_TypeError))
2290 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2291 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002293 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 else
2295 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296}
2297
2298
2299PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002300"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002302Decode B using the codec registered for encoding. Default encoding\n\
2303is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002304handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2305a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002306as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002307able to handle UnicodeDecodeErrors.");
2308
2309static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002310bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 const char *encoding = NULL;
2313 const char *errors = NULL;
2314 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2317 return NULL;
2318 if (encoding == NULL)
2319 encoding = PyUnicode_GetDefaultEncoding();
2320 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002321}
2322
Guido van Rossum20188312006-05-05 15:15:40 +00002323
/* Docstring for bytes.splitlines(); referenced from the "splitlines" entry
   of the bytes_methods table. */
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
2330
2331static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002332bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002333{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002334 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002335 int keepends = 0;
2336
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002337 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2338 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002339 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002340
2341 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002342 (PyObject*) self, PyBytes_AS_STRING(self),
2343 PyBytes_GET_SIZE(self), keepends
2344 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002345}
2346
2347
/* Docstring for the bytes.fromhex() class method; referenced from the
   "fromhex" entry of the bytes_methods table. */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002354
2355static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002356hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002358 if (c >= 128)
2359 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002360 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 return c - '0';
2362 else {
David Malcolm96960882010-11-05 17:23:41 +00002363 if (Py_ISUPPER(c))
2364 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 if (c >= 'a' && c <= 'f')
2366 return c - 'a' + 10;
2367 }
2368 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002369}
2370
2371static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002372bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002373{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002374 PyObject *newstring, *hexobj;
2375 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 Py_ssize_t hexlen, byteslen, i, j;
2377 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002378 void *data;
2379 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002381 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2382 return NULL;
2383 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002384 if (PyUnicode_READY(hexobj))
2385 return NULL;
2386 kind = PyUnicode_KIND(hexobj);
2387 data = PyUnicode_DATA(hexobj);
2388 hexlen = PyUnicode_GET_LENGTH(hexobj);
2389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 byteslen = hexlen/2; /* This overestimates if there are spaces */
2391 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2392 if (!newstring)
2393 return NULL;
2394 buf = PyBytes_AS_STRING(newstring);
2395 for (i = j = 0; i < hexlen; i += 2) {
2396 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002397 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002398 i++;
2399 if (i >= hexlen)
2400 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002401 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2402 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 if (top == -1 || bot == -1) {
2404 PyErr_Format(PyExc_ValueError,
2405 "non-hexadecimal number found in "
2406 "fromhex() arg at position %zd", i);
2407 goto error;
2408 }
2409 buf[j++] = (top << 4) + bot;
2410 }
2411 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2412 goto error;
2413 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002414
2415 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 Py_XDECREF(newstring);
2417 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002418}
2419
/* Docstring for bytes.__sizeof__(); referenced from the "__sizeof__" entry
   of the bytes_methods table. */
PyDoc_STRVAR(sizeof__doc__,
"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002422
2423static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002424bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 Py_ssize_t res;
2427 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2428 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002429}
2430
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002431
2432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002433bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436}
2437
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002438
/* Method table for the bytes type; installed through the tp_methods slot of
   PyBytes_Type.

   Each entry is {name, C implementation, calling-convention flags, docstring}.
   Apart from __getnewargs__ (first, no docstring) and __sizeof__ (last), the
   entries are kept in alphabetical order.  Implementations prefixed
   stringlib_ are referenced here but not defined in this part of the file. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* fromhex is a class method: it is invoked on the type, not an instance */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    /* maketrans is a static method: it receives neither instance nor type */
    {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
     _Py_maketrans__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
     sizeof__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2501
/* Forward declaration: bytes_new() hands construction of subclass instances
   to this helper, which is defined further down in this file. */
static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2504
2505static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002506bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 PyObject *x = NULL;
2509 const char *encoding = NULL;
2510 const char *errors = NULL;
2511 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002512 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 Py_ssize_t size;
2514 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002515 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 if (type != &PyBytes_Type)
2518 return str_subtype_new(type, args, kwds);
2519 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2520 &encoding, &errors))
2521 return NULL;
2522 if (x == NULL) {
2523 if (encoding != NULL || errors != NULL) {
2524 PyErr_SetString(PyExc_TypeError,
2525 "encoding or errors without sequence "
2526 "argument");
2527 return NULL;
2528 }
2529 return PyBytes_FromString("");
2530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002532 if (PyUnicode_Check(x)) {
2533 /* Encode via the codec registry */
2534 if (encoding == NULL) {
2535 PyErr_SetString(PyExc_TypeError,
2536 "string argument without an encoding");
2537 return NULL;
2538 }
2539 new = PyUnicode_AsEncodedString(x, encoding, errors);
2540 if (new == NULL)
2541 return NULL;
2542 assert(PyBytes_Check(new));
2543 return new;
2544 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002545
2546 /* We'd like to call PyObject_Bytes here, but we need to check for an
2547 integer argument before deferring to PyBytes_FromObject, something
2548 PyObject_Bytes doesn't do. */
2549 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2550 if (func != NULL) {
2551 new = PyObject_CallFunctionObjArgs(func, NULL);
2552 Py_DECREF(func);
2553 if (new == NULL)
2554 return NULL;
2555 if (!PyBytes_Check(new)) {
2556 PyErr_Format(PyExc_TypeError,
2557 "__bytes__ returned non-bytes (type %.200s)",
2558 Py_TYPE(new)->tp_name);
2559 Py_DECREF(new);
2560 return NULL;
2561 }
2562 return new;
2563 }
2564 else if (PyErr_Occurred())
2565 return NULL;
2566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 /* Is it an integer? */
2568 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2569 if (size == -1 && PyErr_Occurred()) {
2570 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2571 return NULL;
2572 PyErr_Clear();
2573 }
2574 else if (size < 0) {
2575 PyErr_SetString(PyExc_ValueError, "negative count");
2576 return NULL;
2577 }
2578 else {
2579 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002580 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002582 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002583 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 return new;
2585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002587 /* If it's not unicode, there can't be encoding or errors */
2588 if (encoding != NULL || errors != NULL) {
2589 PyErr_SetString(PyExc_TypeError,
2590 "encoding or errors without a string argument");
2591 return NULL;
2592 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002593
2594 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002595}
2596
2597PyObject *
2598PyBytes_FromObject(PyObject *x)
2599{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 PyObject *new, *it;
2601 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 if (x == NULL) {
2604 PyErr_BadInternalCall();
2605 return NULL;
2606 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002607
2608 if (PyBytes_CheckExact(x)) {
2609 Py_INCREF(x);
2610 return x;
2611 }
2612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 /* Use the modern buffer interface */
2614 if (PyObject_CheckBuffer(x)) {
2615 Py_buffer view;
2616 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2617 return NULL;
2618 new = PyBytes_FromStringAndSize(NULL, view.len);
2619 if (!new)
2620 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2622 &view, view.len, 'C') < 0)
2623 goto fail;
2624 PyBuffer_Release(&view);
2625 return new;
2626 fail:
2627 Py_XDECREF(new);
2628 PyBuffer_Release(&view);
2629 return NULL;
2630 }
2631 if (PyUnicode_Check(x)) {
2632 PyErr_SetString(PyExc_TypeError,
2633 "cannot convert unicode object to bytes");
2634 return NULL;
2635 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 if (PyList_CheckExact(x)) {
2638 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2639 if (new == NULL)
2640 return NULL;
2641 for (i = 0; i < Py_SIZE(x); i++) {
2642 Py_ssize_t value = PyNumber_AsSsize_t(
2643 PyList_GET_ITEM(x, i), PyExc_ValueError);
2644 if (value == -1 && PyErr_Occurred()) {
2645 Py_DECREF(new);
2646 return NULL;
2647 }
2648 if (value < 0 || value >= 256) {
2649 PyErr_SetString(PyExc_ValueError,
2650 "bytes must be in range(0, 256)");
2651 Py_DECREF(new);
2652 return NULL;
2653 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002654 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 }
2656 return new;
2657 }
2658 if (PyTuple_CheckExact(x)) {
2659 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2660 if (new == NULL)
2661 return NULL;
2662 for (i = 0; i < Py_SIZE(x); i++) {
2663 Py_ssize_t value = PyNumber_AsSsize_t(
2664 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2665 if (value == -1 && PyErr_Occurred()) {
2666 Py_DECREF(new);
2667 return NULL;
2668 }
2669 if (value < 0 || value >= 256) {
2670 PyErr_SetString(PyExc_ValueError,
2671 "bytes must be in range(0, 256)");
2672 Py_DECREF(new);
2673 return NULL;
2674 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002675 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 }
2677 return new;
2678 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 /* For iterator version, create a string object and resize as needed */
2681 size = _PyObject_LengthHint(x, 64);
2682 if (size == -1 && PyErr_Occurred())
2683 return NULL;
2684 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2685 returning a shared empty bytes string. This required because we
2686 want to call _PyBytes_Resize() the returned object, which we can
2687 only do on bytes objects with refcount == 1. */
2688 size += 1;
2689 new = PyBytes_FromStringAndSize(NULL, size);
2690 if (new == NULL)
2691 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 /* Get the iterator */
2694 it = PyObject_GetIter(x);
2695 if (it == NULL)
2696 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* Run the iterator to exhaustion */
2699 for (i = 0; ; i++) {
2700 PyObject *item;
2701 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* Get the next item */
2704 item = PyIter_Next(it);
2705 if (item == NULL) {
2706 if (PyErr_Occurred())
2707 goto error;
2708 break;
2709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 /* Interpret it as an int (__index__) */
2712 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2713 Py_DECREF(item);
2714 if (value == -1 && PyErr_Occurred())
2715 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 /* Range check */
2718 if (value < 0 || value >= 256) {
2719 PyErr_SetString(PyExc_ValueError,
2720 "bytes must be in range(0, 256)");
2721 goto error;
2722 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 /* Append the byte */
2725 if (i >= size) {
2726 size = 2 * size + 1;
2727 if (_PyBytes_Resize(&new, size) < 0)
2728 goto error;
2729 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002730 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 }
2732 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002734 /* Clean up and return success */
2735 Py_DECREF(it);
2736 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
2738 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 /* Error handling when new != NULL */
2740 Py_XDECREF(it);
2741 Py_DECREF(new);
2742 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743}
2744
2745static PyObject *
2746str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2747{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 PyObject *tmp, *pnew;
2749 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 assert(PyType_IsSubtype(type, &PyBytes_Type));
2752 tmp = bytes_new(&PyBytes_Type, args, kwds);
2753 if (tmp == NULL)
2754 return NULL;
2755 assert(PyBytes_CheckExact(tmp));
2756 n = PyBytes_GET_SIZE(tmp);
2757 pnew = type->tp_alloc(type, n);
2758 if (pnew != NULL) {
2759 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2760 PyBytes_AS_STRING(tmp), n+1);
2761 ((PyBytesObject *)pnew)->ob_shash =
2762 ((PyBytesObject *)tmp)->ob_shash;
2763 }
2764 Py_DECREF(tmp);
2765 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002766}
2767
/* Docstring of the bytes type itself; installed through the tp_doc slot of
   PyBytes_Type.  Lists the accepted constructor call forms. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002780
/* Forward declaration so that PyBytes_Type can name it in its tp_iter slot */
static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002782
/* Type object for bytes.

   Slots are filled positionally; a 0 means the slot is left unset.  The
   type is variable-sized: instances occupy PyBytesObject_SIZE plus one
   sizeof(char) item per byte.  Py_TPFLAGS_BASETYPE allows subclassing. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002825
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826void
2827PyBytes_Concat(register PyObject **pv, register PyObject *w)
2828{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002829 register PyObject *v;
2830 assert(pv != NULL);
2831 if (*pv == NULL)
2832 return;
2833 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002834 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 return;
2836 }
2837 v = bytes_concat(*pv, w);
2838 Py_DECREF(*pv);
2839 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840}
2841
/* Same as PyBytes_Concat(pv, w), but additionally releases the caller's
   reference to w afterwards.  w may be NULL (Py_XDECREF tolerates that). */
void
PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
2848
2849
/* The following function breaks the notion that strings are immutable:
   it changes the size of a string.  We get away with this only if there
   is only one module referencing the object.  You can also think of it
   as creating a new string object and destroying the old one, only
   more efficiently.  In any case, don't use this if the string may
   already be known to some other part of the code...
   Note that if there's not enough memory to resize the string, the original
   string object at *pv is deallocated, *pv is set to NULL, an "out of
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.

   If *pv is not a bytes object, is referenced more than once, or newsize
   is negative, the reference held in *pv is dropped, *pv is set to NULL,
   a bad-internal-call error is raised and -1 is returned.
*/

int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyBytesObject *sv;
    v = *pv;
    /* Precondition check: must be a uniquely referenced bytes object */
    if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Undo the reference bookkeeping for v before the block may move;
       _Py_NewReference() below redoes it for the resulting object. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the original block and report it */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
2893
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894void
2895PyBytes_Fini(void)
2896{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002898 for (i = 0; i < UCHAR_MAX + 1; i++)
2899 Py_CLEAR(characters[i]);
2900 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901}
2902
Benjamin Peterson4116f362008-05-27 00:36:20 +00002903/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002904
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;     /* position of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002910
/* tp_dealloc for the bytes iterator: stop GC tracking, drop the reference to
   the underlying bytes object (if any), then free the iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    /* it_seq is NULL once the iterator has been exhausted */
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2918
/* tp_traverse for the bytes iterator: report the one object reference the
   iterator holds (it_seq) to the garbage collector's visit callback. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2925
2926static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002928{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 PyBytesObject *seq;
2930 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002932 assert(it != NULL);
2933 seq = it->it_seq;
2934 if (seq == NULL)
2935 return NULL;
2936 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2939 item = PyLong_FromLong(
2940 (unsigned char)seq->ob_sval[it->it_index]);
2941 if (item != NULL)
2942 ++it->it_index;
2943 return item;
2944 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 Py_DECREF(seq);
2947 it->it_seq = NULL;
2948 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002949}
2950
2951static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002952striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002953{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 Py_ssize_t len = 0;
2955 if (it->it_seq)
2956 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2957 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002958}
2959
/* Docstring for the iterator's __length_hint__ method */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002962
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002963static PyObject *
2964striter_reduce(striterobject *it)
2965{
2966 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002967 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002968 it->it_seq, it->it_index);
2969 } else {
2970 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2971 if (u == NULL)
2972 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002973 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002974 }
2975}
2976
/* Docstring for the iterator's __reduce__ method */
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2978
2979static PyObject *
2980striter_setstate(striterobject *it, PyObject *state)
2981{
2982 Py_ssize_t index = PyLong_AsSsize_t(state);
2983 if (index == -1 && PyErr_Occurred())
2984 return NULL;
2985 if (index < 0)
2986 index = 0;
2987 it->it_index = index;
2988 Py_RETURN_NONE;
2989}
2990
2991PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2995 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002996 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2997 reduce_doc},
2998 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2999 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003000 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001};
3002
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3005 "bytes_iterator", /* tp_name */
3006 sizeof(striterobject), /* tp_basicsize */
3007 0, /* tp_itemsize */
3008 /* methods */
3009 (destructor)striter_dealloc, /* tp_dealloc */
3010 0, /* tp_print */
3011 0, /* tp_getattr */
3012 0, /* tp_setattr */
3013 0, /* tp_reserved */
3014 0, /* tp_repr */
3015 0, /* tp_as_number */
3016 0, /* tp_as_sequence */
3017 0, /* tp_as_mapping */
3018 0, /* tp_hash */
3019 0, /* tp_call */
3020 0, /* tp_str */
3021 PyObject_GenericGetAttr, /* tp_getattro */
3022 0, /* tp_setattro */
3023 0, /* tp_as_buffer */
3024 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3025 0, /* tp_doc */
3026 (traverseproc)striter_traverse, /* tp_traverse */
3027 0, /* tp_clear */
3028 0, /* tp_richcompare */
3029 0, /* tp_weaklistoffset */
3030 PyObject_SelfIter, /* tp_iter */
3031 (iternextfunc)striter_next, /* tp_iternext */
3032 striter_methods, /* tp_methods */
3033 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003034};
3035
3036static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003037bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003038{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003039 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 if (!PyBytes_Check(seq)) {
3042 PyErr_BadInternalCall();
3043 return NULL;
3044 }
3045 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3046 if (it == NULL)
3047 return NULL;
3048 it->it_index = 0;
3049 Py_INCREF(seq);
3050 it->it_seq = (PyBytesObject *)seq;
3051 _PyObject_GC_TRACK(it);
3052 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003053}