blob: 43d83815cc5c415e2eac024dfec9a562e479d318 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 static const char *hexdigits = "0123456789abcdef";
568 register PyBytesObject* op = (PyBytesObject*) obj;
569 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000570 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000572 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_SetString(PyExc_OverflowError,
574 "bytes object is too large to make repr");
575 return NULL;
576 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000577 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 v = PyUnicode_FromUnicode(NULL, newsize);
579 if (v == NULL) {
580 return NULL;
581 }
582 else {
583 register Py_ssize_t i;
584 register Py_UNICODE c;
585 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
586 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 /* Figure out which quote to use; single is preferred */
589 quote = '\'';
590 if (smartquotes) {
591 char *test, *start;
592 start = PyBytes_AS_STRING(op);
593 for (test = start; test < start+length; ++test) {
594 if (*test == '"') {
595 quote = '\''; /* back to single */
596 goto decided;
597 }
598 else if (*test == '\'')
599 quote = '"';
600 }
601 decided:
602 ;
603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 *p++ = 'b', *p++ = quote;
606 for (i = 0; i < length; i++) {
607 /* There's at least enough room for a hex escape
608 and a closing quote. */
609 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
610 c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
622 *p++ = hexdigits[(c & 0xf0) >> 4];
623 *p++ = hexdigits[c & 0xf];
624 }
625 else
626 *p++ = c;
627 }
628 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
629 *p++ = quote;
630 *p = '\0';
631 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
632 Py_DECREF(v);
633 return NULL;
634 }
635 return v;
636 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000637}
638
Neal Norwitz6968b052007-02-27 19:02:19 +0000639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000640bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000643}
644
Neal Norwitz6968b052007-02-27 19:02:19 +0000645static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000646bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 if (Py_BytesWarningFlag) {
649 if (PyErr_WarnEx(PyExc_BytesWarning,
650 "str() on a bytes instance", 1))
651 return NULL;
652 }
653 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660}
Neal Norwitz6968b052007-02-27 19:02:19 +0000661
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662/* This is also used by PyBytes_Concat() */
663static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000664bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 Py_ssize_t size;
667 Py_buffer va, vb;
668 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 va.len = -1;
671 vb.len = -1;
672 if (_getbuffer(a, &va) < 0 ||
673 _getbuffer(b, &vb) < 0) {
674 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
675 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
676 goto done;
677 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 /* Optimize end cases */
680 if (va.len == 0 && PyBytes_CheckExact(b)) {
681 result = b;
682 Py_INCREF(result);
683 goto done;
684 }
685 if (vb.len == 0 && PyBytes_CheckExact(a)) {
686 result = a;
687 Py_INCREF(result);
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 size = va.len + vb.len;
692 if (size < 0) {
693 PyErr_NoMemory();
694 goto done;
695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 result = PyBytes_FromStringAndSize(NULL, size);
698 if (result != NULL) {
699 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
700 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702
703 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 if (va.len != -1)
705 PyBuffer_Release(&va);
706 if (vb.len != -1)
707 PyBuffer_Release(&vb);
708 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000709}
Neal Norwitz6968b052007-02-27 19:02:19 +0000710
711static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000712bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000713{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 register Py_ssize_t i;
715 register Py_ssize_t j;
716 register Py_ssize_t size;
717 register PyBytesObject *op;
718 size_t nbytes;
719 if (n < 0)
720 n = 0;
721 /* watch out for overflows: the size can overflow int,
722 * and the # of bytes needed can overflow size_t
723 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000724 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 PyErr_SetString(PyExc_OverflowError,
726 "repeated bytes are too long");
727 return NULL;
728 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000729 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
731 Py_INCREF(a);
732 return (PyObject *)a;
733 }
734 nbytes = (size_t)size;
735 if (nbytes + PyBytesObject_SIZE <= nbytes) {
736 PyErr_SetString(PyExc_OverflowError,
737 "repeated bytes are too long");
738 return NULL;
739 }
740 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
741 if (op == NULL)
742 return PyErr_NoMemory();
743 PyObject_INIT_VAR(op, &PyBytes_Type, size);
744 op->ob_shash = -1;
745 op->ob_sval[size] = '\0';
746 if (Py_SIZE(a) == 1 && n > 0) {
747 memset(op->ob_sval, a->ob_sval[0] , n);
748 return (PyObject *) op;
749 }
750 i = 0;
751 if (i < size) {
752 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
753 i = Py_SIZE(a);
754 }
755 while (i < size) {
756 j = (i <= size-i) ? i : size-i;
757 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
758 i += j;
759 }
760 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000761}
762
Guido van Rossum98297ee2007-11-06 21:34:58 +0000763static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000764bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000765{
766 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
767 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000768 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000769 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000770 PyErr_Clear();
771 if (_getbuffer(arg, &varg) < 0)
772 return -1;
773 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
774 varg.buf, varg.len, 0);
775 PyBuffer_Release(&varg);
776 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777 }
778 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
780 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782
Antoine Pitrou0010d372010-08-15 17:12:55 +0000783 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784}
785
Neal Norwitz6968b052007-02-27 19:02:19 +0000786static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000787bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 if (i < 0 || i >= Py_SIZE(a)) {
790 PyErr_SetString(PyExc_IndexError, "index out of range");
791 return NULL;
792 }
793 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000794}
795
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000797bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 int c;
800 Py_ssize_t len_a, len_b;
801 Py_ssize_t min_len;
802 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 /* Make sure both arguments are strings. */
805 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
806 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
807 (PyObject_IsInstance((PyObject*)a,
808 (PyObject*)&PyUnicode_Type) ||
809 PyObject_IsInstance((PyObject*)b,
810 (PyObject*)&PyUnicode_Type))) {
811 if (PyErr_WarnEx(PyExc_BytesWarning,
812 "Comparison between bytes and string", 1))
813 return NULL;
814 }
815 result = Py_NotImplemented;
816 goto out;
817 }
818 if (a == b) {
819 switch (op) {
820 case Py_EQ:case Py_LE:case Py_GE:
821 result = Py_True;
822 goto out;
823 case Py_NE:case Py_LT:case Py_GT:
824 result = Py_False;
825 goto out;
826 }
827 }
828 if (op == Py_EQ) {
829 /* Supporting Py_NE here as well does not save
830 much time, since Py_NE is rarely used. */
831 if (Py_SIZE(a) == Py_SIZE(b)
832 && (a->ob_sval[0] == b->ob_sval[0]
833 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
834 result = Py_True;
835 } else {
836 result = Py_False;
837 }
838 goto out;
839 }
840 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
841 min_len = (len_a < len_b) ? len_a : len_b;
842 if (min_len > 0) {
843 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
844 if (c==0)
845 c = memcmp(a->ob_sval, b->ob_sval, min_len);
846 } else
847 c = 0;
848 if (c == 0)
849 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
850 switch (op) {
851 case Py_LT: c = c < 0; break;
852 case Py_LE: c = c <= 0; break;
853 case Py_EQ: assert(0); break; /* unreachable */
854 case Py_NE: c = c != 0; break;
855 case Py_GT: c = c > 0; break;
856 case Py_GE: c = c >= 0; break;
857 default:
858 result = Py_NotImplemented;
859 goto out;
860 }
861 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000862 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 Py_INCREF(result);
864 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000865}
866
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000867static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000868bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 register Py_ssize_t len;
871 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000872 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (a->ob_shash != -1)
875 return a->ob_shash;
876 len = Py_SIZE(a);
877 p = (unsigned char *) a->ob_sval;
878 x = *p << 7;
879 while (--len >= 0)
880 x = (1000003*x) ^ *p++;
881 x ^= Py_SIZE(a);
882 if (x == -1)
883 x = -2;
884 a->ob_shash = x;
885 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000886}
887
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000888static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000889bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 if (PyIndex_Check(item)) {
892 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
893 if (i == -1 && PyErr_Occurred())
894 return NULL;
895 if (i < 0)
896 i += PyBytes_GET_SIZE(self);
897 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
898 PyErr_SetString(PyExc_IndexError,
899 "index out of range");
900 return NULL;
901 }
902 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
903 }
904 else if (PySlice_Check(item)) {
905 Py_ssize_t start, stop, step, slicelength, cur, i;
906 char* source_buf;
907 char* result_buf;
908 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000909
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000910 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 PyBytes_GET_SIZE(self),
912 &start, &stop, &step, &slicelength) < 0) {
913 return NULL;
914 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 if (slicelength <= 0) {
917 return PyBytes_FromStringAndSize("", 0);
918 }
919 else if (start == 0 && step == 1 &&
920 slicelength == PyBytes_GET_SIZE(self) &&
921 PyBytes_CheckExact(self)) {
922 Py_INCREF(self);
923 return (PyObject *)self;
924 }
925 else if (step == 1) {
926 return PyBytes_FromStringAndSize(
927 PyBytes_AS_STRING(self) + start,
928 slicelength);
929 }
930 else {
931 source_buf = PyBytes_AS_STRING(self);
932 result = PyBytes_FromStringAndSize(NULL, slicelength);
933 if (result == NULL)
934 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 result_buf = PyBytes_AS_STRING(result);
937 for (cur = start, i = 0; i < slicelength;
938 cur += step, i++) {
939 result_buf[i] = source_buf[cur];
940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 return result;
943 }
944 }
945 else {
946 PyErr_Format(PyExc_TypeError,
947 "byte indices must be integers, not %.200s",
948 Py_TYPE(item)->tp_name);
949 return NULL;
950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
953static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
957 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958}
959
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000960static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 (lenfunc)bytes_length, /*sq_length*/
962 (binaryfunc)bytes_concat, /*sq_concat*/
963 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
964 (ssizeargfunc)bytes_item, /*sq_item*/
965 0, /*sq_slice*/
966 0, /*sq_ass_item*/
967 0, /*sq_ass_slice*/
968 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969};
970
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 (lenfunc)bytes_length,
973 (binaryfunc)bytes_subscript,
974 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975};
976
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000977static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 (getbufferproc)bytes_buffer_getbuffer,
979 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980};
981
982
/* Strip modes understood by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple() format strings, indexed by the strip modes above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
991
Neal Norwitz6968b052007-02-27 19:02:19 +0000992PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000994\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000995Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996If sep is not specified or is None, B is split on ASCII whitespace\n\
997characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000998If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000999
1000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001001bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1004 Py_ssize_t maxsplit = -1;
1005 const char *s = PyBytes_AS_STRING(self), *sub;
1006 Py_buffer vsub;
1007 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1010 return NULL;
1011 if (maxsplit < 0)
1012 maxsplit = PY_SSIZE_T_MAX;
1013 if (subobj == Py_None)
1014 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1015 if (_getbuffer(subobj, &vsub) < 0)
1016 return NULL;
1017 sub = vsub.buf;
1018 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1021 PyBuffer_Release(&vsub);
1022 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001023}
1024
Neal Norwitz6968b052007-02-27 19:02:19 +00001025PyDoc_STRVAR(partition__doc__,
1026"B.partition(sep) -> (head, sep, tail)\n\
1027\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001028Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001029the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001033bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 const char *sep;
1036 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (PyBytes_Check(sep_obj)) {
1039 sep = PyBytes_AS_STRING(sep_obj);
1040 sep_len = PyBytes_GET_SIZE(sep_obj);
1041 }
1042 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1043 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return stringlib_partition(
1046 (PyObject*) self,
1047 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1048 sep_obj, sep, sep_len
1049 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001050}
1051
1052PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001053"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001054\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001055Search for the separator sep in B, starting at the end of B,\n\
1056and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001057part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001058bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
1060static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 const char *sep;
1064 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 if (PyBytes_Check(sep_obj)) {
1067 sep = PyBytes_AS_STRING(sep_obj);
1068 sep_len = PyBytes_GET_SIZE(sep_obj);
1069 }
1070 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1071 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 return stringlib_rpartition(
1074 (PyObject*) self,
1075 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1076 sep_obj, sep, sep_len
1077 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001078}
1079
Neal Norwitz6968b052007-02-27 19:02:19 +00001080PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001081"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001082\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001083Return a list of the sections in B, using sep as the delimiter,\n\
1084starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001085If sep is not given, B is split on ASCII whitespace characters\n\
1086(space, tab, return, newline, formfeed, vertical tab).\n\
1087If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001089
Neal Norwitz6968b052007-02-27 19:02:19 +00001090static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001091bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001092{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1094 Py_ssize_t maxsplit = -1;
1095 const char *s = PyBytes_AS_STRING(self), *sub;
1096 Py_buffer vsub;
1097 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1100 return NULL;
1101 if (maxsplit < 0)
1102 maxsplit = PY_SSIZE_T_MAX;
1103 if (subobj == Py_None)
1104 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1105 if (_getbuffer(subobj, &vsub) < 0)
1106 return NULL;
1107 sub = vsub.buf;
1108 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1111 PyBuffer_Release(&vsub);
1112 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001113}
1114
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001115
1116PyDoc_STRVAR(join__doc__,
1117"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001118\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001119Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001120Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1121
Neal Norwitz6968b052007-02-27 19:02:19 +00001122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001123bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 char *sep = PyBytes_AS_STRING(self);
1126 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1127 PyObject *res = NULL;
1128 char *p;
1129 Py_ssize_t seqlen = 0;
1130 size_t sz = 0;
1131 Py_ssize_t i;
1132 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 seq = PySequence_Fast(orig, "");
1135 if (seq == NULL) {
1136 return NULL;
1137 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 seqlen = PySequence_Size(seq);
1140 if (seqlen == 0) {
1141 Py_DECREF(seq);
1142 return PyBytes_FromString("");
1143 }
1144 if (seqlen == 1) {
1145 item = PySequence_Fast_GET_ITEM(seq, 0);
1146 if (PyBytes_CheckExact(item)) {
1147 Py_INCREF(item);
1148 Py_DECREF(seq);
1149 return item;
1150 }
1151 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 /* There are at least two things to join, or else we have a subclass
1154 * of the builtin types in the sequence.
1155 * Do a pre-pass to figure out the total amount of space we'll
1156 * need (sz), and see whether all argument are bytes.
1157 */
1158 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1159 for (i = 0; i < seqlen; i++) {
1160 const size_t old_sz = sz;
1161 item = PySequence_Fast_GET_ITEM(seq, i);
1162 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1163 PyErr_Format(PyExc_TypeError,
1164 "sequence item %zd: expected bytes,"
1165 " %.80s found",
1166 i, Py_TYPE(item)->tp_name);
1167 Py_DECREF(seq);
1168 return NULL;
1169 }
1170 sz += Py_SIZE(item);
1171 if (i != 0)
1172 sz += seplen;
1173 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1174 PyErr_SetString(PyExc_OverflowError,
1175 "join() result is too long for bytes");
1176 Py_DECREF(seq);
1177 return NULL;
1178 }
1179 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* Allocate result space. */
1182 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1183 if (res == NULL) {
1184 Py_DECREF(seq);
1185 return NULL;
1186 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 /* Catenate everything. */
1189 /* I'm not worried about a PyByteArray item growing because there's
1190 nowhere in this function where we release the GIL. */
1191 p = PyBytes_AS_STRING(res);
1192 for (i = 0; i < seqlen; ++i) {
1193 size_t n;
1194 char *q;
1195 if (i) {
1196 Py_MEMCPY(p, sep, seplen);
1197 p += seplen;
1198 }
1199 item = PySequence_Fast_GET_ITEM(seq, i);
1200 n = Py_SIZE(item);
1201 if (PyBytes_Check(item))
1202 q = PyBytes_AS_STRING(item);
1203 else
1204 q = PyByteArray_AS_STRING(item);
1205 Py_MEMCPY(p, q, n);
1206 p += n;
1207 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 Py_DECREF(seq);
1210 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001211}
1212
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213PyObject *
1214_PyBytes_Join(PyObject *sep, PyObject *x)
1215{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 assert(sep != NULL && PyBytes_Check(sep));
1217 assert(x != NULL);
1218 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219}
1220
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when larger; a negative start or end has len
   added to it and is then clamped to 0.  start is NOT clamped to len,
   so start may still exceed end afterwards.

   start and end must be modifiable lvalues; each argument is evaluated
   more than once.  The body is wrapped in do { } while (0) so that the
   macro behaves as a single statement (safe inside if/else), and the
   arguments are parenthesized. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235
1236Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001237bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyObject *subobj;
1240 const char *sub;
1241 Py_ssize_t sub_len;
1242 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Jesus Ceaac451502011-04-20 17:09:23 +02001244 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1245 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 if (PyBytes_Check(subobj)) {
1249 sub = PyBytes_AS_STRING(subobj);
1250 sub_len = PyBytes_GET_SIZE(subobj);
1251 }
1252 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1253 /* XXX - the "expected a character buffer object" is pretty
1254 confusing for a non-expert. remap to something else ? */
1255 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 if (dir > 0)
1258 return stringlib_find_slice(
1259 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1260 sub, sub_len, start, end);
1261 else
1262 return stringlib_rfind_slice(
1263 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1264 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265}
1266
1267
1268PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001269"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001270\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001271Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001272such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001274\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275Return -1 on failure.");
1276
Neal Norwitz6968b052007-02-27 19:02:19 +00001277static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001278bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001279{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 Py_ssize_t result = bytes_find_internal(self, args, +1);
1281 if (result == -2)
1282 return NULL;
1283 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001284}
1285
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286
1287PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001288"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001289\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290Like B.find() but raise ValueError when the substring is not found.");
1291
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001292static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001293bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_ssize_t result = bytes_find_internal(self, args, +1);
1296 if (result == -2)
1297 return NULL;
1298 if (result == -1) {
1299 PyErr_SetString(PyExc_ValueError,
1300 "substring not found");
1301 return NULL;
1302 }
1303 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001304}
1305
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
1307PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001308"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001309\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001311such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001313\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314Return -1 on failure.");
1315
Neal Norwitz6968b052007-02-27 19:02:19 +00001316static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001317bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001318{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 Py_ssize_t result = bytes_find_internal(self, args, -1);
1320 if (result == -2)
1321 return NULL;
1322 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001323}
1324
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001325
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001327"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328\n\
1329Like B.rfind() but raise ValueError when the substring is not found.");
1330
1331static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001332bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 Py_ssize_t result = bytes_find_internal(self, args, -1);
1335 if (result == -2)
1336 return NULL;
1337 if (result == -1) {
1338 PyErr_SetString(PyExc_ValueError,
1339 "substring not found");
1340 return NULL;
1341 }
1342 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001343}
1344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345
1346Py_LOCAL_INLINE(PyObject *)
1347do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 Py_buffer vsep;
1350 char *s = PyBytes_AS_STRING(self);
1351 Py_ssize_t len = PyBytes_GET_SIZE(self);
1352 char *sep;
1353 Py_ssize_t seplen;
1354 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 if (_getbuffer(sepobj, &vsep) < 0)
1357 return NULL;
1358 sep = vsep.buf;
1359 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 i = 0;
1362 if (striptype != RIGHTSTRIP) {
1363 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1364 i++;
1365 }
1366 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 j = len;
1369 if (striptype != LEFTSTRIP) {
1370 do {
1371 j--;
1372 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1373 j++;
1374 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1379 Py_INCREF(self);
1380 return (PyObject*)self;
1381 }
1382 else
1383 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001384}
1385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
1387Py_LOCAL_INLINE(PyObject *)
1388do_strip(PyBytesObject *self, int striptype)
1389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 char *s = PyBytes_AS_STRING(self);
1391 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 i = 0;
1394 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001395 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 i++;
1397 }
1398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 j = len;
1401 if (striptype != LEFTSTRIP) {
1402 do {
1403 j--;
David Malcolm96960882010-11-05 17:23:41 +00001404 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 j++;
1406 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1409 Py_INCREF(self);
1410 return (PyObject*)self;
1411 }
1412 else
1413 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414}
1415
1416
1417Py_LOCAL_INLINE(PyObject *)
1418do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1423 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (sep != NULL && sep != Py_None) {
1426 return do_xstrip(self, striptype, sep);
1427 }
1428 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429}
1430
1431
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001435Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001438bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (PyTuple_GET_SIZE(args) == 0)
1441 return do_strip(self, BOTHSTRIP); /* Common case */
1442 else
1443 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001444}
1445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001450Strip leading bytes contained in the argument.\n\
1451If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001453bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (PyTuple_GET_SIZE(args) == 0)
1456 return do_strip(self, LEFTSTRIP); /* Common case */
1457 else
1458 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459}
1460
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001465Strip trailing bytes contained in the argument.\n\
1466If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001468bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 if (PyTuple_GET_SIZE(args) == 0)
1471 return do_strip(self, RIGHTSTRIP); /* Common case */
1472 else
1473 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474}
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476
1477PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001478"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001479\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001481string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482as in slice notation.");
1483
1484static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001485bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 PyObject *sub_obj;
1488 const char *str = PyBytes_AS_STRING(self), *sub;
1489 Py_ssize_t sub_len;
1490 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491
Jesus Ceaac451502011-04-20 17:09:23 +02001492 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (PyBytes_Check(sub_obj)) {
1496 sub = PyBytes_AS_STRING(sub_obj);
1497 sub_len = PyBytes_GET_SIZE(sub_obj);
1498 }
1499 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1500 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 return PyLong_FromSsize_t(
1505 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1506 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507}
1508
1509
1510PyDoc_STRVAR(translate__doc__,
1511"B.translate(table[, deletechars]) -> bytes\n\
1512\n\
1513Return a copy of B, where all characters occurring in the\n\
1514optional argument deletechars are removed, and the remaining\n\
1515characters have been mapped through the given translation\n\
1516table, which must be a bytes object of length 256.");
1517
1518static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 register char *input, *output;
1522 const char *table;
1523 register Py_ssize_t i, c, changed = 0;
1524 PyObject *input_obj = (PyObject*)self;
1525 const char *output_start, *del_table=NULL;
1526 Py_ssize_t inlen, tablen, dellen = 0;
1527 PyObject *result;
1528 int trans_table[256];
1529 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1532 &tableobj, &delobj))
1533 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 if (PyBytes_Check(tableobj)) {
1536 table = PyBytes_AS_STRING(tableobj);
1537 tablen = PyBytes_GET_SIZE(tableobj);
1538 }
1539 else if (tableobj == Py_None) {
1540 table = NULL;
1541 tablen = 256;
1542 }
1543 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1544 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (tablen != 256) {
1547 PyErr_SetString(PyExc_ValueError,
1548 "translation table must be 256 characters long");
1549 return NULL;
1550 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (delobj != NULL) {
1553 if (PyBytes_Check(delobj)) {
1554 del_table = PyBytes_AS_STRING(delobj);
1555 dellen = PyBytes_GET_SIZE(delobj);
1556 }
1557 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1558 return NULL;
1559 }
1560 else {
1561 del_table = NULL;
1562 dellen = 0;
1563 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 inlen = PyBytes_GET_SIZE(input_obj);
1566 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1567 if (result == NULL)
1568 return NULL;
1569 output_start = output = PyBytes_AsString(result);
1570 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 if (dellen == 0 && table != NULL) {
1573 /* If no deletions are required, use faster code */
1574 for (i = inlen; --i >= 0; ) {
1575 c = Py_CHARMASK(*input++);
1576 if (Py_CHARMASK((*output++ = table[c])) != c)
1577 changed = 1;
1578 }
1579 if (changed || !PyBytes_CheckExact(input_obj))
1580 return result;
1581 Py_DECREF(result);
1582 Py_INCREF(input_obj);
1583 return input_obj;
1584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 if (table == NULL) {
1587 for (i = 0; i < 256; i++)
1588 trans_table[i] = Py_CHARMASK(i);
1589 } else {
1590 for (i = 0; i < 256; i++)
1591 trans_table[i] = Py_CHARMASK(table[i]);
1592 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 for (i = 0; i < dellen; i++)
1595 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 for (i = inlen; --i >= 0; ) {
1598 c = Py_CHARMASK(*input++);
1599 if (trans_table[c] != -1)
1600 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1601 continue;
1602 changed = 1;
1603 }
1604 if (!changed && PyBytes_CheckExact(input_obj)) {
1605 Py_DECREF(result);
1606 Py_INCREF(input_obj);
1607 return input_obj;
1608 }
1609 /* Fix the size of the resulting string */
1610 if (inlen > 0)
1611 _PyBytes_Resize(&result, output - output_start);
1612 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613}
1614
1615
Georg Brandlabc38772009-04-12 15:51:51 +00001616static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001617bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001618{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001620}
1621
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622/* find and count characters and substrings */
1623
/* Locate the first byte equal to `ch` within the first `buf_len` bytes of
   `buf`; evaluates to a char * to the match, or NULL if there is none. */
#define findchar(buf, buf_len, ch) \
    ((char *)memchr((const void *)(buf), ch, buf_len))
1626
1627/* String ops must return a string. */
1628/* If the object is subclass of string, create a copy */
1629Py_LOCAL(PyBytesObject *)
1630return_self(PyBytesObject *self)
1631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 if (PyBytes_CheckExact(self)) {
1633 Py_INCREF(self);
1634 return self;
1635 }
1636 return (PyBytesObject *)PyBytes_FromStringAndSize(
1637 PyBytes_AS_STRING(self),
1638 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639}
1640
1641Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001642countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 Py_ssize_t count=0;
1645 const char *start=target;
1646 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 while ( (start=findchar(start, end-start, c)) != NULL ) {
1649 count++;
1650 if (count >= maxcount)
1651 break;
1652 start += 1;
1653 }
1654 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655}
1656
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657
1658/* Algorithms for different cases of string replacement */
1659
1660/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1661Py_LOCAL(PyBytesObject *)
1662replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 const char *to_s, Py_ssize_t to_len,
1664 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 char *self_s, *result_s;
1667 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001668 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001673 /* 1 at the end plus 1 after every character;
1674 count = min(maxcount, self_len + 1) */
1675 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001677 else
1678 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1679 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 /* Check for overflow */
1682 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001683 assert(count > 0);
1684 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 PyErr_SetString(PyExc_OverflowError,
1686 "replacement bytes are too long");
1687 return NULL;
1688 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001689 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 if (! (result = (PyBytesObject *)
1692 PyBytes_FromStringAndSize(NULL, result_len)) )
1693 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 self_s = PyBytes_AS_STRING(self);
1696 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 /* Lay the first one down (guaranteed this will occur) */
1701 Py_MEMCPY(result_s, to_s, to_len);
1702 result_s += to_len;
1703 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 for (i=0; i<count; i++) {
1706 *result_s++ = *self_s++;
1707 Py_MEMCPY(result_s, to_s, to_len);
1708 result_s += to_len;
1709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 /* Copy the rest of the original string */
1712 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715}
1716
1717/* Special case for deleting a single character */
1718/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1719Py_LOCAL(PyBytesObject *)
1720replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 char *self_s, *result_s;
1724 char *start, *next, *end;
1725 Py_ssize_t self_len, result_len;
1726 Py_ssize_t count;
1727 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 self_len = PyBytes_GET_SIZE(self);
1730 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 count = countchar(self_s, self_len, from_c, maxcount);
1733 if (count == 0) {
1734 return return_self(self);
1735 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 result_len = self_len - count; /* from_len == 1 */
1738 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 if ( (result = (PyBytesObject *)
1741 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1742 return NULL;
1743 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 start = self_s;
1746 end = self_s + self_len;
1747 while (count-- > 0) {
1748 next = findchar(start, end-start, from_c);
1749 if (next == NULL)
1750 break;
1751 Py_MEMCPY(result_s, start, next-start);
1752 result_s += (next-start);
1753 start = next+1;
1754 }
1755 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758}
1759
1760/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1761
1762Py_LOCAL(PyBytesObject *)
1763replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 const char *from_s, Py_ssize_t from_len,
1765 Py_ssize_t maxcount) {
1766 char *self_s, *result_s;
1767 char *start, *next, *end;
1768 Py_ssize_t self_len, result_len;
1769 Py_ssize_t count, offset;
1770 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 self_len = PyBytes_GET_SIZE(self);
1773 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 count = stringlib_count(self_s, self_len,
1776 from_s, from_len,
1777 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 if (count == 0) {
1780 /* no matches */
1781 return return_self(self);
1782 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 result_len = self_len - (count * from_len);
1785 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if ( (result = (PyBytesObject *)
1788 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1789 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 start = self_s;
1794 end = self_s + self_len;
1795 while (count-- > 0) {
1796 offset = stringlib_find(start, end-start,
1797 from_s, from_len,
1798 0);
1799 if (offset == -1)
1800 break;
1801 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 result_s += (next-start);
1806 start = next+from_len;
1807 }
1808 Py_MEMCPY(result_s, start, end-start);
1809 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810}
1811
1812/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1813Py_LOCAL(PyBytesObject *)
1814replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 char from_c, char to_c,
1816 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 char *self_s, *result_s, *start, *end, *next;
1819 Py_ssize_t self_len;
1820 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 /* The result string will be the same size */
1823 self_s = PyBytes_AS_STRING(self);
1824 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 if (next == NULL) {
1829 /* No matches; return the original string */
1830 return return_self(self);
1831 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 /* Need to make a new string */
1834 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1835 if (result == NULL)
1836 return NULL;
1837 result_s = PyBytes_AS_STRING(result);
1838 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 /* change everything in-place, starting with this one */
1841 start = result_s + (next-self_s);
1842 *start = to_c;
1843 start++;
1844 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 while (--maxcount > 0) {
1847 next = findchar(start, end-start, from_c);
1848 if (next == NULL)
1849 break;
1850 *next = to_c;
1851 start = next+1;
1852 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001855}
1856
1857/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1858Py_LOCAL(PyBytesObject *)
1859replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 const char *from_s, Py_ssize_t from_len,
1861 const char *to_s, Py_ssize_t to_len,
1862 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 char *result_s, *start, *end;
1865 char *self_s;
1866 Py_ssize_t self_len, offset;
1867 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 self_s = PyBytes_AS_STRING(self);
1872 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 offset = stringlib_find(self_s, self_len,
1875 from_s, from_len,
1876 0);
1877 if (offset == -1) {
1878 /* No matches; return the original string */
1879 return return_self(self);
1880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 /* Need to make a new string */
1883 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1884 if (result == NULL)
1885 return NULL;
1886 result_s = PyBytes_AS_STRING(result);
1887 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 /* change everything in-place, starting with this one */
1890 start = result_s + offset;
1891 Py_MEMCPY(start, to_s, from_len);
1892 start += from_len;
1893 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 while ( --maxcount > 0) {
1896 offset = stringlib_find(start, end-start,
1897 from_s, from_len,
1898 0);
1899 if (offset==-1)
1900 break;
1901 Py_MEMCPY(start+offset, to_s, from_len);
1902 start += offset+from_len;
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906}
1907
1908/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1909Py_LOCAL(PyBytesObject *)
1910replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 char from_c,
1912 const char *to_s, Py_ssize_t to_len,
1913 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 char *self_s, *result_s;
1916 char *start, *next, *end;
1917 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001918 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 self_s = PyBytes_AS_STRING(self);
1922 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 count = countchar(self_s, self_len, from_c, maxcount);
1925 if (count == 0) {
1926 /* no matches, return unchanged */
1927 return return_self(self);
1928 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 /* use the difference between current and new, hence the "-1" */
1931 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001932 assert(count > 0);
1933 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 PyErr_SetString(PyExc_OverflowError,
1935 "replacement bytes are too long");
1936 return NULL;
1937 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001938 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 if ( (result = (PyBytesObject *)
1941 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1942 return NULL;
1943 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 start = self_s;
1946 end = self_s + self_len;
1947 while (count-- > 0) {
1948 next = findchar(start, end-start, from_c);
1949 if (next == NULL)
1950 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 if (next == start) {
1953 /* replace with the 'to' */
1954 Py_MEMCPY(result_s, to_s, to_len);
1955 result_s += to_len;
1956 start += 1;
1957 } else {
1958 /* copy the unchanged old then the 'to' */
1959 Py_MEMCPY(result_s, start, next-start);
1960 result_s += (next-start);
1961 Py_MEMCPY(result_s, to_s, to_len);
1962 result_s += to_len;
1963 start = next+1;
1964 }
1965 }
1966 /* Copy the remainder of the remaining string */
1967 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970}
1971
1972/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1973Py_LOCAL(PyBytesObject *)
1974replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 const char *from_s, Py_ssize_t from_len,
1976 const char *to_s, Py_ssize_t to_len,
1977 Py_ssize_t maxcount) {
1978 char *self_s, *result_s;
1979 char *start, *next, *end;
1980 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001981 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 self_s = PyBytes_AS_STRING(self);
1985 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 count = stringlib_count(self_s, self_len,
1988 from_s, from_len,
1989 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 if (count == 0) {
1992 /* no matches, return unchanged */
1993 return return_self(self);
1994 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 /* Check for overflow */
1997 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001998 assert(count > 0);
1999 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 PyErr_SetString(PyExc_OverflowError,
2001 "replacement bytes are too long");
2002 return NULL;
2003 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002004 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 if ( (result = (PyBytesObject *)
2007 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2008 return NULL;
2009 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 start = self_s;
2012 end = self_s + self_len;
2013 while (count-- > 0) {
2014 offset = stringlib_find(start, end-start,
2015 from_s, from_len,
2016 0);
2017 if (offset == -1)
2018 break;
2019 next = start+offset;
2020 if (next == start) {
2021 /* replace with the 'to' */
2022 Py_MEMCPY(result_s, to_s, to_len);
2023 result_s += to_len;
2024 start += from_len;
2025 } else {
2026 /* copy the unchanged old then the 'to' */
2027 Py_MEMCPY(result_s, start, next-start);
2028 result_s += (next-start);
2029 Py_MEMCPY(result_s, to_s, to_len);
2030 result_s += to_len;
2031 start = next+from_len;
2032 }
2033 }
2034 /* Copy the remainder of the remaining string */
2035 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002038}
2039
2040
2041Py_LOCAL(PyBytesObject *)
2042replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 const char *from_s, Py_ssize_t from_len,
2044 const char *to_s, Py_ssize_t to_len,
2045 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (maxcount < 0) {
2048 maxcount = PY_SSIZE_T_MAX;
2049 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2050 /* nothing to do; return the original string */
2051 return return_self(self);
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 if (maxcount == 0 ||
2055 (from_len == 0 && to_len == 0)) {
2056 /* nothing to do; return the original string */
2057 return return_self(self);
2058 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (from_len == 0) {
2063 /* insert the 'to' string everywhere. */
2064 /* >>> "Python".replace("", ".") */
2065 /* '.P.y.t.h.o.n.' */
2066 return replace_interleave(self, to_s, to_len, maxcount);
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2070 /* point for an empty self string to generate a non-empty string */
2071 /* Special case so the remaining code always gets a non-empty string */
2072 if (PyBytes_GET_SIZE(self) == 0) {
2073 return return_self(self);
2074 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 if (to_len == 0) {
2077 /* delete all occurrences of 'from' string */
2078 if (from_len == 1) {
2079 return replace_delete_single_character(
2080 self, from_s[0], maxcount);
2081 } else {
2082 return replace_delete_substring(self, from_s,
2083 from_len, maxcount);
2084 }
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len,
2099 maxcount);
2100 }
2101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 /* Otherwise use the more generic algorithms */
2104 if (from_len == 1) {
2105 return replace_single_character(self, from_s[0],
2106 to_s, to_len, maxcount);
2107 } else {
2108 /* len('from')>=2, len('to')>=1 */
2109 return replace_substring(self, from_s, from_len, to_s, to_len,
2110 maxcount);
2111 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112}
2113
2114PyDoc_STRVAR(replace__doc__,
2115"B.replace(old, new[, count]) -> bytes\n\
2116\n\
2117Return a copy of B with all occurrences of subsection\n\
2118old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002119given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
2121static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002122bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 Py_ssize_t count = -1;
2125 PyObject *from, *to;
2126 const char *from_s, *to_s;
2127 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2130 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if (PyBytes_Check(from)) {
2133 from_s = PyBytes_AS_STRING(from);
2134 from_len = PyBytes_GET_SIZE(from);
2135 }
2136 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2137 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 if (PyBytes_Check(to)) {
2140 to_s = PyBytes_AS_STRING(to);
2141 to_len = PyBytes_GET_SIZE(to);
2142 }
2143 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2144 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 return (PyObject *)replace((PyBytesObject *) self,
2147 from_s, from_len,
2148 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149}
2150
2151/** End DALKE **/
2152
2153/* Matches the end (direction >= 0) or start (direction < 0) of self
2154 * against substr, using the start and end arguments. Returns
2155 * -1 on error, 0 if not found and 1 if found.
2156 */
2157Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002158_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 Py_ssize_t len = PyBytes_GET_SIZE(self);
2162 Py_ssize_t slen;
2163 const char* sub;
2164 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 if (PyBytes_Check(substr)) {
2167 sub = PyBytes_AS_STRING(substr);
2168 slen = PyBytes_GET_SIZE(substr);
2169 }
2170 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2171 return -1;
2172 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 if (direction < 0) {
2177 /* startswith */
2178 if (start+slen > len)
2179 return 0;
2180 } else {
2181 /* endswith */
2182 if (end-start < slen || start > len)
2183 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 if (end-slen > start)
2186 start = end - slen;
2187 }
2188 if (end-start >= slen)
2189 return ! memcmp(str+start, sub, slen);
2190 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191}
2192
2193
2194PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002195"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196\n\
2197Return True if B starts with the specified prefix, False otherwise.\n\
2198With optional start, test B beginning at that position.\n\
2199With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002200prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
2202static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002203bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 Py_ssize_t start = 0;
2206 Py_ssize_t end = PY_SSIZE_T_MAX;
2207 PyObject *subobj;
2208 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Jesus Ceaac451502011-04-20 17:09:23 +02002210 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 return NULL;
2212 if (PyTuple_Check(subobj)) {
2213 Py_ssize_t i;
2214 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2215 result = _bytes_tailmatch(self,
2216 PyTuple_GET_ITEM(subobj, i),
2217 start, end, -1);
2218 if (result == -1)
2219 return NULL;
2220 else if (result) {
2221 Py_RETURN_TRUE;
2222 }
2223 }
2224 Py_RETURN_FALSE;
2225 }
2226 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002227 if (result == -1) {
2228 if (PyErr_ExceptionMatches(PyExc_TypeError))
2229 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2230 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002231 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002232 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002233 else
2234 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002235}
2236
2237
2238PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002239"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240\n\
2241Return True if B ends with the specified suffix, False otherwise.\n\
2242With optional start, test B beginning at that position.\n\
2243With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002244suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002245
2246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002247bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002249 Py_ssize_t start = 0;
2250 Py_ssize_t end = PY_SSIZE_T_MAX;
2251 PyObject *subobj;
2252 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Jesus Ceaac451502011-04-20 17:09:23 +02002254 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 return NULL;
2256 if (PyTuple_Check(subobj)) {
2257 Py_ssize_t i;
2258 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2259 result = _bytes_tailmatch(self,
2260 PyTuple_GET_ITEM(subobj, i),
2261 start, end, +1);
2262 if (result == -1)
2263 return NULL;
2264 else if (result) {
2265 Py_RETURN_TRUE;
2266 }
2267 }
2268 Py_RETURN_FALSE;
2269 }
2270 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002271 if (result == -1) {
2272 if (PyErr_ExceptionMatches(PyExc_TypeError))
2273 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2274 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002276 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 else
2278 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279}
2280
2281
2282PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002283"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002285Decode B using the codec registered for encoding. Default encoding\n\
2286is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002287handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2288a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002290able to handle UnicodeDecodeErrors.");
2291
2292static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002293bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 const char *encoding = NULL;
2296 const char *errors = NULL;
2297 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2300 return NULL;
2301 if (encoding == NULL)
2302 encoding = PyUnicode_GetDefaultEncoding();
2303 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002304}
2305
Guido van Rossum20188312006-05-05 15:15:40 +00002306
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002307PyDoc_STRVAR(splitlines__doc__,
2308"B.splitlines([keepends]) -> list of lines\n\
2309\n\
2310Return a list of the lines in B, breaking at line boundaries.\n\
2311Line breaks are not included in the resulting list unless keepends\n\
2312is given and true.");
2313
2314static PyObject*
2315bytes_splitlines(PyObject *self, PyObject *args)
2316{
2317 int keepends = 0;
2318
2319 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002320 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321
2322 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002323 (PyObject*) self, PyBytes_AS_STRING(self),
2324 PyBytes_GET_SIZE(self), keepends
2325 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002326}
2327
2328
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002329PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002330"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002331\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002332Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002333Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002335
2336static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002337hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002339 if (c >= 128)
2340 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002341 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 return c - '0';
2343 else {
David Malcolm96960882010-11-05 17:23:41 +00002344 if (Py_ISUPPER(c))
2345 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 if (c >= 'a' && c <= 'f')
2347 return c - 'a' + 10;
2348 }
2349 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350}
2351
2352static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002353bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 PyObject *newstring, *hexobj;
2356 char *buf;
2357 Py_UNICODE *hex;
2358 Py_ssize_t hexlen, byteslen, i, j;
2359 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2362 return NULL;
2363 assert(PyUnicode_Check(hexobj));
2364 hexlen = PyUnicode_GET_SIZE(hexobj);
2365 hex = PyUnicode_AS_UNICODE(hexobj);
2366 byteslen = hexlen/2; /* This overestimates if there are spaces */
2367 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2368 if (!newstring)
2369 return NULL;
2370 buf = PyBytes_AS_STRING(newstring);
2371 for (i = j = 0; i < hexlen; i += 2) {
2372 /* skip over spaces in the input */
2373 while (hex[i] == ' ')
2374 i++;
2375 if (i >= hexlen)
2376 break;
2377 top = hex_digit_to_int(hex[i]);
2378 bot = hex_digit_to_int(hex[i+1]);
2379 if (top == -1 || bot == -1) {
2380 PyErr_Format(PyExc_ValueError,
2381 "non-hexadecimal number found in "
2382 "fromhex() arg at position %zd", i);
2383 goto error;
2384 }
2385 buf[j++] = (top << 4) + bot;
2386 }
2387 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2388 goto error;
2389 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002390
2391 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 Py_XDECREF(newstring);
2393 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002394}
2395
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002396PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002397"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002398
2399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002400bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 Py_ssize_t res;
2403 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2404 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002405}
2406
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002407
2408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002409bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002412}
2413
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002414
2415static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002416bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2418 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2419 _Py_capitalize__doc__},
2420 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2421 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2422 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2423 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2424 endswith__doc__},
2425 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2426 expandtabs__doc__},
2427 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2428 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2429 fromhex_doc},
2430 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2431 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2432 _Py_isalnum__doc__},
2433 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2434 _Py_isalpha__doc__},
2435 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2436 _Py_isdigit__doc__},
2437 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2438 _Py_islower__doc__},
2439 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2440 _Py_isspace__doc__},
2441 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2442 _Py_istitle__doc__},
2443 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2444 _Py_isupper__doc__},
2445 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2446 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2447 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2448 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2449 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2450 _Py_maketrans__doc__},
2451 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2452 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2453 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2454 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2455 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2456 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2457 rpartition__doc__},
2458 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2459 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2460 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2461 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2462 splitlines__doc__},
2463 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2464 startswith__doc__},
2465 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2466 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2467 _Py_swapcase__doc__},
2468 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2469 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2470 translate__doc__},
2471 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2472 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2473 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2474 sizeof__doc__},
2475 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002476};
2477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478static PyObject *
2479str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2480
2481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002482bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 PyObject *x = NULL;
2485 const char *encoding = NULL;
2486 const char *errors = NULL;
2487 PyObject *new = NULL;
2488 Py_ssize_t size;
2489 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 if (type != &PyBytes_Type)
2492 return str_subtype_new(type, args, kwds);
2493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2494 &encoding, &errors))
2495 return NULL;
2496 if (x == NULL) {
2497 if (encoding != NULL || errors != NULL) {
2498 PyErr_SetString(PyExc_TypeError,
2499 "encoding or errors without sequence "
2500 "argument");
2501 return NULL;
2502 }
2503 return PyBytes_FromString("");
2504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 if (PyUnicode_Check(x)) {
2507 /* Encode via the codec registry */
2508 if (encoding == NULL) {
2509 PyErr_SetString(PyExc_TypeError,
2510 "string argument without an encoding");
2511 return NULL;
2512 }
2513 new = PyUnicode_AsEncodedString(x, encoding, errors);
2514 if (new == NULL)
2515 return NULL;
2516 assert(PyBytes_Check(new));
2517 return new;
2518 }
2519 /* Is it an integer? */
2520 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2521 if (size == -1 && PyErr_Occurred()) {
2522 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2523 return NULL;
2524 PyErr_Clear();
2525 }
2526 else if (size < 0) {
2527 PyErr_SetString(PyExc_ValueError, "negative count");
2528 return NULL;
2529 }
2530 else {
2531 new = PyBytes_FromStringAndSize(NULL, size);
2532 if (new == NULL) {
2533 return NULL;
2534 }
2535 if (size > 0) {
2536 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2537 }
2538 return new;
2539 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 /* If it's not unicode, there can't be encoding or errors */
2542 if (encoding != NULL || errors != NULL) {
2543 PyErr_SetString(PyExc_TypeError,
2544 "encoding or errors without a string argument");
2545 return NULL;
2546 }
2547 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002548}
2549
2550PyObject *
2551PyBytes_FromObject(PyObject *x)
2552{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 PyObject *new, *it;
2554 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 if (x == NULL) {
2557 PyErr_BadInternalCall();
2558 return NULL;
2559 }
2560 /* Use the modern buffer interface */
2561 if (PyObject_CheckBuffer(x)) {
2562 Py_buffer view;
2563 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2564 return NULL;
2565 new = PyBytes_FromStringAndSize(NULL, view.len);
2566 if (!new)
2567 goto fail;
2568 /* XXX(brett.cannon): Better way to get to internal buffer? */
2569 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2570 &view, view.len, 'C') < 0)
2571 goto fail;
2572 PyBuffer_Release(&view);
2573 return new;
2574 fail:
2575 Py_XDECREF(new);
2576 PyBuffer_Release(&view);
2577 return NULL;
2578 }
2579 if (PyUnicode_Check(x)) {
2580 PyErr_SetString(PyExc_TypeError,
2581 "cannot convert unicode object to bytes");
2582 return NULL;
2583 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if (PyList_CheckExact(x)) {
2586 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2587 if (new == NULL)
2588 return NULL;
2589 for (i = 0; i < Py_SIZE(x); i++) {
2590 Py_ssize_t value = PyNumber_AsSsize_t(
2591 PyList_GET_ITEM(x, i), PyExc_ValueError);
2592 if (value == -1 && PyErr_Occurred()) {
2593 Py_DECREF(new);
2594 return NULL;
2595 }
2596 if (value < 0 || value >= 256) {
2597 PyErr_SetString(PyExc_ValueError,
2598 "bytes must be in range(0, 256)");
2599 Py_DECREF(new);
2600 return NULL;
2601 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002602 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 }
2604 return new;
2605 }
2606 if (PyTuple_CheckExact(x)) {
2607 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2608 if (new == NULL)
2609 return NULL;
2610 for (i = 0; i < Py_SIZE(x); i++) {
2611 Py_ssize_t value = PyNumber_AsSsize_t(
2612 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2613 if (value == -1 && PyErr_Occurred()) {
2614 Py_DECREF(new);
2615 return NULL;
2616 }
2617 if (value < 0 || value >= 256) {
2618 PyErr_SetString(PyExc_ValueError,
2619 "bytes must be in range(0, 256)");
2620 Py_DECREF(new);
2621 return NULL;
2622 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002623 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 }
2625 return new;
2626 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 /* For iterator version, create a string object and resize as needed */
2629 size = _PyObject_LengthHint(x, 64);
2630 if (size == -1 && PyErr_Occurred())
2631 return NULL;
2632 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2633 returning a shared empty bytes string. This required because we
2634 want to call _PyBytes_Resize() the returned object, which we can
2635 only do on bytes objects with refcount == 1. */
2636 size += 1;
2637 new = PyBytes_FromStringAndSize(NULL, size);
2638 if (new == NULL)
2639 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 /* Get the iterator */
2642 it = PyObject_GetIter(x);
2643 if (it == NULL)
2644 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 /* Run the iterator to exhaustion */
2647 for (i = 0; ; i++) {
2648 PyObject *item;
2649 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 /* Get the next item */
2652 item = PyIter_Next(it);
2653 if (item == NULL) {
2654 if (PyErr_Occurred())
2655 goto error;
2656 break;
2657 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* Interpret it as an int (__index__) */
2660 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2661 Py_DECREF(item);
2662 if (value == -1 && PyErr_Occurred())
2663 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 /* Range check */
2666 if (value < 0 || value >= 256) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "bytes must be in range(0, 256)");
2669 goto error;
2670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Append the byte */
2673 if (i >= size) {
2674 size = 2 * size + 1;
2675 if (_PyBytes_Resize(&new, size) < 0)
2676 goto error;
2677 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002678 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 }
2680 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 /* Clean up and return success */
2683 Py_DECREF(it);
2684 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
2686 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Error handling when new != NULL */
2688 Py_XDECREF(it);
2689 Py_DECREF(new);
2690 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691}
2692
2693static PyObject *
2694str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2695{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 PyObject *tmp, *pnew;
2697 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 assert(PyType_IsSubtype(type, &PyBytes_Type));
2700 tmp = bytes_new(&PyBytes_Type, args, kwds);
2701 if (tmp == NULL)
2702 return NULL;
2703 assert(PyBytes_CheckExact(tmp));
2704 n = PyBytes_GET_SIZE(tmp);
2705 pnew = type->tp_alloc(type, n);
2706 if (pnew != NULL) {
2707 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2708 PyBytes_AS_STRING(tmp), n+1);
2709 ((PyBytesObject *)pnew)->ob_shash =
2710 ((PyBytesObject *)tmp)->ob_shash;
2711 }
2712 Py_DECREF(tmp);
2713 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714}
2715
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002716PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002717"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002719bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2720bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002721\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002723 - an iterable yielding integers in range(256)\n\
2724 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725 - a bytes or a buffer object\n\
2726 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002728static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002729
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2732 "bytes",
2733 PyBytesObject_SIZE,
2734 sizeof(char),
2735 bytes_dealloc, /* tp_dealloc */
2736 0, /* tp_print */
2737 0, /* tp_getattr */
2738 0, /* tp_setattr */
2739 0, /* tp_reserved */
2740 (reprfunc)bytes_repr, /* tp_repr */
2741 0, /* tp_as_number */
2742 &bytes_as_sequence, /* tp_as_sequence */
2743 &bytes_as_mapping, /* tp_as_mapping */
2744 (hashfunc)bytes_hash, /* tp_hash */
2745 0, /* tp_call */
2746 bytes_str, /* tp_str */
2747 PyObject_GenericGetAttr, /* tp_getattro */
2748 0, /* tp_setattro */
2749 &bytes_as_buffer, /* tp_as_buffer */
2750 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2751 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2752 bytes_doc, /* tp_doc */
2753 0, /* tp_traverse */
2754 0, /* tp_clear */
2755 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2756 0, /* tp_weaklistoffset */
2757 bytes_iter, /* tp_iter */
2758 0, /* tp_iternext */
2759 bytes_methods, /* tp_methods */
2760 0, /* tp_members */
2761 0, /* tp_getset */
2762 &PyBaseObject_Type, /* tp_base */
2763 0, /* tp_dict */
2764 0, /* tp_descr_get */
2765 0, /* tp_descr_set */
2766 0, /* tp_dictoffset */
2767 0, /* tp_init */
2768 0, /* tp_alloc */
2769 bytes_new, /* tp_new */
2770 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002771};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002772
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773void
2774PyBytes_Concat(register PyObject **pv, register PyObject *w)
2775{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 register PyObject *v;
2777 assert(pv != NULL);
2778 if (*pv == NULL)
2779 return;
2780 if (w == NULL) {
2781 Py_DECREF(*pv);
2782 *pv = NULL;
2783 return;
2784 }
2785 v = bytes_concat(*pv, w);
2786 Py_DECREF(*pv);
2787 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788}
2789
2790void
2791PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 PyBytes_Concat(pv, w);
2794 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795}
2796
2797
2798/* The following function breaks the notion that strings are immutable:
2799 it changes the size of a string. We get away with this only if there
2800 is only one module referencing the object. You can also think of it
2801 as creating a new string object and destroying the old one, only
2802 more efficiently. In any case, don't use this if the string may
2803 already be known to some other part of the code...
2804 Note that if there's not enough memory to resize the string, the original
2805 string object at *pv is deallocated, *pv is set to NULL, an "out of
2806 memory" exception is set, and -1 is returned. Else (on success) 0 is
2807 returned, and the value in *pv may or may not be the same as on input.
2808 As always, an extra byte is allocated for a trailing \0 byte (newsize
2809 does *not* include that), and a trailing \0 byte is stored.
2810*/
2811
2812int
2813_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 register PyObject *v;
2816 register PyBytesObject *sv;
2817 v = *pv;
2818 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2819 *pv = 0;
2820 Py_DECREF(v);
2821 PyErr_BadInternalCall();
2822 return -1;
2823 }
2824 /* XXX UNREF/NEWREF interface should be more symmetrical */
2825 _Py_DEC_REFTOTAL;
2826 _Py_ForgetReference(v);
2827 *pv = (PyObject *)
2828 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2829 if (*pv == NULL) {
2830 PyObject_Del(v);
2831 PyErr_NoMemory();
2832 return -1;
2833 }
2834 _Py_NewReference(*pv);
2835 sv = (PyBytesObject *) *pv;
2836 Py_SIZE(sv) = newsize;
2837 sv->ob_sval[newsize] = '\0';
2838 sv->ob_shash = -1; /* invalidate cached hash value */
2839 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840}
2841
2842/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2843 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2844 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002845 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846 * . *pbuf is set to point into it,
2847 * *plen set to the # of chars following that.
2848 * Caller must decref it when done using pbuf.
2849 * The string starting at *pbuf is of the form
2850 * "-"? ("0x" | "0X")? digit+
2851 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2852 * set in flags. The case of hex digits will be correct,
2853 * There will be at least prec digits, zero-filled on the left if
2854 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 * val object to be converted
2856 * flags bitmask of format flags; only F_ALT is looked at
2857 * prec minimum number of digits; 0-fill on left if needed
2858 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859 *
2860 * CAUTION: o, x and X conversions on regular ints can never
2861 * produce a '-' sign, but can for Python's unbounded ints.
2862 */
2863PyObject*
2864_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002865 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 PyObject *result = NULL;
2868 char *buf;
2869 Py_ssize_t i;
2870 int sign; /* 1 if '-', else 0 */
2871 int len; /* number of characters */
2872 Py_ssize_t llen;
2873 int numdigits; /* len == numnondigits + numdigits */
2874 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 /* Avoid exceeding SSIZE_T_MAX */
2877 if (prec > INT_MAX-3) {
2878 PyErr_SetString(PyExc_OverflowError,
2879 "precision too large");
2880 return NULL;
2881 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 switch (type) {
2884 case 'd':
2885 case 'u':
2886 /* Special-case boolean: we want 0/1 */
2887 if (PyBool_Check(val))
2888 result = PyNumber_ToBase(val, 10);
2889 else
2890 result = Py_TYPE(val)->tp_str(val);
2891 break;
2892 case 'o':
2893 numnondigits = 2;
2894 result = PyNumber_ToBase(val, 8);
2895 break;
2896 case 'x':
2897 case 'X':
2898 numnondigits = 2;
2899 result = PyNumber_ToBase(val, 16);
2900 break;
2901 default:
2902 assert(!"'type' not in [duoxX]");
2903 }
2904 if (!result)
2905 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 buf = _PyUnicode_AsString(result);
2908 if (!buf) {
2909 Py_DECREF(result);
2910 return NULL;
2911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 /* To modify the string in-place, there can only be one reference. */
2914 if (Py_REFCNT(result) != 1) {
2915 PyErr_BadInternalCall();
2916 return NULL;
2917 }
2918 llen = PyUnicode_GetSize(result);
2919 if (llen > INT_MAX) {
2920 PyErr_SetString(PyExc_ValueError,
2921 "string too large in _PyBytes_FormatLong");
2922 return NULL;
2923 }
2924 len = (int)llen;
2925 if (buf[len-1] == 'L') {
2926 --len;
2927 buf[len] = '\0';
2928 }
2929 sign = buf[0] == '-';
2930 numnondigits += sign;
2931 numdigits = len - numnondigits;
2932 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002934 /* Get rid of base marker unless F_ALT */
2935 if (((flags & F_ALT) == 0 &&
2936 (type == 'o' || type == 'x' || type == 'X'))) {
2937 assert(buf[sign] == '0');
2938 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2939 buf[sign+1] == 'o');
2940 numnondigits -= 2;
2941 buf += 2;
2942 len -= 2;
2943 if (sign)
2944 buf[0] = '-';
2945 assert(len == numnondigits + numdigits);
2946 assert(numdigits > 0);
2947 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 /* Fill with leading zeroes to meet minimum width. */
2950 if (prec > numdigits) {
2951 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2952 numnondigits + prec);
2953 char *b1;
2954 if (!r1) {
2955 Py_DECREF(result);
2956 return NULL;
2957 }
2958 b1 = PyBytes_AS_STRING(r1);
2959 for (i = 0; i < numnondigits; ++i)
2960 *b1++ = *buf++;
2961 for (i = 0; i < prec - numdigits; i++)
2962 *b1++ = '0';
2963 for (i = 0; i < numdigits; i++)
2964 *b1++ = *buf++;
2965 *b1 = '\0';
2966 Py_DECREF(result);
2967 result = r1;
2968 buf = PyBytes_AS_STRING(result);
2969 len = numnondigits + prec;
2970 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 /* Fix up case for hex conversions. */
2973 if (type == 'X') {
2974 /* Need to convert all lower case letters to upper case.
2975 and need to convert 0x to 0X (and -0x to -0X). */
2976 for (i = 0; i < len; i++)
2977 if (buf[i] >= 'a' && buf[i] <= 'x')
2978 buf[i] -= 'a'-'A';
2979 }
2980 *pbuf = buf;
2981 *plen = len;
2982 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983}
2984
2985void
2986PyBytes_Fini(void)
2987{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 int i;
2989 for (i = 0; i < UCHAR_MAX + 1; i++) {
2990 Py_XDECREF(characters[i]);
2991 characters[i] = NULL;
2992 }
2993 Py_XDECREF(nullstring);
2994 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995}
2996
Benjamin Peterson4116f362008-05-27 00:36:20 +00002997/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002998
2999typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003000 PyObject_HEAD
3001 Py_ssize_t it_index;
3002 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003004
3005static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003006striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 _PyObject_GC_UNTRACK(it);
3009 Py_XDECREF(it->it_seq);
3010 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003011}
3012
3013static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 Py_VISIT(it->it_seq);
3017 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003018}
3019
3020static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 PyBytesObject *seq;
3024 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 assert(it != NULL);
3027 seq = it->it_seq;
3028 if (seq == NULL)
3029 return NULL;
3030 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3033 item = PyLong_FromLong(
3034 (unsigned char)seq->ob_sval[it->it_index]);
3035 if (item != NULL)
3036 ++it->it_index;
3037 return item;
3038 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 Py_DECREF(seq);
3041 it->it_seq = NULL;
3042 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043}
3044
3045static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 Py_ssize_t len = 0;
3049 if (it->it_seq)
3050 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3051 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052}
3053
3054PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3059 length_hint_doc},
3060 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061};
3062
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003063PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003064 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3065 "bytes_iterator", /* tp_name */
3066 sizeof(striterobject), /* tp_basicsize */
3067 0, /* tp_itemsize */
3068 /* methods */
3069 (destructor)striter_dealloc, /* tp_dealloc */
3070 0, /* tp_print */
3071 0, /* tp_getattr */
3072 0, /* tp_setattr */
3073 0, /* tp_reserved */
3074 0, /* tp_repr */
3075 0, /* tp_as_number */
3076 0, /* tp_as_sequence */
3077 0, /* tp_as_mapping */
3078 0, /* tp_hash */
3079 0, /* tp_call */
3080 0, /* tp_str */
3081 PyObject_GenericGetAttr, /* tp_getattro */
3082 0, /* tp_setattro */
3083 0, /* tp_as_buffer */
3084 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3085 0, /* tp_doc */
3086 (traverseproc)striter_traverse, /* tp_traverse */
3087 0, /* tp_clear */
3088 0, /* tp_richcompare */
3089 0, /* tp_weaklistoffset */
3090 PyObject_SelfIter, /* tp_iter */
3091 (iternextfunc)striter_next, /* tp_iternext */
3092 striter_methods, /* tp_methods */
3093 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003094};
3095
3096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003097bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 if (!PyBytes_Check(seq)) {
3102 PyErr_BadInternalCall();
3103 return NULL;
3104 }
3105 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3106 if (it == NULL)
3107 return NULL;
3108 it->it_index = 0;
3109 Py_INCREF(seq);
3110 it->it_seq = (PyBytesObject *)seq;
3111 _PyObject_GC_TRACK(it);
3112 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113}