blob: d7f99815c8a3497675b01ae9546cfde2b2715bfc [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
47 For PyBytes_FromStringAndSize(), the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 static const char *hexdigits = "0123456789abcdef";
568 register PyBytesObject* op = (PyBytesObject*) obj;
569 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000570 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000572 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_SetString(PyExc_OverflowError,
574 "bytes object is too large to make repr");
575 return NULL;
576 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000577 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 v = PyUnicode_FromUnicode(NULL, newsize);
579 if (v == NULL) {
580 return NULL;
581 }
582 else {
583 register Py_ssize_t i;
584 register Py_UNICODE c;
585 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
586 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 /* Figure out which quote to use; single is preferred */
589 quote = '\'';
590 if (smartquotes) {
591 char *test, *start;
592 start = PyBytes_AS_STRING(op);
593 for (test = start; test < start+length; ++test) {
594 if (*test == '"') {
595 quote = '\''; /* back to single */
596 goto decided;
597 }
598 else if (*test == '\'')
599 quote = '"';
600 }
601 decided:
602 ;
603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 *p++ = 'b', *p++ = quote;
606 for (i = 0; i < length; i++) {
607 /* There's at least enough room for a hex escape
608 and a closing quote. */
609 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
610 c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
622 *p++ = hexdigits[(c & 0xf0) >> 4];
623 *p++ = hexdigits[c & 0xf];
624 }
625 else
626 *p++ = c;
627 }
628 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
629 *p++ = quote;
630 *p = '\0';
631 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
632 Py_DECREF(v);
633 return NULL;
634 }
635 return v;
636 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000637}
638
Neal Norwitz6968b052007-02-27 19:02:19 +0000639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000640bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000643}
644
Neal Norwitz6968b052007-02-27 19:02:19 +0000645static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000646bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 if (Py_BytesWarningFlag) {
649 if (PyErr_WarnEx(PyExc_BytesWarning,
650 "str() on a bytes instance", 1))
651 return NULL;
652 }
653 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660}
Neal Norwitz6968b052007-02-27 19:02:19 +0000661
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662/* This is also used by PyBytes_Concat() */
663static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000664bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 Py_ssize_t size;
667 Py_buffer va, vb;
668 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 va.len = -1;
671 vb.len = -1;
672 if (_getbuffer(a, &va) < 0 ||
673 _getbuffer(b, &vb) < 0) {
674 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
675 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
676 goto done;
677 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 /* Optimize end cases */
680 if (va.len == 0 && PyBytes_CheckExact(b)) {
681 result = b;
682 Py_INCREF(result);
683 goto done;
684 }
685 if (vb.len == 0 && PyBytes_CheckExact(a)) {
686 result = a;
687 Py_INCREF(result);
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 size = va.len + vb.len;
692 if (size < 0) {
693 PyErr_NoMemory();
694 goto done;
695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 result = PyBytes_FromStringAndSize(NULL, size);
698 if (result != NULL) {
699 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
700 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702
703 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 if (va.len != -1)
705 PyBuffer_Release(&va);
706 if (vb.len != -1)
707 PyBuffer_Release(&vb);
708 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000709}
Neal Norwitz6968b052007-02-27 19:02:19 +0000710
711static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000712bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000713{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 register Py_ssize_t i;
715 register Py_ssize_t j;
716 register Py_ssize_t size;
717 register PyBytesObject *op;
718 size_t nbytes;
719 if (n < 0)
720 n = 0;
721 /* watch out for overflows: the size can overflow int,
722 * and the # of bytes needed can overflow size_t
723 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000724 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 PyErr_SetString(PyExc_OverflowError,
726 "repeated bytes are too long");
727 return NULL;
728 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000729 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
731 Py_INCREF(a);
732 return (PyObject *)a;
733 }
734 nbytes = (size_t)size;
735 if (nbytes + PyBytesObject_SIZE <= nbytes) {
736 PyErr_SetString(PyExc_OverflowError,
737 "repeated bytes are too long");
738 return NULL;
739 }
740 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
741 if (op == NULL)
742 return PyErr_NoMemory();
743 PyObject_INIT_VAR(op, &PyBytes_Type, size);
744 op->ob_shash = -1;
745 op->ob_sval[size] = '\0';
746 if (Py_SIZE(a) == 1 && n > 0) {
747 memset(op->ob_sval, a->ob_sval[0] , n);
748 return (PyObject *) op;
749 }
750 i = 0;
751 if (i < size) {
752 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
753 i = Py_SIZE(a);
754 }
755 while (i < size) {
756 j = (i <= size-i) ? i : size-i;
757 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
758 i += j;
759 }
760 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000761}
762
Guido van Rossum98297ee2007-11-06 21:34:58 +0000763static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000764bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000765{
766 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
767 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000768 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000769 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000770 PyErr_Clear();
771 if (_getbuffer(arg, &varg) < 0)
772 return -1;
773 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
774 varg.buf, varg.len, 0);
775 PyBuffer_Release(&varg);
776 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777 }
778 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
780 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782
Antoine Pitrou0010d372010-08-15 17:12:55 +0000783 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784}
785
Neal Norwitz6968b052007-02-27 19:02:19 +0000786static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000787bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 if (i < 0 || i >= Py_SIZE(a)) {
790 PyErr_SetString(PyExc_IndexError, "index out of range");
791 return NULL;
792 }
793 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000794}
795
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000797bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 int c;
800 Py_ssize_t len_a, len_b;
801 Py_ssize_t min_len;
802 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 /* Make sure both arguments are strings. */
805 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
806 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
807 (PyObject_IsInstance((PyObject*)a,
808 (PyObject*)&PyUnicode_Type) ||
809 PyObject_IsInstance((PyObject*)b,
810 (PyObject*)&PyUnicode_Type))) {
811 if (PyErr_WarnEx(PyExc_BytesWarning,
812 "Comparison between bytes and string", 1))
813 return NULL;
814 }
815 result = Py_NotImplemented;
816 goto out;
817 }
818 if (a == b) {
819 switch (op) {
820 case Py_EQ:case Py_LE:case Py_GE:
821 result = Py_True;
822 goto out;
823 case Py_NE:case Py_LT:case Py_GT:
824 result = Py_False;
825 goto out;
826 }
827 }
828 if (op == Py_EQ) {
829 /* Supporting Py_NE here as well does not save
830 much time, since Py_NE is rarely used. */
831 if (Py_SIZE(a) == Py_SIZE(b)
832 && (a->ob_sval[0] == b->ob_sval[0]
833 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
834 result = Py_True;
835 } else {
836 result = Py_False;
837 }
838 goto out;
839 }
840 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
841 min_len = (len_a < len_b) ? len_a : len_b;
842 if (min_len > 0) {
843 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
844 if (c==0)
845 c = memcmp(a->ob_sval, b->ob_sval, min_len);
846 } else
847 c = 0;
848 if (c == 0)
849 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
850 switch (op) {
851 case Py_LT: c = c < 0; break;
852 case Py_LE: c = c <= 0; break;
853 case Py_EQ: assert(0); break; /* unreachable */
854 case Py_NE: c = c != 0; break;
855 case Py_GT: c = c > 0; break;
856 case Py_GE: c = c >= 0; break;
857 default:
858 result = Py_NotImplemented;
859 goto out;
860 }
861 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000862 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 Py_INCREF(result);
864 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000865}
866
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000867static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000868bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 register Py_ssize_t len;
871 register unsigned char *p;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100872 register Py_uhash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (a->ob_shash != -1)
875 return a->ob_shash;
876 len = Py_SIZE(a);
877 p = (unsigned char *) a->ob_sval;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100878 x = (Py_uhash_t)*p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 while (--len >= 0)
Mark Dickinson57e683e2011-09-24 18:18:40 +0100880 x = (1000003U*x) ^ (Py_uhash_t)*p++;
881 x ^= (Py_uhash_t)Py_SIZE(a);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 if (x == -1)
883 x = -2;
884 a->ob_shash = x;
885 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000886}
887
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000888static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000889bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 if (PyIndex_Check(item)) {
892 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
893 if (i == -1 && PyErr_Occurred())
894 return NULL;
895 if (i < 0)
896 i += PyBytes_GET_SIZE(self);
897 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
898 PyErr_SetString(PyExc_IndexError,
899 "index out of range");
900 return NULL;
901 }
902 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
903 }
904 else if (PySlice_Check(item)) {
905 Py_ssize_t start, stop, step, slicelength, cur, i;
906 char* source_buf;
907 char* result_buf;
908 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000909
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000910 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 PyBytes_GET_SIZE(self),
912 &start, &stop, &step, &slicelength) < 0) {
913 return NULL;
914 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 if (slicelength <= 0) {
917 return PyBytes_FromStringAndSize("", 0);
918 }
919 else if (start == 0 && step == 1 &&
920 slicelength == PyBytes_GET_SIZE(self) &&
921 PyBytes_CheckExact(self)) {
922 Py_INCREF(self);
923 return (PyObject *)self;
924 }
925 else if (step == 1) {
926 return PyBytes_FromStringAndSize(
927 PyBytes_AS_STRING(self) + start,
928 slicelength);
929 }
930 else {
931 source_buf = PyBytes_AS_STRING(self);
932 result = PyBytes_FromStringAndSize(NULL, slicelength);
933 if (result == NULL)
934 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 result_buf = PyBytes_AS_STRING(result);
937 for (cur = start, i = 0; i < slicelength;
938 cur += step, i++) {
939 result_buf[i] = source_buf[cur];
940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 return result;
943 }
944 }
945 else {
946 PyErr_Format(PyExc_TypeError,
947 "byte indices must be integers, not %.200s",
948 Py_TYPE(item)->tp_name);
949 return NULL;
950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
953static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
957 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958}
959
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000960static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 (lenfunc)bytes_length, /*sq_length*/
962 (binaryfunc)bytes_concat, /*sq_concat*/
963 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
964 (ssizeargfunc)bytes_item, /*sq_item*/
965 0, /*sq_slice*/
966 0, /*sq_ass_item*/
967 0, /*sq_ass_slice*/
968 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969};
970
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 (lenfunc)bytes_length,
973 (binaryfunc)bytes_subscript,
974 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975};
976
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000977static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 (getbufferproc)bytes_buffer_getbuffer,
979 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980};
981
982
/* Strip direction selectors; they double as indices into stripformat. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)]+3)
991
Neal Norwitz6968b052007-02-27 19:02:19 +0000992PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000994\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000995Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996If sep is not specified or is None, B is split on ASCII whitespace\n\
997characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000998If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000999
1000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001001bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1004 Py_ssize_t maxsplit = -1;
1005 const char *s = PyBytes_AS_STRING(self), *sub;
1006 Py_buffer vsub;
1007 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1010 return NULL;
1011 if (maxsplit < 0)
1012 maxsplit = PY_SSIZE_T_MAX;
1013 if (subobj == Py_None)
1014 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1015 if (_getbuffer(subobj, &vsub) < 0)
1016 return NULL;
1017 sub = vsub.buf;
1018 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1021 PyBuffer_Release(&vsub);
1022 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001023}
1024
Neal Norwitz6968b052007-02-27 19:02:19 +00001025PyDoc_STRVAR(partition__doc__,
1026"B.partition(sep) -> (head, sep, tail)\n\
1027\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001028Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001029the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001033bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 const char *sep;
1036 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (PyBytes_Check(sep_obj)) {
1039 sep = PyBytes_AS_STRING(sep_obj);
1040 sep_len = PyBytes_GET_SIZE(sep_obj);
1041 }
1042 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1043 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return stringlib_partition(
1046 (PyObject*) self,
1047 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1048 sep_obj, sep, sep_len
1049 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001050}
1051
1052PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001053"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001054\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001055Search for the separator sep in B, starting at the end of B,\n\
1056and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001057part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001058bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
1060static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 const char *sep;
1064 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 if (PyBytes_Check(sep_obj)) {
1067 sep = PyBytes_AS_STRING(sep_obj);
1068 sep_len = PyBytes_GET_SIZE(sep_obj);
1069 }
1070 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1071 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 return stringlib_rpartition(
1074 (PyObject*) self,
1075 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1076 sep_obj, sep, sep_len
1077 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001078}
1079
Neal Norwitz6968b052007-02-27 19:02:19 +00001080PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001081"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001082\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001083Return a list of the sections in B, using sep as the delimiter,\n\
1084starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001085If sep is not given, B is split on ASCII whitespace characters\n\
1086(space, tab, return, newline, formfeed, vertical tab).\n\
1087If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001089
Neal Norwitz6968b052007-02-27 19:02:19 +00001090static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001091bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001092{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1094 Py_ssize_t maxsplit = -1;
1095 const char *s = PyBytes_AS_STRING(self), *sub;
1096 Py_buffer vsub;
1097 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1100 return NULL;
1101 if (maxsplit < 0)
1102 maxsplit = PY_SSIZE_T_MAX;
1103 if (subobj == Py_None)
1104 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1105 if (_getbuffer(subobj, &vsub) < 0)
1106 return NULL;
1107 sub = vsub.buf;
1108 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1111 PyBuffer_Release(&vsub);
1112 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001113}
1114
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001115
1116PyDoc_STRVAR(join__doc__,
1117"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001118\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001119Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001120Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1121
Neal Norwitz6968b052007-02-27 19:02:19 +00001122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001123bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 char *sep = PyBytes_AS_STRING(self);
1126 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1127 PyObject *res = NULL;
1128 char *p;
1129 Py_ssize_t seqlen = 0;
1130 size_t sz = 0;
1131 Py_ssize_t i;
1132 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 seq = PySequence_Fast(orig, "");
1135 if (seq == NULL) {
1136 return NULL;
1137 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 seqlen = PySequence_Size(seq);
1140 if (seqlen == 0) {
1141 Py_DECREF(seq);
1142 return PyBytes_FromString("");
1143 }
1144 if (seqlen == 1) {
1145 item = PySequence_Fast_GET_ITEM(seq, 0);
1146 if (PyBytes_CheckExact(item)) {
1147 Py_INCREF(item);
1148 Py_DECREF(seq);
1149 return item;
1150 }
1151 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 /* There are at least two things to join, or else we have a subclass
1154 * of the builtin types in the sequence.
1155 * Do a pre-pass to figure out the total amount of space we'll
1156 * need (sz), and see whether all argument are bytes.
1157 */
1158 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1159 for (i = 0; i < seqlen; i++) {
1160 const size_t old_sz = sz;
1161 item = PySequence_Fast_GET_ITEM(seq, i);
1162 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1163 PyErr_Format(PyExc_TypeError,
1164 "sequence item %zd: expected bytes,"
1165 " %.80s found",
1166 i, Py_TYPE(item)->tp_name);
1167 Py_DECREF(seq);
1168 return NULL;
1169 }
1170 sz += Py_SIZE(item);
1171 if (i != 0)
1172 sz += seplen;
1173 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1174 PyErr_SetString(PyExc_OverflowError,
1175 "join() result is too long for bytes");
1176 Py_DECREF(seq);
1177 return NULL;
1178 }
1179 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* Allocate result space. */
1182 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1183 if (res == NULL) {
1184 Py_DECREF(seq);
1185 return NULL;
1186 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 /* Catenate everything. */
1189 /* I'm not worried about a PyByteArray item growing because there's
1190 nowhere in this function where we release the GIL. */
1191 p = PyBytes_AS_STRING(res);
1192 for (i = 0; i < seqlen; ++i) {
1193 size_t n;
1194 char *q;
1195 if (i) {
1196 Py_MEMCPY(p, sep, seplen);
1197 p += seplen;
1198 }
1199 item = PySequence_Fast_GET_ITEM(seq, i);
1200 n = Py_SIZE(item);
1201 if (PyBytes_Check(item))
1202 q = PyBytes_AS_STRING(item);
1203 else
1204 q = PyByteArray_AS_STRING(item);
1205 Py_MEMCPY(p, q, n);
1206 p += n;
1207 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 Py_DECREF(seq);
1210 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001211}
1212
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213PyObject *
1214_PyBytes_Join(PyObject *sep, PyObject *x)
1215{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 assert(sep != NULL && PyBytes_Check(sep));
1217 assert(x != NULL);
1218 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219}
1220
/* helper macro to fixup start/end slice values
 *
 * Clamps `end` to at most `len`; a negative `start` or `end` is offset
 * by `len` and then clamped to 0.  `start` and `end` must be modifiable
 * lvalues and are updated in place.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is a single statement (safe under if/else), and every
 * argument is parenthesized so expression arguments expand correctly.
 * `len` is evaluated more than once, so it must have no side effects.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235
1236Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001237bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyObject *subobj;
1240 const char *sub;
1241 Py_ssize_t sub_len;
1242 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Jesus Ceaac451502011-04-20 17:09:23 +02001244 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1245 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 if (PyBytes_Check(subobj)) {
1249 sub = PyBytes_AS_STRING(subobj);
1250 sub_len = PyBytes_GET_SIZE(subobj);
1251 }
1252 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1253 /* XXX - the "expected a character buffer object" is pretty
1254 confusing for a non-expert. remap to something else ? */
1255 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 if (dir > 0)
1258 return stringlib_find_slice(
1259 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1260 sub, sub_len, start, end);
1261 else
1262 return stringlib_rfind_slice(
1263 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1264 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265}
1266
1267
1268PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001269"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001270\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001271Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001272such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001274\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275Return -1 on failure.");
1276
Neal Norwitz6968b052007-02-27 19:02:19 +00001277static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001278bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001279{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 Py_ssize_t result = bytes_find_internal(self, args, +1);
1281 if (result == -2)
1282 return NULL;
1283 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001284}
1285
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286
1287PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001288"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001289\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290Like B.find() but raise ValueError when the substring is not found.");
1291
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001292static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001293bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_ssize_t result = bytes_find_internal(self, args, +1);
1296 if (result == -2)
1297 return NULL;
1298 if (result == -1) {
1299 PyErr_SetString(PyExc_ValueError,
1300 "substring not found");
1301 return NULL;
1302 }
1303 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001304}
1305
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
1307PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001308"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001309\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001311such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001313\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314Return -1 on failure.");
1315
Neal Norwitz6968b052007-02-27 19:02:19 +00001316static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001317bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001318{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 Py_ssize_t result = bytes_find_internal(self, args, -1);
1320 if (result == -2)
1321 return NULL;
1322 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001323}
1324
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001325
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001327"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328\n\
1329Like B.rfind() but raise ValueError when the substring is not found.");
1330
1331static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001332bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 Py_ssize_t result = bytes_find_internal(self, args, -1);
1335 if (result == -2)
1336 return NULL;
1337 if (result == -1) {
1338 PyErr_SetString(PyExc_ValueError,
1339 "substring not found");
1340 return NULL;
1341 }
1342 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001343}
1344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345
1346Py_LOCAL_INLINE(PyObject *)
1347do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 Py_buffer vsep;
1350 char *s = PyBytes_AS_STRING(self);
1351 Py_ssize_t len = PyBytes_GET_SIZE(self);
1352 char *sep;
1353 Py_ssize_t seplen;
1354 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 if (_getbuffer(sepobj, &vsep) < 0)
1357 return NULL;
1358 sep = vsep.buf;
1359 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 i = 0;
1362 if (striptype != RIGHTSTRIP) {
1363 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1364 i++;
1365 }
1366 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 j = len;
1369 if (striptype != LEFTSTRIP) {
1370 do {
1371 j--;
1372 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1373 j++;
1374 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1379 Py_INCREF(self);
1380 return (PyObject*)self;
1381 }
1382 else
1383 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001384}
1385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
1387Py_LOCAL_INLINE(PyObject *)
1388do_strip(PyBytesObject *self, int striptype)
1389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 char *s = PyBytes_AS_STRING(self);
1391 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 i = 0;
1394 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001395 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 i++;
1397 }
1398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 j = len;
1401 if (striptype != LEFTSTRIP) {
1402 do {
1403 j--;
David Malcolm96960882010-11-05 17:23:41 +00001404 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 j++;
1406 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1409 Py_INCREF(self);
1410 return (PyObject*)self;
1411 }
1412 else
1413 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414}
1415
1416
1417Py_LOCAL_INLINE(PyObject *)
1418do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1423 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (sep != NULL && sep != Py_None) {
1426 return do_xstrip(self, striptype, sep);
1427 }
1428 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429}
1430
1431
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001435Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001438bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (PyTuple_GET_SIZE(args) == 0)
1441 return do_strip(self, BOTHSTRIP); /* Common case */
1442 else
1443 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001444}
1445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001450Strip leading bytes contained in the argument.\n\
1451If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001453bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (PyTuple_GET_SIZE(args) == 0)
1456 return do_strip(self, LEFTSTRIP); /* Common case */
1457 else
1458 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459}
1460
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001465Strip trailing bytes contained in the argument.\n\
1466If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001468bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 if (PyTuple_GET_SIZE(args) == 0)
1471 return do_strip(self, RIGHTSTRIP); /* Common case */
1472 else
1473 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474}
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476
1477PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001478"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001479\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001481string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482as in slice notation.");
1483
1484static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001485bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 PyObject *sub_obj;
1488 const char *str = PyBytes_AS_STRING(self), *sub;
1489 Py_ssize_t sub_len;
1490 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491
Jesus Ceaac451502011-04-20 17:09:23 +02001492 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (PyBytes_Check(sub_obj)) {
1496 sub = PyBytes_AS_STRING(sub_obj);
1497 sub_len = PyBytes_GET_SIZE(sub_obj);
1498 }
1499 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1500 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 return PyLong_FromSsize_t(
1505 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1506 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507}
1508
1509
1510PyDoc_STRVAR(translate__doc__,
1511"B.translate(table[, deletechars]) -> bytes\n\
1512\n\
1513Return a copy of B, where all characters occurring in the\n\
1514optional argument deletechars are removed, and the remaining\n\
1515characters have been mapped through the given translation\n\
1516table, which must be a bytes object of length 256.");
1517
1518static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 register char *input, *output;
1522 const char *table;
1523 register Py_ssize_t i, c, changed = 0;
1524 PyObject *input_obj = (PyObject*)self;
1525 const char *output_start, *del_table=NULL;
1526 Py_ssize_t inlen, tablen, dellen = 0;
1527 PyObject *result;
1528 int trans_table[256];
1529 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1532 &tableobj, &delobj))
1533 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 if (PyBytes_Check(tableobj)) {
1536 table = PyBytes_AS_STRING(tableobj);
1537 tablen = PyBytes_GET_SIZE(tableobj);
1538 }
1539 else if (tableobj == Py_None) {
1540 table = NULL;
1541 tablen = 256;
1542 }
1543 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1544 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (tablen != 256) {
1547 PyErr_SetString(PyExc_ValueError,
1548 "translation table must be 256 characters long");
1549 return NULL;
1550 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (delobj != NULL) {
1553 if (PyBytes_Check(delobj)) {
1554 del_table = PyBytes_AS_STRING(delobj);
1555 dellen = PyBytes_GET_SIZE(delobj);
1556 }
1557 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1558 return NULL;
1559 }
1560 else {
1561 del_table = NULL;
1562 dellen = 0;
1563 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 inlen = PyBytes_GET_SIZE(input_obj);
1566 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1567 if (result == NULL)
1568 return NULL;
1569 output_start = output = PyBytes_AsString(result);
1570 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 if (dellen == 0 && table != NULL) {
1573 /* If no deletions are required, use faster code */
1574 for (i = inlen; --i >= 0; ) {
1575 c = Py_CHARMASK(*input++);
1576 if (Py_CHARMASK((*output++ = table[c])) != c)
1577 changed = 1;
1578 }
1579 if (changed || !PyBytes_CheckExact(input_obj))
1580 return result;
1581 Py_DECREF(result);
1582 Py_INCREF(input_obj);
1583 return input_obj;
1584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 if (table == NULL) {
1587 for (i = 0; i < 256; i++)
1588 trans_table[i] = Py_CHARMASK(i);
1589 } else {
1590 for (i = 0; i < 256; i++)
1591 trans_table[i] = Py_CHARMASK(table[i]);
1592 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 for (i = 0; i < dellen; i++)
1595 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 for (i = inlen; --i >= 0; ) {
1598 c = Py_CHARMASK(*input++);
1599 if (trans_table[c] != -1)
1600 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1601 continue;
1602 changed = 1;
1603 }
1604 if (!changed && PyBytes_CheckExact(input_obj)) {
1605 Py_DECREF(result);
1606 Py_INCREF(input_obj);
1607 return input_obj;
1608 }
1609 /* Fix the size of the resulting string */
1610 if (inlen > 0)
1611 _PyBytes_Resize(&result, output - output_start);
1612 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613}
1614
1615
Georg Brandlabc38772009-04-12 15:51:51 +00001616static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001617bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001618{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001620}
1621
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622/* find and count characters and substrings */
1623
/* Locate the first occurrence of byte c in target[0:target_len];
   evaluates to a char * to it, or NULL when c is absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1626
1627/* String ops must return a string. */
1628/* If the object is subclass of string, create a copy */
1629Py_LOCAL(PyBytesObject *)
1630return_self(PyBytesObject *self)
1631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 if (PyBytes_CheckExact(self)) {
1633 Py_INCREF(self);
1634 return self;
1635 }
1636 return (PyBytesObject *)PyBytes_FromStringAndSize(
1637 PyBytes_AS_STRING(self),
1638 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639}
1640
1641Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001642countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 Py_ssize_t count=0;
1645 const char *start=target;
1646 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 while ( (start=findchar(start, end-start, c)) != NULL ) {
1649 count++;
1650 if (count >= maxcount)
1651 break;
1652 start += 1;
1653 }
1654 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655}
1656
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657
1658/* Algorithms for different cases of string replacement */
1659
1660/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1661Py_LOCAL(PyBytesObject *)
1662replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 const char *to_s, Py_ssize_t to_len,
1664 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 char *self_s, *result_s;
1667 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001668 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001673 /* 1 at the end plus 1 after every character;
1674 count = min(maxcount, self_len + 1) */
1675 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001677 else
1678 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1679 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 /* Check for overflow */
1682 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001683 assert(count > 0);
1684 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 PyErr_SetString(PyExc_OverflowError,
1686 "replacement bytes are too long");
1687 return NULL;
1688 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001689 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 if (! (result = (PyBytesObject *)
1692 PyBytes_FromStringAndSize(NULL, result_len)) )
1693 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 self_s = PyBytes_AS_STRING(self);
1696 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 /* Lay the first one down (guaranteed this will occur) */
1701 Py_MEMCPY(result_s, to_s, to_len);
1702 result_s += to_len;
1703 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 for (i=0; i<count; i++) {
1706 *result_s++ = *self_s++;
1707 Py_MEMCPY(result_s, to_s, to_len);
1708 result_s += to_len;
1709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 /* Copy the rest of the original string */
1712 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715}
1716
1717/* Special case for deleting a single character */
1718/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1719Py_LOCAL(PyBytesObject *)
1720replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 char *self_s, *result_s;
1724 char *start, *next, *end;
1725 Py_ssize_t self_len, result_len;
1726 Py_ssize_t count;
1727 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 self_len = PyBytes_GET_SIZE(self);
1730 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 count = countchar(self_s, self_len, from_c, maxcount);
1733 if (count == 0) {
1734 return return_self(self);
1735 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 result_len = self_len - count; /* from_len == 1 */
1738 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 if ( (result = (PyBytesObject *)
1741 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1742 return NULL;
1743 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 start = self_s;
1746 end = self_s + self_len;
1747 while (count-- > 0) {
1748 next = findchar(start, end-start, from_c);
1749 if (next == NULL)
1750 break;
1751 Py_MEMCPY(result_s, start, next-start);
1752 result_s += (next-start);
1753 start = next+1;
1754 }
1755 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758}
1759
1760/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1761
1762Py_LOCAL(PyBytesObject *)
1763replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 const char *from_s, Py_ssize_t from_len,
1765 Py_ssize_t maxcount) {
1766 char *self_s, *result_s;
1767 char *start, *next, *end;
1768 Py_ssize_t self_len, result_len;
1769 Py_ssize_t count, offset;
1770 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 self_len = PyBytes_GET_SIZE(self);
1773 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 count = stringlib_count(self_s, self_len,
1776 from_s, from_len,
1777 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 if (count == 0) {
1780 /* no matches */
1781 return return_self(self);
1782 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 result_len = self_len - (count * from_len);
1785 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if ( (result = (PyBytesObject *)
1788 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1789 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 start = self_s;
1794 end = self_s + self_len;
1795 while (count-- > 0) {
1796 offset = stringlib_find(start, end-start,
1797 from_s, from_len,
1798 0);
1799 if (offset == -1)
1800 break;
1801 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 result_s += (next-start);
1806 start = next+from_len;
1807 }
1808 Py_MEMCPY(result_s, start, end-start);
1809 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810}
1811
1812/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1813Py_LOCAL(PyBytesObject *)
1814replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 char from_c, char to_c,
1816 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 char *self_s, *result_s, *start, *end, *next;
1819 Py_ssize_t self_len;
1820 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 /* The result string will be the same size */
1823 self_s = PyBytes_AS_STRING(self);
1824 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 if (next == NULL) {
1829 /* No matches; return the original string */
1830 return return_self(self);
1831 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 /* Need to make a new string */
1834 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1835 if (result == NULL)
1836 return NULL;
1837 result_s = PyBytes_AS_STRING(result);
1838 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 /* change everything in-place, starting with this one */
1841 start = result_s + (next-self_s);
1842 *start = to_c;
1843 start++;
1844 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 while (--maxcount > 0) {
1847 next = findchar(start, end-start, from_c);
1848 if (next == NULL)
1849 break;
1850 *next = to_c;
1851 start = next+1;
1852 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001855}
1856
1857/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1858Py_LOCAL(PyBytesObject *)
1859replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 const char *from_s, Py_ssize_t from_len,
1861 const char *to_s, Py_ssize_t to_len,
1862 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 char *result_s, *start, *end;
1865 char *self_s;
1866 Py_ssize_t self_len, offset;
1867 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 self_s = PyBytes_AS_STRING(self);
1872 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 offset = stringlib_find(self_s, self_len,
1875 from_s, from_len,
1876 0);
1877 if (offset == -1) {
1878 /* No matches; return the original string */
1879 return return_self(self);
1880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 /* Need to make a new string */
1883 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1884 if (result == NULL)
1885 return NULL;
1886 result_s = PyBytes_AS_STRING(result);
1887 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 /* change everything in-place, starting with this one */
1890 start = result_s + offset;
1891 Py_MEMCPY(start, to_s, from_len);
1892 start += from_len;
1893 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 while ( --maxcount > 0) {
1896 offset = stringlib_find(start, end-start,
1897 from_s, from_len,
1898 0);
1899 if (offset==-1)
1900 break;
1901 Py_MEMCPY(start+offset, to_s, from_len);
1902 start += offset+from_len;
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906}
1907
1908/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1909Py_LOCAL(PyBytesObject *)
1910replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 char from_c,
1912 const char *to_s, Py_ssize_t to_len,
1913 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 char *self_s, *result_s;
1916 char *start, *next, *end;
1917 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001918 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001921 self_s = PyBytes_AS_STRING(self);
1922 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 count = countchar(self_s, self_len, from_c, maxcount);
1925 if (count == 0) {
1926 /* no matches, return unchanged */
1927 return return_self(self);
1928 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 /* use the difference between current and new, hence the "-1" */
1931 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001932 assert(count > 0);
1933 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 PyErr_SetString(PyExc_OverflowError,
1935 "replacement bytes are too long");
1936 return NULL;
1937 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001938 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 if ( (result = (PyBytesObject *)
1941 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1942 return NULL;
1943 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 start = self_s;
1946 end = self_s + self_len;
1947 while (count-- > 0) {
1948 next = findchar(start, end-start, from_c);
1949 if (next == NULL)
1950 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 if (next == start) {
1953 /* replace with the 'to' */
1954 Py_MEMCPY(result_s, to_s, to_len);
1955 result_s += to_len;
1956 start += 1;
1957 } else {
1958 /* copy the unchanged old then the 'to' */
1959 Py_MEMCPY(result_s, start, next-start);
1960 result_s += (next-start);
1961 Py_MEMCPY(result_s, to_s, to_len);
1962 result_s += to_len;
1963 start = next+1;
1964 }
1965 }
1966 /* Copy the remainder of the remaining string */
1967 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970}
1971
1972/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1973Py_LOCAL(PyBytesObject *)
1974replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 const char *from_s, Py_ssize_t from_len,
1976 const char *to_s, Py_ssize_t to_len,
1977 Py_ssize_t maxcount) {
1978 char *self_s, *result_s;
1979 char *start, *next, *end;
1980 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001981 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 self_s = PyBytes_AS_STRING(self);
1985 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 count = stringlib_count(self_s, self_len,
1988 from_s, from_len,
1989 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 if (count == 0) {
1992 /* no matches, return unchanged */
1993 return return_self(self);
1994 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 /* Check for overflow */
1997 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001998 assert(count > 0);
1999 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 PyErr_SetString(PyExc_OverflowError,
2001 "replacement bytes are too long");
2002 return NULL;
2003 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002004 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 if ( (result = (PyBytesObject *)
2007 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2008 return NULL;
2009 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 start = self_s;
2012 end = self_s + self_len;
2013 while (count-- > 0) {
2014 offset = stringlib_find(start, end-start,
2015 from_s, from_len,
2016 0);
2017 if (offset == -1)
2018 break;
2019 next = start+offset;
2020 if (next == start) {
2021 /* replace with the 'to' */
2022 Py_MEMCPY(result_s, to_s, to_len);
2023 result_s += to_len;
2024 start += from_len;
2025 } else {
2026 /* copy the unchanged old then the 'to' */
2027 Py_MEMCPY(result_s, start, next-start);
2028 result_s += (next-start);
2029 Py_MEMCPY(result_s, to_s, to_len);
2030 result_s += to_len;
2031 start = next+from_len;
2032 }
2033 }
2034 /* Copy the remainder of the remaining string */
2035 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002038}
2039
2040
2041Py_LOCAL(PyBytesObject *)
2042replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 const char *from_s, Py_ssize_t from_len,
2044 const char *to_s, Py_ssize_t to_len,
2045 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (maxcount < 0) {
2048 maxcount = PY_SSIZE_T_MAX;
2049 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2050 /* nothing to do; return the original string */
2051 return return_self(self);
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 if (maxcount == 0 ||
2055 (from_len == 0 && to_len == 0)) {
2056 /* nothing to do; return the original string */
2057 return return_self(self);
2058 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (from_len == 0) {
2063 /* insert the 'to' string everywhere. */
2064 /* >>> "Python".replace("", ".") */
2065 /* '.P.y.t.h.o.n.' */
2066 return replace_interleave(self, to_s, to_len, maxcount);
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2070 /* point for an empty self string to generate a non-empty string */
2071 /* Special case so the remaining code always gets a non-empty string */
2072 if (PyBytes_GET_SIZE(self) == 0) {
2073 return return_self(self);
2074 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 if (to_len == 0) {
2077 /* delete all occurrences of 'from' string */
2078 if (from_len == 1) {
2079 return replace_delete_single_character(
2080 self, from_s[0], maxcount);
2081 } else {
2082 return replace_delete_substring(self, from_s,
2083 from_len, maxcount);
2084 }
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len,
2099 maxcount);
2100 }
2101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 /* Otherwise use the more generic algorithms */
2104 if (from_len == 1) {
2105 return replace_single_character(self, from_s[0],
2106 to_s, to_len, maxcount);
2107 } else {
2108 /* len('from')>=2, len('to')>=1 */
2109 return replace_substring(self, from_s, from_len, to_s, to_len,
2110 maxcount);
2111 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112}
2113
2114PyDoc_STRVAR(replace__doc__,
2115"B.replace(old, new[, count]) -> bytes\n\
2116\n\
2117Return a copy of B with all occurrences of subsection\n\
2118old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002119given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
2121static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002122bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 Py_ssize_t count = -1;
2125 PyObject *from, *to;
2126 const char *from_s, *to_s;
2127 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2130 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if (PyBytes_Check(from)) {
2133 from_s = PyBytes_AS_STRING(from);
2134 from_len = PyBytes_GET_SIZE(from);
2135 }
2136 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2137 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 if (PyBytes_Check(to)) {
2140 to_s = PyBytes_AS_STRING(to);
2141 to_len = PyBytes_GET_SIZE(to);
2142 }
2143 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2144 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 return (PyObject *)replace((PyBytesObject *) self,
2147 from_s, from_len,
2148 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149}
2150
2151/** End DALKE **/
2152
2153/* Matches the end (direction >= 0) or start (direction < 0) of self
2154 * against substr, using the start and end arguments. Returns
2155 * -1 on error, 0 if not found and 1 if found.
2156 */
2157Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002158_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 Py_ssize_t len = PyBytes_GET_SIZE(self);
2162 Py_ssize_t slen;
2163 const char* sub;
2164 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 if (PyBytes_Check(substr)) {
2167 sub = PyBytes_AS_STRING(substr);
2168 slen = PyBytes_GET_SIZE(substr);
2169 }
2170 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2171 return -1;
2172 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 if (direction < 0) {
2177 /* startswith */
2178 if (start+slen > len)
2179 return 0;
2180 } else {
2181 /* endswith */
2182 if (end-start < slen || start > len)
2183 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 if (end-slen > start)
2186 start = end - slen;
2187 }
2188 if (end-start >= slen)
2189 return ! memcmp(str+start, sub, slen);
2190 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191}
2192
2193
2194PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002195"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196\n\
2197Return True if B starts with the specified prefix, False otherwise.\n\
2198With optional start, test B beginning at that position.\n\
2199With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002200prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
2202static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002203bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 Py_ssize_t start = 0;
2206 Py_ssize_t end = PY_SSIZE_T_MAX;
2207 PyObject *subobj;
2208 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Jesus Ceaac451502011-04-20 17:09:23 +02002210 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 return NULL;
2212 if (PyTuple_Check(subobj)) {
2213 Py_ssize_t i;
2214 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2215 result = _bytes_tailmatch(self,
2216 PyTuple_GET_ITEM(subobj, i),
2217 start, end, -1);
2218 if (result == -1)
2219 return NULL;
2220 else if (result) {
2221 Py_RETURN_TRUE;
2222 }
2223 }
2224 Py_RETURN_FALSE;
2225 }
2226 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002227 if (result == -1) {
2228 if (PyErr_ExceptionMatches(PyExc_TypeError))
2229 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2230 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002231 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002232 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002233 else
2234 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002235}
2236
2237
2238PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002239"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240\n\
2241Return True if B ends with the specified suffix, False otherwise.\n\
2242With optional start, test B beginning at that position.\n\
2243With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002244suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002245
2246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002247bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002249 Py_ssize_t start = 0;
2250 Py_ssize_t end = PY_SSIZE_T_MAX;
2251 PyObject *subobj;
2252 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Jesus Ceaac451502011-04-20 17:09:23 +02002254 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 return NULL;
2256 if (PyTuple_Check(subobj)) {
2257 Py_ssize_t i;
2258 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2259 result = _bytes_tailmatch(self,
2260 PyTuple_GET_ITEM(subobj, i),
2261 start, end, +1);
2262 if (result == -1)
2263 return NULL;
2264 else if (result) {
2265 Py_RETURN_TRUE;
2266 }
2267 }
2268 Py_RETURN_FALSE;
2269 }
2270 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002271 if (result == -1) {
2272 if (PyErr_ExceptionMatches(PyExc_TypeError))
2273 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2274 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002276 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 else
2278 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279}
2280
2281
2282PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002283"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002285Decode B using the codec registered for encoding. Default encoding\n\
2286is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002287handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2288a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002290able to handle UnicodeDecodeErrors.");
2291
2292static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002293bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 const char *encoding = NULL;
2296 const char *errors = NULL;
2297 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2300 return NULL;
2301 if (encoding == NULL)
2302 encoding = PyUnicode_GetDefaultEncoding();
2303 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002304}
2305
Guido van Rossum20188312006-05-05 15:15:40 +00002306
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002307PyDoc_STRVAR(splitlines__doc__,
2308"B.splitlines([keepends]) -> list of lines\n\
2309\n\
2310Return a list of the lines in B, breaking at line boundaries.\n\
2311Line breaks are not included in the resulting list unless keepends\n\
2312is given and true.");
2313
2314static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002315bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002316{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002317 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002318 int keepends = 0;
2319
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002320 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2321 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002322 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002323
2324 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002325 (PyObject*) self, PyBytes_AS_STRING(self),
2326 PyBytes_GET_SIZE(self), keepends
2327 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002328}
2329
2330
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002331PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002332"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002333\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002335Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002337
2338static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002339hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 if (c >= 128)
2342 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002343 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 return c - '0';
2345 else {
David Malcolm96960882010-11-05 17:23:41 +00002346 if (Py_ISUPPER(c))
2347 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 if (c >= 'a' && c <= 'f')
2349 return c - 'a' + 10;
2350 }
2351 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352}
2353
2354static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002355bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 PyObject *newstring, *hexobj;
2358 char *buf;
2359 Py_UNICODE *hex;
2360 Py_ssize_t hexlen, byteslen, i, j;
2361 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2364 return NULL;
2365 assert(PyUnicode_Check(hexobj));
2366 hexlen = PyUnicode_GET_SIZE(hexobj);
2367 hex = PyUnicode_AS_UNICODE(hexobj);
2368 byteslen = hexlen/2; /* This overestimates if there are spaces */
2369 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2370 if (!newstring)
2371 return NULL;
2372 buf = PyBytes_AS_STRING(newstring);
2373 for (i = j = 0; i < hexlen; i += 2) {
2374 /* skip over spaces in the input */
2375 while (hex[i] == ' ')
2376 i++;
2377 if (i >= hexlen)
2378 break;
2379 top = hex_digit_to_int(hex[i]);
2380 bot = hex_digit_to_int(hex[i+1]);
2381 if (top == -1 || bot == -1) {
2382 PyErr_Format(PyExc_ValueError,
2383 "non-hexadecimal number found in "
2384 "fromhex() arg at position %zd", i);
2385 goto error;
2386 }
2387 buf[j++] = (top << 4) + bot;
2388 }
2389 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2390 goto error;
2391 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002392
2393 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 Py_XDECREF(newstring);
2395 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002396}
2397
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002398PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002399"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002400
2401static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002402bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002403{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 Py_ssize_t res;
2405 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2406 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002407}
2408
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002409
2410static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002411bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002414}
2415
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002416
2417static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002418bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2420 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2421 _Py_capitalize__doc__},
2422 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2423 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2424 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2425 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2426 endswith__doc__},
2427 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2428 expandtabs__doc__},
2429 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2430 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2431 fromhex_doc},
2432 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2433 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2434 _Py_isalnum__doc__},
2435 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2436 _Py_isalpha__doc__},
2437 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2438 _Py_isdigit__doc__},
2439 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2440 _Py_islower__doc__},
2441 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2442 _Py_isspace__doc__},
2443 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2444 _Py_istitle__doc__},
2445 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2446 _Py_isupper__doc__},
2447 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2448 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2449 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2450 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2451 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2452 _Py_maketrans__doc__},
2453 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2454 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2455 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2456 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2457 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2458 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2459 rpartition__doc__},
2460 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2461 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2462 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002463 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 splitlines__doc__},
2465 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2466 startswith__doc__},
2467 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2468 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2469 _Py_swapcase__doc__},
2470 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2471 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2472 translate__doc__},
2473 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2474 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2475 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2476 sizeof__doc__},
2477 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002478};
2479
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002480static PyObject *
2481str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2482
2483static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002484bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002485{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 PyObject *x = NULL;
2487 const char *encoding = NULL;
2488 const char *errors = NULL;
2489 PyObject *new = NULL;
2490 Py_ssize_t size;
2491 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 if (type != &PyBytes_Type)
2494 return str_subtype_new(type, args, kwds);
2495 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2496 &encoding, &errors))
2497 return NULL;
2498 if (x == NULL) {
2499 if (encoding != NULL || errors != NULL) {
2500 PyErr_SetString(PyExc_TypeError,
2501 "encoding or errors without sequence "
2502 "argument");
2503 return NULL;
2504 }
2505 return PyBytes_FromString("");
2506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 if (PyUnicode_Check(x)) {
2509 /* Encode via the codec registry */
2510 if (encoding == NULL) {
2511 PyErr_SetString(PyExc_TypeError,
2512 "string argument without an encoding");
2513 return NULL;
2514 }
2515 new = PyUnicode_AsEncodedString(x, encoding, errors);
2516 if (new == NULL)
2517 return NULL;
2518 assert(PyBytes_Check(new));
2519 return new;
2520 }
2521 /* Is it an integer? */
2522 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2523 if (size == -1 && PyErr_Occurred()) {
2524 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2525 return NULL;
2526 PyErr_Clear();
2527 }
2528 else if (size < 0) {
2529 PyErr_SetString(PyExc_ValueError, "negative count");
2530 return NULL;
2531 }
2532 else {
2533 new = PyBytes_FromStringAndSize(NULL, size);
2534 if (new == NULL) {
2535 return NULL;
2536 }
2537 if (size > 0) {
2538 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2539 }
2540 return new;
2541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 /* If it's not unicode, there can't be encoding or errors */
2544 if (encoding != NULL || errors != NULL) {
2545 PyErr_SetString(PyExc_TypeError,
2546 "encoding or errors without a string argument");
2547 return NULL;
2548 }
2549 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002550}
2551
2552PyObject *
2553PyBytes_FromObject(PyObject *x)
2554{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 PyObject *new, *it;
2556 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 if (x == NULL) {
2559 PyErr_BadInternalCall();
2560 return NULL;
2561 }
2562 /* Use the modern buffer interface */
2563 if (PyObject_CheckBuffer(x)) {
2564 Py_buffer view;
2565 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2566 return NULL;
2567 new = PyBytes_FromStringAndSize(NULL, view.len);
2568 if (!new)
2569 goto fail;
2570 /* XXX(brett.cannon): Better way to get to internal buffer? */
2571 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2572 &view, view.len, 'C') < 0)
2573 goto fail;
2574 PyBuffer_Release(&view);
2575 return new;
2576 fail:
2577 Py_XDECREF(new);
2578 PyBuffer_Release(&view);
2579 return NULL;
2580 }
2581 if (PyUnicode_Check(x)) {
2582 PyErr_SetString(PyExc_TypeError,
2583 "cannot convert unicode object to bytes");
2584 return NULL;
2585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002587 if (PyList_CheckExact(x)) {
2588 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2589 if (new == NULL)
2590 return NULL;
2591 for (i = 0; i < Py_SIZE(x); i++) {
2592 Py_ssize_t value = PyNumber_AsSsize_t(
2593 PyList_GET_ITEM(x, i), PyExc_ValueError);
2594 if (value == -1 && PyErr_Occurred()) {
2595 Py_DECREF(new);
2596 return NULL;
2597 }
2598 if (value < 0 || value >= 256) {
2599 PyErr_SetString(PyExc_ValueError,
2600 "bytes must be in range(0, 256)");
2601 Py_DECREF(new);
2602 return NULL;
2603 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002604 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 }
2606 return new;
2607 }
2608 if (PyTuple_CheckExact(x)) {
2609 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2610 if (new == NULL)
2611 return NULL;
2612 for (i = 0; i < Py_SIZE(x); i++) {
2613 Py_ssize_t value = PyNumber_AsSsize_t(
2614 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2615 if (value == -1 && PyErr_Occurred()) {
2616 Py_DECREF(new);
2617 return NULL;
2618 }
2619 if (value < 0 || value >= 256) {
2620 PyErr_SetString(PyExc_ValueError,
2621 "bytes must be in range(0, 256)");
2622 Py_DECREF(new);
2623 return NULL;
2624 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002625 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 }
2627 return new;
2628 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 /* For iterator version, create a string object and resize as needed */
2631 size = _PyObject_LengthHint(x, 64);
2632 if (size == -1 && PyErr_Occurred())
2633 return NULL;
2634 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2635 returning a shared empty bytes string. This required because we
2636 want to call _PyBytes_Resize() the returned object, which we can
2637 only do on bytes objects with refcount == 1. */
2638 size += 1;
2639 new = PyBytes_FromStringAndSize(NULL, size);
2640 if (new == NULL)
2641 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 /* Get the iterator */
2644 it = PyObject_GetIter(x);
2645 if (it == NULL)
2646 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* Run the iterator to exhaustion */
2649 for (i = 0; ; i++) {
2650 PyObject *item;
2651 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 /* Get the next item */
2654 item = PyIter_Next(it);
2655 if (item == NULL) {
2656 if (PyErr_Occurred())
2657 goto error;
2658 break;
2659 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 /* Interpret it as an int (__index__) */
2662 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2663 Py_DECREF(item);
2664 if (value == -1 && PyErr_Occurred())
2665 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Range check */
2668 if (value < 0 || value >= 256) {
2669 PyErr_SetString(PyExc_ValueError,
2670 "bytes must be in range(0, 256)");
2671 goto error;
2672 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 /* Append the byte */
2675 if (i >= size) {
2676 size = 2 * size + 1;
2677 if (_PyBytes_Resize(&new, size) < 0)
2678 goto error;
2679 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002680 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 }
2682 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Clean up and return success */
2685 Py_DECREF(it);
2686 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002687
2688 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 /* Error handling when new != NULL */
2690 Py_XDECREF(it);
2691 Py_DECREF(new);
2692 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693}
2694
2695static PyObject *
2696str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2697{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 PyObject *tmp, *pnew;
2699 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 assert(PyType_IsSubtype(type, &PyBytes_Type));
2702 tmp = bytes_new(&PyBytes_Type, args, kwds);
2703 if (tmp == NULL)
2704 return NULL;
2705 assert(PyBytes_CheckExact(tmp));
2706 n = PyBytes_GET_SIZE(tmp);
2707 pnew = type->tp_alloc(type, n);
2708 if (pnew != NULL) {
2709 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2710 PyBytes_AS_STRING(tmp), n+1);
2711 ((PyBytesObject *)pnew)->ob_shash =
2712 ((PyBytesObject *)tmp)->ob_shash;
2713 }
2714 Py_DECREF(tmp);
2715 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716}
2717
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002718PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002719"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002721bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2722bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002723\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002725 - an iterable yielding integers in range(256)\n\
2726 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727 - a bytes or a buffer object\n\
2728 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002729
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002730static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002731
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2734 "bytes",
2735 PyBytesObject_SIZE,
2736 sizeof(char),
2737 bytes_dealloc, /* tp_dealloc */
2738 0, /* tp_print */
2739 0, /* tp_getattr */
2740 0, /* tp_setattr */
2741 0, /* tp_reserved */
2742 (reprfunc)bytes_repr, /* tp_repr */
2743 0, /* tp_as_number */
2744 &bytes_as_sequence, /* tp_as_sequence */
2745 &bytes_as_mapping, /* tp_as_mapping */
2746 (hashfunc)bytes_hash, /* tp_hash */
2747 0, /* tp_call */
2748 bytes_str, /* tp_str */
2749 PyObject_GenericGetAttr, /* tp_getattro */
2750 0, /* tp_setattro */
2751 &bytes_as_buffer, /* tp_as_buffer */
2752 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2753 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2754 bytes_doc, /* tp_doc */
2755 0, /* tp_traverse */
2756 0, /* tp_clear */
2757 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2758 0, /* tp_weaklistoffset */
2759 bytes_iter, /* tp_iter */
2760 0, /* tp_iternext */
2761 bytes_methods, /* tp_methods */
2762 0, /* tp_members */
2763 0, /* tp_getset */
2764 &PyBaseObject_Type, /* tp_base */
2765 0, /* tp_dict */
2766 0, /* tp_descr_get */
2767 0, /* tp_descr_set */
2768 0, /* tp_dictoffset */
2769 0, /* tp_init */
2770 0, /* tp_alloc */
2771 bytes_new, /* tp_new */
2772 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002773};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002774
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775void
2776PyBytes_Concat(register PyObject **pv, register PyObject *w)
2777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002778 register PyObject *v;
2779 assert(pv != NULL);
2780 if (*pv == NULL)
2781 return;
2782 if (w == NULL) {
2783 Py_DECREF(*pv);
2784 *pv = NULL;
2785 return;
2786 }
2787 v = bytes_concat(*pv, w);
2788 Py_DECREF(*pv);
2789 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790}
2791
2792void
2793PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2794{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 PyBytes_Concat(pv, w);
2796 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797}
2798
2799
2800/* The following function breaks the notion that strings are immutable:
2801 it changes the size of a string. We get away with this only if there
2802 is only one module referencing the object. You can also think of it
2803 as creating a new string object and destroying the old one, only
2804 more efficiently. In any case, don't use this if the string may
2805 already be known to some other part of the code...
2806 Note that if there's not enough memory to resize the string, the original
2807 string object at *pv is deallocated, *pv is set to NULL, an "out of
2808 memory" exception is set, and -1 is returned. Else (on success) 0 is
2809 returned, and the value in *pv may or may not be the same as on input.
2810 As always, an extra byte is allocated for a trailing \0 byte (newsize
2811 does *not* include that), and a trailing \0 byte is stored.
2812*/
2813
2814int
2815_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2816{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 register PyObject *v;
2818 register PyBytesObject *sv;
2819 v = *pv;
2820 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2821 *pv = 0;
2822 Py_DECREF(v);
2823 PyErr_BadInternalCall();
2824 return -1;
2825 }
2826 /* XXX UNREF/NEWREF interface should be more symmetrical */
2827 _Py_DEC_REFTOTAL;
2828 _Py_ForgetReference(v);
2829 *pv = (PyObject *)
2830 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2831 if (*pv == NULL) {
2832 PyObject_Del(v);
2833 PyErr_NoMemory();
2834 return -1;
2835 }
2836 _Py_NewReference(*pv);
2837 sv = (PyBytesObject *) *pv;
2838 Py_SIZE(sv) = newsize;
2839 sv->ob_sval[newsize] = '\0';
2840 sv->ob_shash = -1; /* invalidate cached hash value */
2841 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002842}
2843
2844/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2845 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2846 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002847 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848 * . *pbuf is set to point into it,
2849 * *plen set to the # of chars following that.
2850 * Caller must decref it when done using pbuf.
2851 * The string starting at *pbuf is of the form
2852 * "-"? ("0x" | "0X")? digit+
2853 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2854 * set in flags. The case of hex digits will be correct,
2855 * There will be at least prec digits, zero-filled on the left if
2856 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002857 * val object to be converted
2858 * flags bitmask of format flags; only F_ALT is looked at
2859 * prec minimum number of digits; 0-fill on left if needed
2860 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861 *
2862 * CAUTION: o, x and X conversions on regular ints can never
2863 * produce a '-' sign, but can for Python's unbounded ints.
2864 */
2865PyObject*
2866_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 PyObject *result = NULL;
2870 char *buf;
2871 Py_ssize_t i;
2872 int sign; /* 1 if '-', else 0 */
2873 int len; /* number of characters */
2874 Py_ssize_t llen;
2875 int numdigits; /* len == numnondigits + numdigits */
2876 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002878 /* Avoid exceeding SSIZE_T_MAX */
2879 if (prec > INT_MAX-3) {
2880 PyErr_SetString(PyExc_OverflowError,
2881 "precision too large");
2882 return NULL;
2883 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 switch (type) {
2886 case 'd':
2887 case 'u':
2888 /* Special-case boolean: we want 0/1 */
2889 if (PyBool_Check(val))
2890 result = PyNumber_ToBase(val, 10);
2891 else
2892 result = Py_TYPE(val)->tp_str(val);
2893 break;
2894 case 'o':
2895 numnondigits = 2;
2896 result = PyNumber_ToBase(val, 8);
2897 break;
2898 case 'x':
2899 case 'X':
2900 numnondigits = 2;
2901 result = PyNumber_ToBase(val, 16);
2902 break;
2903 default:
2904 assert(!"'type' not in [duoxX]");
2905 }
2906 if (!result)
2907 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002909 buf = _PyUnicode_AsString(result);
2910 if (!buf) {
2911 Py_DECREF(result);
2912 return NULL;
2913 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 /* To modify the string in-place, there can only be one reference. */
2916 if (Py_REFCNT(result) != 1) {
2917 PyErr_BadInternalCall();
2918 return NULL;
2919 }
2920 llen = PyUnicode_GetSize(result);
2921 if (llen > INT_MAX) {
2922 PyErr_SetString(PyExc_ValueError,
2923 "string too large in _PyBytes_FormatLong");
2924 return NULL;
2925 }
2926 len = (int)llen;
2927 if (buf[len-1] == 'L') {
2928 --len;
2929 buf[len] = '\0';
2930 }
2931 sign = buf[0] == '-';
2932 numnondigits += sign;
2933 numdigits = len - numnondigits;
2934 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 /* Get rid of base marker unless F_ALT */
2937 if (((flags & F_ALT) == 0 &&
2938 (type == 'o' || type == 'x' || type == 'X'))) {
2939 assert(buf[sign] == '0');
2940 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2941 buf[sign+1] == 'o');
2942 numnondigits -= 2;
2943 buf += 2;
2944 len -= 2;
2945 if (sign)
2946 buf[0] = '-';
2947 assert(len == numnondigits + numdigits);
2948 assert(numdigits > 0);
2949 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 /* Fill with leading zeroes to meet minimum width. */
2952 if (prec > numdigits) {
2953 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2954 numnondigits + prec);
2955 char *b1;
2956 if (!r1) {
2957 Py_DECREF(result);
2958 return NULL;
2959 }
2960 b1 = PyBytes_AS_STRING(r1);
2961 for (i = 0; i < numnondigits; ++i)
2962 *b1++ = *buf++;
2963 for (i = 0; i < prec - numdigits; i++)
2964 *b1++ = '0';
2965 for (i = 0; i < numdigits; i++)
2966 *b1++ = *buf++;
2967 *b1 = '\0';
2968 Py_DECREF(result);
2969 result = r1;
2970 buf = PyBytes_AS_STRING(result);
2971 len = numnondigits + prec;
2972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 /* Fix up case for hex conversions. */
2975 if (type == 'X') {
2976 /* Need to convert all lower case letters to upper case.
2977 and need to convert 0x to 0X (and -0x to -0X). */
2978 for (i = 0; i < len; i++)
2979 if (buf[i] >= 'a' && buf[i] <= 'x')
2980 buf[i] -= 'a'-'A';
2981 }
2982 *pbuf = buf;
2983 *plen = len;
2984 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985}
2986
2987void
2988PyBytes_Fini(void)
2989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 int i;
2991 for (i = 0; i < UCHAR_MAX + 1; i++) {
2992 Py_XDECREF(characters[i]);
2993 characters[i] = NULL;
2994 }
2995 Py_XDECREF(nullstring);
2996 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002997}
2998
Benjamin Peterson4116f362008-05-27 00:36:20 +00002999/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003000
3001typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 PyObject_HEAD
3003 Py_ssize_t it_index;
3004 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003005} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003006
3007static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 _PyObject_GC_UNTRACK(it);
3011 Py_XDECREF(it->it_seq);
3012 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003013}
3014
3015static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003016striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 Py_VISIT(it->it_seq);
3019 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003020}
3021
3022static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003023striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003025 PyBytesObject *seq;
3026 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 assert(it != NULL);
3029 seq = it->it_seq;
3030 if (seq == NULL)
3031 return NULL;
3032 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3035 item = PyLong_FromLong(
3036 (unsigned char)seq->ob_sval[it->it_index]);
3037 if (item != NULL)
3038 ++it->it_index;
3039 return item;
3040 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 Py_DECREF(seq);
3043 it->it_seq = NULL;
3044 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003045}
3046
3047static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003048striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 Py_ssize_t len = 0;
3051 if (it->it_seq)
3052 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3053 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054}
3055
3056PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003058
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003059static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3061 length_hint_doc},
3062 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003063};
3064
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003065PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3067 "bytes_iterator", /* tp_name */
3068 sizeof(striterobject), /* tp_basicsize */
3069 0, /* tp_itemsize */
3070 /* methods */
3071 (destructor)striter_dealloc, /* tp_dealloc */
3072 0, /* tp_print */
3073 0, /* tp_getattr */
3074 0, /* tp_setattr */
3075 0, /* tp_reserved */
3076 0, /* tp_repr */
3077 0, /* tp_as_number */
3078 0, /* tp_as_sequence */
3079 0, /* tp_as_mapping */
3080 0, /* tp_hash */
3081 0, /* tp_call */
3082 0, /* tp_str */
3083 PyObject_GenericGetAttr, /* tp_getattro */
3084 0, /* tp_setattro */
3085 0, /* tp_as_buffer */
3086 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3087 0, /* tp_doc */
3088 (traverseproc)striter_traverse, /* tp_traverse */
3089 0, /* tp_clear */
3090 0, /* tp_richcompare */
3091 0, /* tp_weaklistoffset */
3092 PyObject_SelfIter, /* tp_iter */
3093 (iternextfunc)striter_next, /* tp_iternext */
3094 striter_methods, /* tp_methods */
3095 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003096};
3097
3098static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003099bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 if (!PyBytes_Check(seq)) {
3104 PyErr_BadInternalCall();
3105 return NULL;
3106 }
3107 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3108 if (it == NULL)
3109 return NULL;
3110 it->it_index = 0;
3111 Py_INCREF(seq);
3112 it->it_seq = (PyBytesObject *)seq;
3113 _PyObject_GC_TRACK(it);
3114 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115}