blob: b77d693fac8889e090c7b4bbd9ac27ba590e22c1 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* Basic size of a bytes object: the offset of the ob_sval member plus one
   byte for the terminating null.  An allocation for a string of length n
   should request PyBytesObject_SIZE + n bytes.

   Compared with sizeof(PyBytesObject), this saves 3 bytes per string
   allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
47 For PyBytes_FromStringAndSize(), the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 static const char *hexdigits = "0123456789abcdef";
568 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569 Py_ssize_t i, length = Py_SIZE(op);
570 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200572 unsigned char quote, *s, *p;
573
574 /* Compute size of output string */
575 squotes = dquotes = 0;
576 newsize = 3; /* b'' */
577 s = (unsigned char*)op->ob_sval;
578 for (i = 0; i < length; i++) {
579 switch(s[i]) {
580 case '\'': squotes++; newsize++; break;
581 case '"': dquotes++; newsize++; break;
582 case '\\': case '\t': case '\n': case '\r':
583 newsize += 2; break; /* \C */
584 default:
585 if (s[i] < ' ' || s[i] >= 0x7f)
586 newsize += 4; /* \xHH */
587 else
588 newsize++;
589 }
590 }
591 quote = '\'';
592 if (smartquotes && squotes && !dquotes)
593 quote = '"';
594 if (squotes && quote == '\'')
595 newsize += squotes;
596
597 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_SetString(PyExc_OverflowError,
599 "bytes object is too large to make repr");
600 return NULL;
601 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200602
603 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 if (v == NULL) {
605 return NULL;
606 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000608
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200609 *p++ = 'b', *p++ = quote;
610 for (i = 0; i < length; i++) {
611 unsigned char c = op->ob_sval[i];
612 if (c == quote || c == '\\')
613 *p++ = '\\', *p++ = c;
614 else if (c == '\t')
615 *p++ = '\\', *p++ = 't';
616 else if (c == '\n')
617 *p++ = '\\', *p++ = 'n';
618 else if (c == '\r')
619 *p++ = '\\', *p++ = 'r';
620 else if (c < ' ' || c >= 0x7f) {
621 *p++ = '\\';
622 *p++ = 'x';
623 *p++ = hexdigits[(c & 0xf0) >> 4];
624 *p++ = hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 else
627 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 *p++ = quote;
630 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000631}
632
Neal Norwitz6968b052007-02-27 19:02:19 +0000633static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000634bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000637}
638
Neal Norwitz6968b052007-02-27 19:02:19 +0000639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000640bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 if (Py_BytesWarningFlag) {
643 if (PyErr_WarnEx(PyExc_BytesWarning,
644 "str() on a bytes instance", 1))
645 return NULL;
646 }
647 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000648}
649
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000650static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000651bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654}
Neal Norwitz6968b052007-02-27 19:02:19 +0000655
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656/* This is also used by PyBytes_Concat() */
657static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000658bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 Py_ssize_t size;
661 Py_buffer va, vb;
662 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 va.len = -1;
665 vb.len = -1;
666 if (_getbuffer(a, &va) < 0 ||
667 _getbuffer(b, &vb) < 0) {
668 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
669 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
670 goto done;
671 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 /* Optimize end cases */
674 if (va.len == 0 && PyBytes_CheckExact(b)) {
675 result = b;
676 Py_INCREF(result);
677 goto done;
678 }
679 if (vb.len == 0 && PyBytes_CheckExact(a)) {
680 result = a;
681 Py_INCREF(result);
682 goto done;
683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 size = va.len + vb.len;
686 if (size < 0) {
687 PyErr_NoMemory();
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 result = PyBytes_FromStringAndSize(NULL, size);
692 if (result != NULL) {
693 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
694 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000696
697 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 if (va.len != -1)
699 PyBuffer_Release(&va);
700 if (vb.len != -1)
701 PyBuffer_Release(&vb);
702 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703}
Neal Norwitz6968b052007-02-27 19:02:19 +0000704
705static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000706bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 register Py_ssize_t i;
709 register Py_ssize_t j;
710 register Py_ssize_t size;
711 register PyBytesObject *op;
712 size_t nbytes;
713 if (n < 0)
714 n = 0;
715 /* watch out for overflows: the size can overflow int,
716 * and the # of bytes needed can overflow size_t
717 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000718 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 PyErr_SetString(PyExc_OverflowError,
720 "repeated bytes are too long");
721 return NULL;
722 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000723 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
725 Py_INCREF(a);
726 return (PyObject *)a;
727 }
728 nbytes = (size_t)size;
729 if (nbytes + PyBytesObject_SIZE <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated bytes are too long");
732 return NULL;
733 }
734 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
735 if (op == NULL)
736 return PyErr_NoMemory();
737 PyObject_INIT_VAR(op, &PyBytes_Type, size);
738 op->ob_shash = -1;
739 op->ob_sval[size] = '\0';
740 if (Py_SIZE(a) == 1 && n > 0) {
741 memset(op->ob_sval, a->ob_sval[0] , n);
742 return (PyObject *) op;
743 }
744 i = 0;
745 if (i < size) {
746 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
747 i = Py_SIZE(a);
748 }
749 while (i < size) {
750 j = (i <= size-i) ? i : size-i;
751 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
752 i += j;
753 }
754 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000755}
756
Guido van Rossum98297ee2007-11-06 21:34:58 +0000757static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000758bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000759{
760 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
761 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000762 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000763 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000764 PyErr_Clear();
765 if (_getbuffer(arg, &varg) < 0)
766 return -1;
767 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
768 varg.buf, varg.len, 0);
769 PyBuffer_Release(&varg);
770 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000771 }
772 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000773 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
774 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775 }
776
Antoine Pitrou0010d372010-08-15 17:12:55 +0000777 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000778}
779
Neal Norwitz6968b052007-02-27 19:02:19 +0000780static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000781bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000782{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 if (i < 0 || i >= Py_SIZE(a)) {
784 PyErr_SetString(PyExc_IndexError, "index out of range");
785 return NULL;
786 }
787 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000788}
789
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000790static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 int c;
794 Py_ssize_t len_a, len_b;
795 Py_ssize_t min_len;
796 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 /* Make sure both arguments are strings. */
799 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
800 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
801 (PyObject_IsInstance((PyObject*)a,
802 (PyObject*)&PyUnicode_Type) ||
803 PyObject_IsInstance((PyObject*)b,
804 (PyObject*)&PyUnicode_Type))) {
805 if (PyErr_WarnEx(PyExc_BytesWarning,
806 "Comparison between bytes and string", 1))
807 return NULL;
808 }
809 result = Py_NotImplemented;
810 goto out;
811 }
812 if (a == b) {
813 switch (op) {
814 case Py_EQ:case Py_LE:case Py_GE:
815 result = Py_True;
816 goto out;
817 case Py_NE:case Py_LT:case Py_GT:
818 result = Py_False;
819 goto out;
820 }
821 }
822 if (op == Py_EQ) {
823 /* Supporting Py_NE here as well does not save
824 much time, since Py_NE is rarely used. */
825 if (Py_SIZE(a) == Py_SIZE(b)
826 && (a->ob_sval[0] == b->ob_sval[0]
827 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
828 result = Py_True;
829 } else {
830 result = Py_False;
831 }
832 goto out;
833 }
834 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
835 min_len = (len_a < len_b) ? len_a : len_b;
836 if (min_len > 0) {
837 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
838 if (c==0)
839 c = memcmp(a->ob_sval, b->ob_sval, min_len);
840 } else
841 c = 0;
842 if (c == 0)
843 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
844 switch (op) {
845 case Py_LT: c = c < 0; break;
846 case Py_LE: c = c <= 0; break;
847 case Py_EQ: assert(0); break; /* unreachable */
848 case Py_NE: c = c != 0; break;
849 case Py_GT: c = c > 0; break;
850 case Py_GE: c = c >= 0; break;
851 default:
852 result = Py_NotImplemented;
853 goto out;
854 }
855 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000856 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 Py_INCREF(result);
858 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000859}
860
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000861static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000862bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 register Py_ssize_t len;
865 register unsigned char *p;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100866 register Py_uhash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 if (a->ob_shash != -1)
869 return a->ob_shash;
870 len = Py_SIZE(a);
871 p = (unsigned char *) a->ob_sval;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100872 x = (Py_uhash_t)*p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 while (--len >= 0)
Mark Dickinson57e683e2011-09-24 18:18:40 +0100874 x = (1000003U*x) ^ (Py_uhash_t)*p++;
875 x ^= (Py_uhash_t)Py_SIZE(a);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 if (x == -1)
877 x = -2;
878 a->ob_shash = x;
879 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000880}
881
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000882static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000883bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 if (PyIndex_Check(item)) {
886 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
887 if (i == -1 && PyErr_Occurred())
888 return NULL;
889 if (i < 0)
890 i += PyBytes_GET_SIZE(self);
891 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
892 PyErr_SetString(PyExc_IndexError,
893 "index out of range");
894 return NULL;
895 }
896 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
897 }
898 else if (PySlice_Check(item)) {
899 Py_ssize_t start, stop, step, slicelength, cur, i;
900 char* source_buf;
901 char* result_buf;
902 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000903
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000904 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 PyBytes_GET_SIZE(self),
906 &start, &stop, &step, &slicelength) < 0) {
907 return NULL;
908 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 if (slicelength <= 0) {
911 return PyBytes_FromStringAndSize("", 0);
912 }
913 else if (start == 0 && step == 1 &&
914 slicelength == PyBytes_GET_SIZE(self) &&
915 PyBytes_CheckExact(self)) {
916 Py_INCREF(self);
917 return (PyObject *)self;
918 }
919 else if (step == 1) {
920 return PyBytes_FromStringAndSize(
921 PyBytes_AS_STRING(self) + start,
922 slicelength);
923 }
924 else {
925 source_buf = PyBytes_AS_STRING(self);
926 result = PyBytes_FromStringAndSize(NULL, slicelength);
927 if (result == NULL)
928 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 result_buf = PyBytes_AS_STRING(result);
931 for (cur = start, i = 0; i < slicelength;
932 cur += step, i++) {
933 result_buf[i] = source_buf[cur];
934 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 return result;
937 }
938 }
939 else {
940 PyErr_Format(PyExc_TypeError,
941 "byte indices must be integers, not %.200s",
942 Py_TYPE(item)->tp_name);
943 return NULL;
944 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945}
946
947static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000948bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000949{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
951 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000952}
953
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 (lenfunc)bytes_length, /*sq_length*/
956 (binaryfunc)bytes_concat, /*sq_concat*/
957 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
958 (ssizeargfunc)bytes_item, /*sq_item*/
959 0, /*sq_slice*/
960 0, /*sq_ass_item*/
961 0, /*sq_ass_slice*/
962 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963};
964
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000965static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 (lenfunc)bytes_length,
967 (binaryfunc)bytes_subscript,
968 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969};
970
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 (getbufferproc)bytes_buffer_getbuffer,
973 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000974};
975
976
/* Which end(s) a strip operation works on.  The values double as indexes
   into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
985
Neal Norwitz6968b052007-02-27 19:02:19 +0000986PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000987"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000988\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000989Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000990If sep is not specified or is None, B is split on ASCII whitespace\n\
991characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000992If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000993
994static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000995bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +0000996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
998 Py_ssize_t maxsplit = -1;
999 const char *s = PyBytes_AS_STRING(self), *sub;
1000 Py_buffer vsub;
1001 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001002
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1004 return NULL;
1005 if (maxsplit < 0)
1006 maxsplit = PY_SSIZE_T_MAX;
1007 if (subobj == Py_None)
1008 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1009 if (_getbuffer(subobj, &vsub) < 0)
1010 return NULL;
1011 sub = vsub.buf;
1012 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1015 PyBuffer_Release(&vsub);
1016 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001017}
1018
Neal Norwitz6968b052007-02-27 19:02:19 +00001019PyDoc_STRVAR(partition__doc__,
1020"B.partition(sep) -> (head, sep, tail)\n\
1021\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001022Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001023the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001024found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001025
1026static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001027bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001028{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 const char *sep;
1030 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 if (PyBytes_Check(sep_obj)) {
1033 sep = PyBytes_AS_STRING(sep_obj);
1034 sep_len = PyBytes_GET_SIZE(sep_obj);
1035 }
1036 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1037 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001038
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 return stringlib_partition(
1040 (PyObject*) self,
1041 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1042 sep_obj, sep, sep_len
1043 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001044}
1045
1046PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001047"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001048\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001049Search for the separator sep in B, starting at the end of B,\n\
1050and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001051part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001053
1054static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001055bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 const char *sep;
1058 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 if (PyBytes_Check(sep_obj)) {
1061 sep = PyBytes_AS_STRING(sep_obj);
1062 sep_len = PyBytes_GET_SIZE(sep_obj);
1063 }
1064 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1065 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 return stringlib_rpartition(
1068 (PyObject*) self,
1069 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1070 sep_obj, sep, sep_len
1071 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001072}
1073
Neal Norwitz6968b052007-02-27 19:02:19 +00001074PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001075"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001076\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001077Return a list of the sections in B, using sep as the delimiter,\n\
1078starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001079If sep is not given, B is split on ASCII whitespace characters\n\
1080(space, tab, return, newline, formfeed, vertical tab).\n\
1081If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001082
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001083
Neal Norwitz6968b052007-02-27 19:02:19 +00001084static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001085bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001086{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1088 Py_ssize_t maxsplit = -1;
1089 const char *s = PyBytes_AS_STRING(self), *sub;
1090 Py_buffer vsub;
1091 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1094 return NULL;
1095 if (maxsplit < 0)
1096 maxsplit = PY_SSIZE_T_MAX;
1097 if (subobj == Py_None)
1098 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1099 if (_getbuffer(subobj, &vsub) < 0)
1100 return NULL;
1101 sub = vsub.buf;
1102 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1105 PyBuffer_Release(&vsub);
1106 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001107}
1108
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001109
1110PyDoc_STRVAR(join__doc__,
1111"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001112\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001113Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001114Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1115
Neal Norwitz6968b052007-02-27 19:02:19 +00001116static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001117bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 char *sep = PyBytes_AS_STRING(self);
1120 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1121 PyObject *res = NULL;
1122 char *p;
1123 Py_ssize_t seqlen = 0;
1124 size_t sz = 0;
1125 Py_ssize_t i;
1126 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 seq = PySequence_Fast(orig, "");
1129 if (seq == NULL) {
1130 return NULL;
1131 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 seqlen = PySequence_Size(seq);
1134 if (seqlen == 0) {
1135 Py_DECREF(seq);
1136 return PyBytes_FromString("");
1137 }
1138 if (seqlen == 1) {
1139 item = PySequence_Fast_GET_ITEM(seq, 0);
1140 if (PyBytes_CheckExact(item)) {
1141 Py_INCREF(item);
1142 Py_DECREF(seq);
1143 return item;
1144 }
1145 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 /* There are at least two things to join, or else we have a subclass
1148 * of the builtin types in the sequence.
1149 * Do a pre-pass to figure out the total amount of space we'll
1150 * need (sz), and see whether all argument are bytes.
1151 */
1152 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1153 for (i = 0; i < seqlen; i++) {
1154 const size_t old_sz = sz;
1155 item = PySequence_Fast_GET_ITEM(seq, i);
1156 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1157 PyErr_Format(PyExc_TypeError,
1158 "sequence item %zd: expected bytes,"
1159 " %.80s found",
1160 i, Py_TYPE(item)->tp_name);
1161 Py_DECREF(seq);
1162 return NULL;
1163 }
1164 sz += Py_SIZE(item);
1165 if (i != 0)
1166 sz += seplen;
1167 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1168 PyErr_SetString(PyExc_OverflowError,
1169 "join() result is too long for bytes");
1170 Py_DECREF(seq);
1171 return NULL;
1172 }
1173 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 /* Allocate result space. */
1176 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1177 if (res == NULL) {
1178 Py_DECREF(seq);
1179 return NULL;
1180 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 /* Catenate everything. */
1183 /* I'm not worried about a PyByteArray item growing because there's
1184 nowhere in this function where we release the GIL. */
1185 p = PyBytes_AS_STRING(res);
1186 for (i = 0; i < seqlen; ++i) {
1187 size_t n;
1188 char *q;
1189 if (i) {
1190 Py_MEMCPY(p, sep, seplen);
1191 p += seplen;
1192 }
1193 item = PySequence_Fast_GET_ITEM(seq, i);
1194 n = Py_SIZE(item);
1195 if (PyBytes_Check(item))
1196 q = PyBytes_AS_STRING(item);
1197 else
1198 q = PyByteArray_AS_STRING(item);
1199 Py_MEMCPY(p, q, n);
1200 p += n;
1201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 Py_DECREF(seq);
1204 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001205}
1206
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001207PyObject *
1208_PyBytes_Join(PyObject *sep, PyObject *x)
1209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 assert(sep != NULL && PyBytes_Check(sep));
1211 assert(x != NULL);
1212 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213}
1214
/* helper macro to fixup start/end slice values
 *
 * Clamps end to at most len, and turns negative start/end values into
 * offsets from the end (floored at 0).  start and end must be modifiable
 * lvalues and are updated in place; start is not clamped against len.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (use it with a trailing semicolon), and every argument is
 * parenthesized so expressions can be passed safely.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229
1230Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001231bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 PyObject *subobj;
1234 const char *sub;
1235 Py_ssize_t sub_len;
1236 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237
Jesus Ceaac451502011-04-20 17:09:23 +02001238 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1239 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 if (PyBytes_Check(subobj)) {
1243 sub = PyBytes_AS_STRING(subobj);
1244 sub_len = PyBytes_GET_SIZE(subobj);
1245 }
1246 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1247 /* XXX - the "expected a character buffer object" is pretty
1248 confusing for a non-expert. remap to something else ? */
1249 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 if (dir > 0)
1252 return stringlib_find_slice(
1253 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1254 sub, sub_len, start, end);
1255 else
1256 return stringlib_rfind_slice(
1257 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1258 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259}
1260
1261
1262PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001263"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001264\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001265Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001266such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001268\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269Return -1 on failure.");
1270
Neal Norwitz6968b052007-02-27 19:02:19 +00001271static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001272bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 Py_ssize_t result = bytes_find_internal(self, args, +1);
1275 if (result == -2)
1276 return NULL;
1277 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001278}
1279
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280
1281PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001282"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001283\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284Like B.find() but raise ValueError when the substring is not found.");
1285
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001286static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001287bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001288{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001289 Py_ssize_t result = bytes_find_internal(self, args, +1);
1290 if (result == -2)
1291 return NULL;
1292 if (result == -1) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "substring not found");
1295 return NULL;
1296 }
1297 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001298}
1299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300
1301PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001302"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001303\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001305such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001307\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308Return -1 on failure.");
1309
Neal Norwitz6968b052007-02-27 19:02:19 +00001310static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001311bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 Py_ssize_t result = bytes_find_internal(self, args, -1);
1314 if (result == -2)
1315 return NULL;
1316 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001317}
1318
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001319
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001321"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001322\n\
1323Like B.rfind() but raise ValueError when the substring is not found.");
1324
1325static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001326bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001327{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 Py_ssize_t result = bytes_find_internal(self, args, -1);
1329 if (result == -2)
1330 return NULL;
1331 if (result == -1) {
1332 PyErr_SetString(PyExc_ValueError,
1333 "substring not found");
1334 return NULL;
1335 }
1336 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001337}
1338
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001339
1340Py_LOCAL_INLINE(PyObject *)
1341do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001342{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 Py_buffer vsep;
1344 char *s = PyBytes_AS_STRING(self);
1345 Py_ssize_t len = PyBytes_GET_SIZE(self);
1346 char *sep;
1347 Py_ssize_t seplen;
1348 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 if (_getbuffer(sepobj, &vsep) < 0)
1351 return NULL;
1352 sep = vsep.buf;
1353 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 i = 0;
1356 if (striptype != RIGHTSTRIP) {
1357 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1358 i++;
1359 }
1360 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 j = len;
1363 if (striptype != LEFTSTRIP) {
1364 do {
1365 j--;
1366 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1367 j++;
1368 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1373 Py_INCREF(self);
1374 return (PyObject*)self;
1375 }
1376 else
1377 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001378}
1379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380
1381Py_LOCAL_INLINE(PyObject *)
1382do_strip(PyBytesObject *self, int striptype)
1383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 char *s = PyBytes_AS_STRING(self);
1385 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 i = 0;
1388 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001389 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 i++;
1391 }
1392 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 j = len;
1395 if (striptype != LEFTSTRIP) {
1396 do {
1397 j--;
David Malcolm96960882010-11-05 17:23:41 +00001398 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 j++;
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1403 Py_INCREF(self);
1404 return (PyObject*)self;
1405 }
1406 else
1407 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408}
1409
1410
1411Py_LOCAL_INLINE(PyObject *)
1412do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1413{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1417 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 if (sep != NULL && sep != Py_None) {
1420 return do_xstrip(self, striptype, sep);
1421 }
1422 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423}
1424
1425
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001426PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001428\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001429Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001431static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001432bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001433{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (PyTuple_GET_SIZE(args) == 0)
1435 return do_strip(self, BOTHSTRIP); /* Common case */
1436 else
1437 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001438}
1439
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001441PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001443\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001444Strip leading bytes contained in the argument.\n\
1445If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001446static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001447bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 if (PyTuple_GET_SIZE(args) == 0)
1450 return do_strip(self, LEFTSTRIP); /* Common case */
1451 else
1452 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453}
1454
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001455
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001457"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001459Strip trailing bytes contained in the argument.\n\
1460If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001462bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 if (PyTuple_GET_SIZE(args) == 0)
1465 return do_strip(self, RIGHTSTRIP); /* Common case */
1466 else
1467 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468}
Neal Norwitz6968b052007-02-27 19:02:19 +00001469
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001470
1471PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001472"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001473\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001475string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476as in slice notation.");
1477
1478static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001479bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 PyObject *sub_obj;
1482 const char *str = PyBytes_AS_STRING(self), *sub;
1483 Py_ssize_t sub_len;
1484 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485
Jesus Ceaac451502011-04-20 17:09:23 +02001486 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 if (PyBytes_Check(sub_obj)) {
1490 sub = PyBytes_AS_STRING(sub_obj);
1491 sub_len = PyBytes_GET_SIZE(sub_obj);
1492 }
1493 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1494 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 return PyLong_FromSsize_t(
1499 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1500 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501}
1502
1503
1504PyDoc_STRVAR(translate__doc__,
1505"B.translate(table[, deletechars]) -> bytes\n\
1506\n\
1507Return a copy of B, where all characters occurring in the\n\
1508optional argument deletechars are removed, and the remaining\n\
1509characters have been mapped through the given translation\n\
1510table, which must be a bytes object of length 256.");
1511
1512static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001513bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 register char *input, *output;
1516 const char *table;
1517 register Py_ssize_t i, c, changed = 0;
1518 PyObject *input_obj = (PyObject*)self;
1519 const char *output_start, *del_table=NULL;
1520 Py_ssize_t inlen, tablen, dellen = 0;
1521 PyObject *result;
1522 int trans_table[256];
1523 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1526 &tableobj, &delobj))
1527 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 if (PyBytes_Check(tableobj)) {
1530 table = PyBytes_AS_STRING(tableobj);
1531 tablen = PyBytes_GET_SIZE(tableobj);
1532 }
1533 else if (tableobj == Py_None) {
1534 table = NULL;
1535 tablen = 256;
1536 }
1537 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1538 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 if (tablen != 256) {
1541 PyErr_SetString(PyExc_ValueError,
1542 "translation table must be 256 characters long");
1543 return NULL;
1544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (delobj != NULL) {
1547 if (PyBytes_Check(delobj)) {
1548 del_table = PyBytes_AS_STRING(delobj);
1549 dellen = PyBytes_GET_SIZE(delobj);
1550 }
1551 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1552 return NULL;
1553 }
1554 else {
1555 del_table = NULL;
1556 dellen = 0;
1557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 inlen = PyBytes_GET_SIZE(input_obj);
1560 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1561 if (result == NULL)
1562 return NULL;
1563 output_start = output = PyBytes_AsString(result);
1564 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 if (dellen == 0 && table != NULL) {
1567 /* If no deletions are required, use faster code */
1568 for (i = inlen; --i >= 0; ) {
1569 c = Py_CHARMASK(*input++);
1570 if (Py_CHARMASK((*output++ = table[c])) != c)
1571 changed = 1;
1572 }
1573 if (changed || !PyBytes_CheckExact(input_obj))
1574 return result;
1575 Py_DECREF(result);
1576 Py_INCREF(input_obj);
1577 return input_obj;
1578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 if (table == NULL) {
1581 for (i = 0; i < 256; i++)
1582 trans_table[i] = Py_CHARMASK(i);
1583 } else {
1584 for (i = 0; i < 256; i++)
1585 trans_table[i] = Py_CHARMASK(table[i]);
1586 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 for (i = 0; i < dellen; i++)
1589 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 for (i = inlen; --i >= 0; ) {
1592 c = Py_CHARMASK(*input++);
1593 if (trans_table[c] != -1)
1594 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1595 continue;
1596 changed = 1;
1597 }
1598 if (!changed && PyBytes_CheckExact(input_obj)) {
1599 Py_DECREF(result);
1600 Py_INCREF(input_obj);
1601 return input_obj;
1602 }
1603 /* Fix the size of the resulting string */
1604 if (inlen > 0)
1605 _PyBytes_Resize(&result, output - output_start);
1606 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607}
1608
1609
Georg Brandlabc38772009-04-12 15:51:51 +00001610static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001611bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001612{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001614}
1615
/* Helpers for finding and counting characters and substrings. */

/* Evaluates to a char * to the first byte equal to c within the first
   target_len bytes of target, or NULL when there is no such byte. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1620
1621/* String ops must return a string. */
1622/* If the object is subclass of string, create a copy */
1623Py_LOCAL(PyBytesObject *)
1624return_self(PyBytesObject *self)
1625{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 if (PyBytes_CheckExact(self)) {
1627 Py_INCREF(self);
1628 return self;
1629 }
1630 return (PyBytesObject *)PyBytes_FromStringAndSize(
1631 PyBytes_AS_STRING(self),
1632 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633}
1634
1635Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001636countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 Py_ssize_t count=0;
1639 const char *start=target;
1640 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 while ( (start=findchar(start, end-start, c)) != NULL ) {
1643 count++;
1644 if (count >= maxcount)
1645 break;
1646 start += 1;
1647 }
1648 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649}
1650
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651
1652/* Algorithms for different cases of string replacement */
1653
1654/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1655Py_LOCAL(PyBytesObject *)
1656replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 const char *to_s, Py_ssize_t to_len,
1658 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 char *self_s, *result_s;
1661 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001662 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001667 /* 1 at the end plus 1 after every character;
1668 count = min(maxcount, self_len + 1) */
1669 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001671 else
1672 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1673 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 /* Check for overflow */
1676 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001677 assert(count > 0);
1678 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 PyErr_SetString(PyExc_OverflowError,
1680 "replacement bytes are too long");
1681 return NULL;
1682 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001683 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 if (! (result = (PyBytesObject *)
1686 PyBytes_FromStringAndSize(NULL, result_len)) )
1687 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 self_s = PyBytes_AS_STRING(self);
1690 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 /* Lay the first one down (guaranteed this will occur) */
1695 Py_MEMCPY(result_s, to_s, to_len);
1696 result_s += to_len;
1697 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 for (i=0; i<count; i++) {
1700 *result_s++ = *self_s++;
1701 Py_MEMCPY(result_s, to_s, to_len);
1702 result_s += to_len;
1703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 /* Copy the rest of the original string */
1706 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709}
1710
1711/* Special case for deleting a single character */
1712/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1713Py_LOCAL(PyBytesObject *)
1714replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 char *self_s, *result_s;
1718 char *start, *next, *end;
1719 Py_ssize_t self_len, result_len;
1720 Py_ssize_t count;
1721 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 self_len = PyBytes_GET_SIZE(self);
1724 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 count = countchar(self_s, self_len, from_c, maxcount);
1727 if (count == 0) {
1728 return return_self(self);
1729 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 result_len = self_len - count; /* from_len == 1 */
1732 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 if ( (result = (PyBytesObject *)
1735 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1736 return NULL;
1737 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 start = self_s;
1740 end = self_s + self_len;
1741 while (count-- > 0) {
1742 next = findchar(start, end-start, from_c);
1743 if (next == NULL)
1744 break;
1745 Py_MEMCPY(result_s, start, next-start);
1746 result_s += (next-start);
1747 start = next+1;
1748 }
1749 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752}
1753
1754/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1755
1756Py_LOCAL(PyBytesObject *)
1757replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 const char *from_s, Py_ssize_t from_len,
1759 Py_ssize_t maxcount) {
1760 char *self_s, *result_s;
1761 char *start, *next, *end;
1762 Py_ssize_t self_len, result_len;
1763 Py_ssize_t count, offset;
1764 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 self_len = PyBytes_GET_SIZE(self);
1767 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 count = stringlib_count(self_s, self_len,
1770 from_s, from_len,
1771 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 if (count == 0) {
1774 /* no matches */
1775 return return_self(self);
1776 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 result_len = self_len - (count * from_len);
1779 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 if ( (result = (PyBytesObject *)
1782 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1783 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 start = self_s;
1788 end = self_s + self_len;
1789 while (count-- > 0) {
1790 offset = stringlib_find(start, end-start,
1791 from_s, from_len,
1792 0);
1793 if (offset == -1)
1794 break;
1795 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 result_s += (next-start);
1800 start = next+from_len;
1801 }
1802 Py_MEMCPY(result_s, start, end-start);
1803 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804}
1805
1806/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1807Py_LOCAL(PyBytesObject *)
1808replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 char from_c, char to_c,
1810 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 char *self_s, *result_s, *start, *end, *next;
1813 Py_ssize_t self_len;
1814 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 /* The result string will be the same size */
1817 self_s = PyBytes_AS_STRING(self);
1818 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 if (next == NULL) {
1823 /* No matches; return the original string */
1824 return return_self(self);
1825 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 /* Need to make a new string */
1828 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1829 if (result == NULL)
1830 return NULL;
1831 result_s = PyBytes_AS_STRING(result);
1832 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 /* change everything in-place, starting with this one */
1835 start = result_s + (next-self_s);
1836 *start = to_c;
1837 start++;
1838 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 while (--maxcount > 0) {
1841 next = findchar(start, end-start, from_c);
1842 if (next == NULL)
1843 break;
1844 *next = to_c;
1845 start = next+1;
1846 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849}
1850
1851/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1852Py_LOCAL(PyBytesObject *)
1853replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 const char *from_s, Py_ssize_t from_len,
1855 const char *to_s, Py_ssize_t to_len,
1856 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 char *result_s, *start, *end;
1859 char *self_s;
1860 Py_ssize_t self_len, offset;
1861 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 self_s = PyBytes_AS_STRING(self);
1866 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 offset = stringlib_find(self_s, self_len,
1869 from_s, from_len,
1870 0);
1871 if (offset == -1) {
1872 /* No matches; return the original string */
1873 return return_self(self);
1874 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 /* Need to make a new string */
1877 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1878 if (result == NULL)
1879 return NULL;
1880 result_s = PyBytes_AS_STRING(result);
1881 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 /* change everything in-place, starting with this one */
1884 start = result_s + offset;
1885 Py_MEMCPY(start, to_s, from_len);
1886 start += from_len;
1887 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 while ( --maxcount > 0) {
1890 offset = stringlib_find(start, end-start,
1891 from_s, from_len,
1892 0);
1893 if (offset==-1)
1894 break;
1895 Py_MEMCPY(start+offset, to_s, from_len);
1896 start += offset+from_len;
1897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900}
1901
1902/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1903Py_LOCAL(PyBytesObject *)
1904replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 char from_c,
1906 const char *to_s, Py_ssize_t to_len,
1907 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 char *self_s, *result_s;
1910 char *start, *next, *end;
1911 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001912 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 self_s = PyBytes_AS_STRING(self);
1916 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 count = countchar(self_s, self_len, from_c, maxcount);
1919 if (count == 0) {
1920 /* no matches, return unchanged */
1921 return return_self(self);
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 /* use the difference between current and new, hence the "-1" */
1925 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001926 assert(count > 0);
1927 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 PyErr_SetString(PyExc_OverflowError,
1929 "replacement bytes are too long");
1930 return NULL;
1931 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001932 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 if ( (result = (PyBytesObject *)
1935 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1936 return NULL;
1937 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 start = self_s;
1940 end = self_s + self_len;
1941 while (count-- > 0) {
1942 next = findchar(start, end-start, from_c);
1943 if (next == NULL)
1944 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 if (next == start) {
1947 /* replace with the 'to' */
1948 Py_MEMCPY(result_s, to_s, to_len);
1949 result_s += to_len;
1950 start += 1;
1951 } else {
1952 /* copy the unchanged old then the 'to' */
1953 Py_MEMCPY(result_s, start, next-start);
1954 result_s += (next-start);
1955 Py_MEMCPY(result_s, to_s, to_len);
1956 result_s += to_len;
1957 start = next+1;
1958 }
1959 }
1960 /* Copy the remainder of the remaining string */
1961 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964}
1965
1966/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1967Py_LOCAL(PyBytesObject *)
1968replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 const char *from_s, Py_ssize_t from_len,
1970 const char *to_s, Py_ssize_t to_len,
1971 Py_ssize_t maxcount) {
1972 char *self_s, *result_s;
1973 char *start, *next, *end;
1974 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001975 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001976 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 self_s = PyBytes_AS_STRING(self);
1979 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 count = stringlib_count(self_s, self_len,
1982 from_s, from_len,
1983 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001985 if (count == 0) {
1986 /* no matches, return unchanged */
1987 return return_self(self);
1988 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 /* Check for overflow */
1991 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001992 assert(count > 0);
1993 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 PyErr_SetString(PyExc_OverflowError,
1995 "replacement bytes are too long");
1996 return NULL;
1997 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001998 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 if ( (result = (PyBytesObject *)
2001 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2002 return NULL;
2003 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 start = self_s;
2006 end = self_s + self_len;
2007 while (count-- > 0) {
2008 offset = stringlib_find(start, end-start,
2009 from_s, from_len,
2010 0);
2011 if (offset == -1)
2012 break;
2013 next = start+offset;
2014 if (next == start) {
2015 /* replace with the 'to' */
2016 Py_MEMCPY(result_s, to_s, to_len);
2017 result_s += to_len;
2018 start += from_len;
2019 } else {
2020 /* copy the unchanged old then the 'to' */
2021 Py_MEMCPY(result_s, start, next-start);
2022 result_s += (next-start);
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start = next+from_len;
2026 }
2027 }
2028 /* Copy the remainder of the remaining string */
2029 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002032}
2033
2034
2035Py_LOCAL(PyBytesObject *)
2036replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 const char *from_s, Py_ssize_t from_len,
2038 const char *to_s, Py_ssize_t to_len,
2039 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 if (maxcount < 0) {
2042 maxcount = PY_SSIZE_T_MAX;
2043 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2044 /* nothing to do; return the original string */
2045 return return_self(self);
2046 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002048 if (maxcount == 0 ||
2049 (from_len == 0 && to_len == 0)) {
2050 /* nothing to do; return the original string */
2051 return return_self(self);
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002056 if (from_len == 0) {
2057 /* insert the 'to' string everywhere. */
2058 /* >>> "Python".replace("", ".") */
2059 /* '.P.y.t.h.o.n.' */
2060 return replace_interleave(self, to_s, to_len, maxcount);
2061 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2064 /* point for an empty self string to generate a non-empty string */
2065 /* Special case so the remaining code always gets a non-empty string */
2066 if (PyBytes_GET_SIZE(self) == 0) {
2067 return return_self(self);
2068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 if (to_len == 0) {
2071 /* delete all occurrences of 'from' string */
2072 if (from_len == 1) {
2073 return replace_delete_single_character(
2074 self, from_s[0], maxcount);
2075 } else {
2076 return replace_delete_substring(self, from_s,
2077 from_len, maxcount);
2078 }
2079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 if (from_len == to_len) {
2084 if (from_len == 1) {
2085 return replace_single_character_in_place(
2086 self,
2087 from_s[0],
2088 to_s[0],
2089 maxcount);
2090 } else {
2091 return replace_substring_in_place(
2092 self, from_s, from_len, to_s, to_len,
2093 maxcount);
2094 }
2095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 /* Otherwise use the more generic algorithms */
2098 if (from_len == 1) {
2099 return replace_single_character(self, from_s[0],
2100 to_s, to_len, maxcount);
2101 } else {
2102 /* len('from')>=2, len('to')>=1 */
2103 return replace_substring(self, from_s, from_len, to_s, to_len,
2104 maxcount);
2105 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106}
2107
2108PyDoc_STRVAR(replace__doc__,
2109"B.replace(old, new[, count]) -> bytes\n\
2110\n\
2111Return a copy of B with all occurrences of subsection\n\
2112old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002113given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
2115static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002116bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 Py_ssize_t count = -1;
2119 PyObject *from, *to;
2120 const char *from_s, *to_s;
2121 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2124 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 if (PyBytes_Check(from)) {
2127 from_s = PyBytes_AS_STRING(from);
2128 from_len = PyBytes_GET_SIZE(from);
2129 }
2130 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2131 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 if (PyBytes_Check(to)) {
2134 to_s = PyBytes_AS_STRING(to);
2135 to_len = PyBytes_GET_SIZE(to);
2136 }
2137 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2138 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 return (PyObject *)replace((PyBytesObject *) self,
2141 from_s, from_len,
2142 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143}
2144
2145/** End DALKE **/
2146
2147/* Matches the end (direction >= 0) or start (direction < 0) of self
2148 * against substr, using the start and end arguments. Returns
2149 * -1 on error, 0 if not found and 1 if found.
2150 */
2151Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002152_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 Py_ssize_t len = PyBytes_GET_SIZE(self);
2156 Py_ssize_t slen;
2157 const char* sub;
2158 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 if (PyBytes_Check(substr)) {
2161 sub = PyBytes_AS_STRING(substr);
2162 slen = PyBytes_GET_SIZE(substr);
2163 }
2164 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2165 return -1;
2166 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 if (direction < 0) {
2171 /* startswith */
2172 if (start+slen > len)
2173 return 0;
2174 } else {
2175 /* endswith */
2176 if (end-start < slen || start > len)
2177 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 if (end-slen > start)
2180 start = end - slen;
2181 }
2182 if (end-start >= slen)
2183 return ! memcmp(str+start, sub, slen);
2184 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185}
2186
2187
2188PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002189"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190\n\
2191Return True if B starts with the specified prefix, False otherwise.\n\
2192With optional start, test B beginning at that position.\n\
2193With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002194prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
2196static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002197bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002199 Py_ssize_t start = 0;
2200 Py_ssize_t end = PY_SSIZE_T_MAX;
2201 PyObject *subobj;
2202 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203
Jesus Ceaac451502011-04-20 17:09:23 +02002204 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 return NULL;
2206 if (PyTuple_Check(subobj)) {
2207 Py_ssize_t i;
2208 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2209 result = _bytes_tailmatch(self,
2210 PyTuple_GET_ITEM(subobj, i),
2211 start, end, -1);
2212 if (result == -1)
2213 return NULL;
2214 else if (result) {
2215 Py_RETURN_TRUE;
2216 }
2217 }
2218 Py_RETURN_FALSE;
2219 }
2220 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002221 if (result == -1) {
2222 if (PyErr_ExceptionMatches(PyExc_TypeError))
2223 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2224 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002225 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002226 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 else
2228 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002229}
2230
2231
2232PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002233"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002234\n\
2235Return True if B ends with the specified suffix, False otherwise.\n\
2236With optional start, test B beginning at that position.\n\
2237With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002238suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239
2240static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002241bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002243 Py_ssize_t start = 0;
2244 Py_ssize_t end = PY_SSIZE_T_MAX;
2245 PyObject *subobj;
2246 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002247
Jesus Ceaac451502011-04-20 17:09:23 +02002248 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002249 return NULL;
2250 if (PyTuple_Check(subobj)) {
2251 Py_ssize_t i;
2252 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2253 result = _bytes_tailmatch(self,
2254 PyTuple_GET_ITEM(subobj, i),
2255 start, end, +1);
2256 if (result == -1)
2257 return NULL;
2258 else if (result) {
2259 Py_RETURN_TRUE;
2260 }
2261 }
2262 Py_RETURN_FALSE;
2263 }
2264 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002265 if (result == -1) {
2266 if (PyErr_ExceptionMatches(PyExc_TypeError))
2267 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2268 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002270 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 else
2272 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002273}
2274
2275
2276PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002277"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002278\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002279Decode B using the codec registered for encoding. Default encoding\n\
2280is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002281handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2282a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002283as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002284able to handle UnicodeDecodeErrors.");
2285
2286static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002287bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002288{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 const char *encoding = NULL;
2290 const char *errors = NULL;
2291 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2294 return NULL;
2295 if (encoding == NULL)
2296 encoding = PyUnicode_GetDefaultEncoding();
2297 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002298}
2299
Guido van Rossum20188312006-05-05 15:15:40 +00002300
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002301PyDoc_STRVAR(splitlines__doc__,
2302"B.splitlines([keepends]) -> list of lines\n\
2303\n\
2304Return a list of the lines in B, breaking at line boundaries.\n\
2305Line breaks are not included in the resulting list unless keepends\n\
2306is given and true.");
2307
2308static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002309bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002310{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002311 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312 int keepends = 0;
2313
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002314 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2315 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002316 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002317
2318 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002319 (PyObject*) self, PyBytes_AS_STRING(self),
2320 PyBytes_GET_SIZE(self), keepends
2321 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002322}
2323
2324
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002325PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002326"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002327\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002328Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002329Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002330Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002331
2332static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002333hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 if (c >= 128)
2336 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002337 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002338 return c - '0';
2339 else {
David Malcolm96960882010-11-05 17:23:41 +00002340 if (Py_ISUPPER(c))
2341 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 if (c >= 'a' && c <= 'f')
2343 return c - 'a' + 10;
2344 }
2345 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002346}
2347
2348static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002349bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002351 PyObject *newstring, *hexobj;
2352 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 Py_ssize_t hexlen, byteslen, i, j;
2354 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002355 void *data;
2356 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002358 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2359 return NULL;
2360 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002361 if (PyUnicode_READY(hexobj))
2362 return NULL;
2363 kind = PyUnicode_KIND(hexobj);
2364 data = PyUnicode_DATA(hexobj);
2365 hexlen = PyUnicode_GET_LENGTH(hexobj);
2366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 byteslen = hexlen/2; /* This overestimates if there are spaces */
2368 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2369 if (!newstring)
2370 return NULL;
2371 buf = PyBytes_AS_STRING(newstring);
2372 for (i = j = 0; i < hexlen; i += 2) {
2373 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002374 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 i++;
2376 if (i >= hexlen)
2377 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002378 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2379 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 if (top == -1 || bot == -1) {
2381 PyErr_Format(PyExc_ValueError,
2382 "non-hexadecimal number found in "
2383 "fromhex() arg at position %zd", i);
2384 goto error;
2385 }
2386 buf[j++] = (top << 4) + bot;
2387 }
2388 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2389 goto error;
2390 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002391
2392 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 Py_XDECREF(newstring);
2394 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002395}
2396
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002397PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002398"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002399
2400static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002401bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002402{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 Py_ssize_t res;
2404 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2405 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002406}
2407
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002408
2409static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002410bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002413}
2414
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002415
2416static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002417bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2419 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2420 _Py_capitalize__doc__},
2421 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2422 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2423 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2424 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2425 endswith__doc__},
2426 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2427 expandtabs__doc__},
2428 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2429 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2430 fromhex_doc},
2431 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2432 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2433 _Py_isalnum__doc__},
2434 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2435 _Py_isalpha__doc__},
2436 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2437 _Py_isdigit__doc__},
2438 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2439 _Py_islower__doc__},
2440 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2441 _Py_isspace__doc__},
2442 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2443 _Py_istitle__doc__},
2444 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2445 _Py_isupper__doc__},
2446 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2447 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2448 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2449 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2450 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2451 _Py_maketrans__doc__},
2452 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2453 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2454 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2455 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2456 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2457 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2458 rpartition__doc__},
2459 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2460 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2461 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002462 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 splitlines__doc__},
2464 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2465 startswith__doc__},
2466 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2467 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2468 _Py_swapcase__doc__},
2469 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2470 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2471 translate__doc__},
2472 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2473 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2474 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2475 sizeof__doc__},
2476 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002477};
2478
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479static PyObject *
2480str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2481
2482static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002483bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002484{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 PyObject *x = NULL;
2486 const char *encoding = NULL;
2487 const char *errors = NULL;
2488 PyObject *new = NULL;
2489 Py_ssize_t size;
2490 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 if (type != &PyBytes_Type)
2493 return str_subtype_new(type, args, kwds);
2494 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2495 &encoding, &errors))
2496 return NULL;
2497 if (x == NULL) {
2498 if (encoding != NULL || errors != NULL) {
2499 PyErr_SetString(PyExc_TypeError,
2500 "encoding or errors without sequence "
2501 "argument");
2502 return NULL;
2503 }
2504 return PyBytes_FromString("");
2505 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 if (PyUnicode_Check(x)) {
2508 /* Encode via the codec registry */
2509 if (encoding == NULL) {
2510 PyErr_SetString(PyExc_TypeError,
2511 "string argument without an encoding");
2512 return NULL;
2513 }
2514 new = PyUnicode_AsEncodedString(x, encoding, errors);
2515 if (new == NULL)
2516 return NULL;
2517 assert(PyBytes_Check(new));
2518 return new;
2519 }
2520 /* Is it an integer? */
2521 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2522 if (size == -1 && PyErr_Occurred()) {
2523 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2524 return NULL;
2525 PyErr_Clear();
2526 }
2527 else if (size < 0) {
2528 PyErr_SetString(PyExc_ValueError, "negative count");
2529 return NULL;
2530 }
2531 else {
2532 new = PyBytes_FromStringAndSize(NULL, size);
2533 if (new == NULL) {
2534 return NULL;
2535 }
2536 if (size > 0) {
2537 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2538 }
2539 return new;
2540 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 /* If it's not unicode, there can't be encoding or errors */
2543 if (encoding != NULL || errors != NULL) {
2544 PyErr_SetString(PyExc_TypeError,
2545 "encoding or errors without a string argument");
2546 return NULL;
2547 }
2548 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002549}
2550
2551PyObject *
2552PyBytes_FromObject(PyObject *x)
2553{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 PyObject *new, *it;
2555 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 if (x == NULL) {
2558 PyErr_BadInternalCall();
2559 return NULL;
2560 }
2561 /* Use the modern buffer interface */
2562 if (PyObject_CheckBuffer(x)) {
2563 Py_buffer view;
2564 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2565 return NULL;
2566 new = PyBytes_FromStringAndSize(NULL, view.len);
2567 if (!new)
2568 goto fail;
2569 /* XXX(brett.cannon): Better way to get to internal buffer? */
2570 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2571 &view, view.len, 'C') < 0)
2572 goto fail;
2573 PyBuffer_Release(&view);
2574 return new;
2575 fail:
2576 Py_XDECREF(new);
2577 PyBuffer_Release(&view);
2578 return NULL;
2579 }
2580 if (PyUnicode_Check(x)) {
2581 PyErr_SetString(PyExc_TypeError,
2582 "cannot convert unicode object to bytes");
2583 return NULL;
2584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 if (PyList_CheckExact(x)) {
2587 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2588 if (new == NULL)
2589 return NULL;
2590 for (i = 0; i < Py_SIZE(x); i++) {
2591 Py_ssize_t value = PyNumber_AsSsize_t(
2592 PyList_GET_ITEM(x, i), PyExc_ValueError);
2593 if (value == -1 && PyErr_Occurred()) {
2594 Py_DECREF(new);
2595 return NULL;
2596 }
2597 if (value < 0 || value >= 256) {
2598 PyErr_SetString(PyExc_ValueError,
2599 "bytes must be in range(0, 256)");
2600 Py_DECREF(new);
2601 return NULL;
2602 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002603 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 }
2605 return new;
2606 }
2607 if (PyTuple_CheckExact(x)) {
2608 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2609 if (new == NULL)
2610 return NULL;
2611 for (i = 0; i < Py_SIZE(x); i++) {
2612 Py_ssize_t value = PyNumber_AsSsize_t(
2613 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2614 if (value == -1 && PyErr_Occurred()) {
2615 Py_DECREF(new);
2616 return NULL;
2617 }
2618 if (value < 0 || value >= 256) {
2619 PyErr_SetString(PyExc_ValueError,
2620 "bytes must be in range(0, 256)");
2621 Py_DECREF(new);
2622 return NULL;
2623 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002624 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 }
2626 return new;
2627 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 /* For iterator version, create a string object and resize as needed */
2630 size = _PyObject_LengthHint(x, 64);
2631 if (size == -1 && PyErr_Occurred())
2632 return NULL;
2633 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2634 returning a shared empty bytes string. This required because we
2635 want to call _PyBytes_Resize() the returned object, which we can
2636 only do on bytes objects with refcount == 1. */
2637 size += 1;
2638 new = PyBytes_FromStringAndSize(NULL, size);
2639 if (new == NULL)
2640 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 /* Get the iterator */
2643 it = PyObject_GetIter(x);
2644 if (it == NULL)
2645 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 /* Run the iterator to exhaustion */
2648 for (i = 0; ; i++) {
2649 PyObject *item;
2650 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002652 /* Get the next item */
2653 item = PyIter_Next(it);
2654 if (item == NULL) {
2655 if (PyErr_Occurred())
2656 goto error;
2657 break;
2658 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 /* Interpret it as an int (__index__) */
2661 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2662 Py_DECREF(item);
2663 if (value == -1 && PyErr_Occurred())
2664 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 /* Range check */
2667 if (value < 0 || value >= 256) {
2668 PyErr_SetString(PyExc_ValueError,
2669 "bytes must be in range(0, 256)");
2670 goto error;
2671 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 /* Append the byte */
2674 if (i >= size) {
2675 size = 2 * size + 1;
2676 if (_PyBytes_Resize(&new, size) < 0)
2677 goto error;
2678 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002679 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 }
2681 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 /* Clean up and return success */
2684 Py_DECREF(it);
2685 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
2687 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 /* Error handling when new != NULL */
2689 Py_XDECREF(it);
2690 Py_DECREF(new);
2691 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692}
2693
2694static PyObject *
2695str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2696{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 PyObject *tmp, *pnew;
2698 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 assert(PyType_IsSubtype(type, &PyBytes_Type));
2701 tmp = bytes_new(&PyBytes_Type, args, kwds);
2702 if (tmp == NULL)
2703 return NULL;
2704 assert(PyBytes_CheckExact(tmp));
2705 n = PyBytes_GET_SIZE(tmp);
2706 pnew = type->tp_alloc(type, n);
2707 if (pnew != NULL) {
2708 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2709 PyBytes_AS_STRING(tmp), n+1);
2710 ((PyBytesObject *)pnew)->ob_shash =
2711 ((PyBytesObject *)tmp)->ob_shash;
2712 }
2713 Py_DECREF(tmp);
2714 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715}
2716
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002717PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002718"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002720bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2721bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002722\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002724 - an iterable yielding integers in range(256)\n\
2725 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002726 - a bytes or a buffer object\n\
2727 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002728
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002729static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002730
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002732 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2733 "bytes",
2734 PyBytesObject_SIZE,
2735 sizeof(char),
2736 bytes_dealloc, /* tp_dealloc */
2737 0, /* tp_print */
2738 0, /* tp_getattr */
2739 0, /* tp_setattr */
2740 0, /* tp_reserved */
2741 (reprfunc)bytes_repr, /* tp_repr */
2742 0, /* tp_as_number */
2743 &bytes_as_sequence, /* tp_as_sequence */
2744 &bytes_as_mapping, /* tp_as_mapping */
2745 (hashfunc)bytes_hash, /* tp_hash */
2746 0, /* tp_call */
2747 bytes_str, /* tp_str */
2748 PyObject_GenericGetAttr, /* tp_getattro */
2749 0, /* tp_setattro */
2750 &bytes_as_buffer, /* tp_as_buffer */
2751 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2752 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2753 bytes_doc, /* tp_doc */
2754 0, /* tp_traverse */
2755 0, /* tp_clear */
2756 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2757 0, /* tp_weaklistoffset */
2758 bytes_iter, /* tp_iter */
2759 0, /* tp_iternext */
2760 bytes_methods, /* tp_methods */
2761 0, /* tp_members */
2762 0, /* tp_getset */
2763 &PyBaseObject_Type, /* tp_base */
2764 0, /* tp_dict */
2765 0, /* tp_descr_get */
2766 0, /* tp_descr_set */
2767 0, /* tp_dictoffset */
2768 0, /* tp_init */
2769 0, /* tp_alloc */
2770 bytes_new, /* tp_new */
2771 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002772};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002773
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774void
2775PyBytes_Concat(register PyObject **pv, register PyObject *w)
2776{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 register PyObject *v;
2778 assert(pv != NULL);
2779 if (*pv == NULL)
2780 return;
2781 if (w == NULL) {
2782 Py_DECREF(*pv);
2783 *pv = NULL;
2784 return;
2785 }
2786 v = bytes_concat(*pv, w);
2787 Py_DECREF(*pv);
2788 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789}
2790
2791void
2792PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2793{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 PyBytes_Concat(pv, w);
2795 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796}
2797
2798
2799/* The following function breaks the notion that strings are immutable:
2800 it changes the size of a string. We get away with this only if there
2801 is only one module referencing the object. You can also think of it
2802 as creating a new string object and destroying the old one, only
2803 more efficiently. In any case, don't use this if the string may
2804 already be known to some other part of the code...
2805 Note that if there's not enough memory to resize the string, the original
2806 string object at *pv is deallocated, *pv is set to NULL, an "out of
2807 memory" exception is set, and -1 is returned. Else (on success) 0 is
2808 returned, and the value in *pv may or may not be the same as on input.
2809 As always, an extra byte is allocated for a trailing \0 byte (newsize
2810 does *not* include that), and a trailing \0 byte is stored.
2811*/
2812
2813int
2814_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2815{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 register PyObject *v;
2817 register PyBytesObject *sv;
2818 v = *pv;
2819 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2820 *pv = 0;
2821 Py_DECREF(v);
2822 PyErr_BadInternalCall();
2823 return -1;
2824 }
2825 /* XXX UNREF/NEWREF interface should be more symmetrical */
2826 _Py_DEC_REFTOTAL;
2827 _Py_ForgetReference(v);
2828 *pv = (PyObject *)
2829 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2830 if (*pv == NULL) {
2831 PyObject_Del(v);
2832 PyErr_NoMemory();
2833 return -1;
2834 }
2835 _Py_NewReference(*pv);
2836 sv = (PyBytesObject *) *pv;
2837 Py_SIZE(sv) = newsize;
2838 sv->ob_sval[newsize] = '\0';
2839 sv->ob_shash = -1; /* invalidate cached hash value */
2840 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002841}
2842
2843/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2844 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2845 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002846 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847 * . *pbuf is set to point into it,
2848 * *plen set to the # of chars following that.
2849 * Caller must decref it when done using pbuf.
2850 * The string starting at *pbuf is of the form
2851 * "-"? ("0x" | "0X")? digit+
2852 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2853 * set in flags. The case of hex digits will be correct,
2854 * There will be at least prec digits, zero-filled on the left if
2855 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 * val object to be converted
2857 * flags bitmask of format flags; only F_ALT is looked at
2858 * prec minimum number of digits; 0-fill on left if needed
2859 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860 *
2861 * CAUTION: o, x and X conversions on regular ints can never
2862 * produce a '-' sign, but can for Python's unbounded ints.
2863 */
2864PyObject*
2865_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 PyObject *result = NULL;
2869 char *buf;
2870 Py_ssize_t i;
2871 int sign; /* 1 if '-', else 0 */
2872 int len; /* number of characters */
2873 Py_ssize_t llen;
2874 int numdigits; /* len == numnondigits + numdigits */
2875 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 /* Avoid exceeding SSIZE_T_MAX */
2878 if (prec > INT_MAX-3) {
2879 PyErr_SetString(PyExc_OverflowError,
2880 "precision too large");
2881 return NULL;
2882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 switch (type) {
2885 case 'd':
2886 case 'u':
2887 /* Special-case boolean: we want 0/1 */
2888 if (PyBool_Check(val))
2889 result = PyNumber_ToBase(val, 10);
2890 else
2891 result = Py_TYPE(val)->tp_str(val);
2892 break;
2893 case 'o':
2894 numnondigits = 2;
2895 result = PyNumber_ToBase(val, 8);
2896 break;
2897 case 'x':
2898 case 'X':
2899 numnondigits = 2;
2900 result = PyNumber_ToBase(val, 16);
2901 break;
2902 default:
2903 assert(!"'type' not in [duoxX]");
2904 }
2905 if (!result)
2906 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002908 buf = _PyUnicode_AsString(result);
2909 if (!buf) {
2910 Py_DECREF(result);
2911 return NULL;
2912 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002914 /* To modify the string in-place, there can only be one reference. */
2915 if (Py_REFCNT(result) != 1) {
2916 PyErr_BadInternalCall();
2917 return NULL;
2918 }
2919 llen = PyUnicode_GetSize(result);
2920 if (llen > INT_MAX) {
2921 PyErr_SetString(PyExc_ValueError,
2922 "string too large in _PyBytes_FormatLong");
2923 return NULL;
2924 }
2925 len = (int)llen;
2926 if (buf[len-1] == 'L') {
2927 --len;
2928 buf[len] = '\0';
2929 }
2930 sign = buf[0] == '-';
2931 numnondigits += sign;
2932 numdigits = len - numnondigits;
2933 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002935 /* Get rid of base marker unless F_ALT */
2936 if (((flags & F_ALT) == 0 &&
2937 (type == 'o' || type == 'x' || type == 'X'))) {
2938 assert(buf[sign] == '0');
2939 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2940 buf[sign+1] == 'o');
2941 numnondigits -= 2;
2942 buf += 2;
2943 len -= 2;
2944 if (sign)
2945 buf[0] = '-';
2946 assert(len == numnondigits + numdigits);
2947 assert(numdigits > 0);
2948 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 /* Fill with leading zeroes to meet minimum width. */
2951 if (prec > numdigits) {
2952 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2953 numnondigits + prec);
2954 char *b1;
2955 if (!r1) {
2956 Py_DECREF(result);
2957 return NULL;
2958 }
2959 b1 = PyBytes_AS_STRING(r1);
2960 for (i = 0; i < numnondigits; ++i)
2961 *b1++ = *buf++;
2962 for (i = 0; i < prec - numdigits; i++)
2963 *b1++ = '0';
2964 for (i = 0; i < numdigits; i++)
2965 *b1++ = *buf++;
2966 *b1 = '\0';
2967 Py_DECREF(result);
2968 result = r1;
2969 buf = PyBytes_AS_STRING(result);
2970 len = numnondigits + prec;
2971 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 /* Fix up case for hex conversions. */
2974 if (type == 'X') {
2975 /* Need to convert all lower case letters to upper case.
2976 and need to convert 0x to 0X (and -0x to -0X). */
2977 for (i = 0; i < len; i++)
2978 if (buf[i] >= 'a' && buf[i] <= 'x')
2979 buf[i] -= 'a'-'A';
2980 }
2981 *pbuf = buf;
2982 *plen = len;
2983 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002984}
2985
2986void
2987PyBytes_Fini(void)
2988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 int i;
2990 for (i = 0; i < UCHAR_MAX + 1; i++) {
2991 Py_XDECREF(characters[i]);
2992 characters[i] = NULL;
2993 }
2994 Py_XDECREF(nullstring);
2995 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996}
2997
Benjamin Peterson4116f362008-05-27 00:36:20 +00002998/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002999
3000typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 PyObject_HEAD
3002 Py_ssize_t it_index;
3003 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003005
3006static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 _PyObject_GC_UNTRACK(it);
3010 Py_XDECREF(it->it_seq);
3011 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012}
3013
3014static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 Py_VISIT(it->it_seq);
3018 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019}
3020
3021static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 PyBytesObject *seq;
3025 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 assert(it != NULL);
3028 seq = it->it_seq;
3029 if (seq == NULL)
3030 return NULL;
3031 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3034 item = PyLong_FromLong(
3035 (unsigned char)seq->ob_sval[it->it_index]);
3036 if (item != NULL)
3037 ++it->it_index;
3038 return item;
3039 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 Py_DECREF(seq);
3042 it->it_seq = NULL;
3043 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044}
3045
3046static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003047striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 Py_ssize_t len = 0;
3050 if (it->it_seq)
3051 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3052 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003053}
3054
3055PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003056 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003058static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3060 length_hint_doc},
3061 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062};
3063
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003065 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3066 "bytes_iterator", /* tp_name */
3067 sizeof(striterobject), /* tp_basicsize */
3068 0, /* tp_itemsize */
3069 /* methods */
3070 (destructor)striter_dealloc, /* tp_dealloc */
3071 0, /* tp_print */
3072 0, /* tp_getattr */
3073 0, /* tp_setattr */
3074 0, /* tp_reserved */
3075 0, /* tp_repr */
3076 0, /* tp_as_number */
3077 0, /* tp_as_sequence */
3078 0, /* tp_as_mapping */
3079 0, /* tp_hash */
3080 0, /* tp_call */
3081 0, /* tp_str */
3082 PyObject_GenericGetAttr, /* tp_getattro */
3083 0, /* tp_setattro */
3084 0, /* tp_as_buffer */
3085 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3086 0, /* tp_doc */
3087 (traverseproc)striter_traverse, /* tp_traverse */
3088 0, /* tp_clear */
3089 0, /* tp_richcompare */
3090 0, /* tp_weaklistoffset */
3091 PyObject_SelfIter, /* tp_iter */
3092 (iternextfunc)striter_next, /* tp_iternext */
3093 striter_methods, /* tp_methods */
3094 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003095};
3096
3097static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003098bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003099{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 if (!PyBytes_Check(seq)) {
3103 PyErr_BadInternalCall();
3104 return NULL;
3105 }
3106 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3107 if (it == NULL)
3108 return NULL;
3109 it->it_index = 0;
3110 Py_INCREF(seq);
3111 it->it_seq = (PyBytesObject *)seq;
3112 _PyObject_GC_TRACK(it);
3113 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003114}