blob: 17e31b9a694dcb363832658c26e2f98f36536689 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 Py_ssize_t i, length = Py_SIZE(op);
569 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 unsigned char quote, *s, *p;
572
573 /* Compute size of output string */
574 squotes = dquotes = 0;
575 newsize = 3; /* b'' */
576 s = (unsigned char*)op->ob_sval;
577 for (i = 0; i < length; i++) {
578 switch(s[i]) {
579 case '\'': squotes++; newsize++; break;
580 case '"': dquotes++; newsize++; break;
581 case '\\': case '\t': case '\n': case '\r':
582 newsize += 2; break; /* \C */
583 default:
584 if (s[i] < ' ' || s[i] >= 0x7f)
585 newsize += 4; /* \xHH */
586 else
587 newsize++;
588 }
589 }
590 quote = '\'';
591 if (smartquotes && squotes && !dquotes)
592 quote = '"';
593 if (squotes && quote == '\'')
594 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
629 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000630}
631
Neal Norwitz6968b052007-02-27 19:02:19 +0000632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000633bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000634{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 if (Py_BytesWarningFlag) {
642 if (PyErr_WarnEx(PyExc_BytesWarning,
643 "str() on a bytes instance", 1))
644 return NULL;
645 }
646 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000649static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653}
Neal Norwitz6968b052007-02-27 19:02:19 +0000654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655/* This is also used by PyBytes_Concat() */
656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 Py_ssize_t size;
660 Py_buffer va, vb;
661 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 va.len = -1;
664 vb.len = -1;
665 if (_getbuffer(a, &va) < 0 ||
666 _getbuffer(b, &vb) < 0) {
667 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
668 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
669 goto done;
670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 /* Optimize end cases */
673 if (va.len == 0 && PyBytes_CheckExact(b)) {
674 result = b;
675 Py_INCREF(result);
676 goto done;
677 }
678 if (vb.len == 0 && PyBytes_CheckExact(a)) {
679 result = a;
680 Py_INCREF(result);
681 goto done;
682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 size = va.len + vb.len;
685 if (size < 0) {
686 PyErr_NoMemory();
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 result = PyBytes_FromStringAndSize(NULL, size);
691 if (result != NULL) {
692 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
693 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
696 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 if (va.len != -1)
698 PyBuffer_Release(&va);
699 if (vb.len != -1)
700 PyBuffer_Release(&vb);
701 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702}
Neal Norwitz6968b052007-02-27 19:02:19 +0000703
704static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000705bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 register Py_ssize_t i;
708 register Py_ssize_t j;
709 register Py_ssize_t size;
710 register PyBytesObject *op;
711 size_t nbytes;
712 if (n < 0)
713 n = 0;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000717 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 PyErr_SetString(PyExc_OverflowError,
719 "repeated bytes are too long");
720 return NULL;
721 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000722 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
724 Py_INCREF(a);
725 return (PyObject *)a;
726 }
727 nbytes = (size_t)size;
728 if (nbytes + PyBytesObject_SIZE <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
733 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
734 if (op == NULL)
735 return PyErr_NoMemory();
736 PyObject_INIT_VAR(op, &PyBytes_Type, size);
737 op->ob_shash = -1;
738 op->ob_sval[size] = '\0';
739 if (Py_SIZE(a) == 1 && n > 0) {
740 memset(op->ob_sval, a->ob_sval[0] , n);
741 return (PyObject *) op;
742 }
743 i = 0;
744 if (i < size) {
745 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
746 i = Py_SIZE(a);
747 }
748 while (i < size) {
749 j = (i <= size-i) ? i : size-i;
750 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
751 i += j;
752 }
753 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000754}
755
Guido van Rossum98297ee2007-11-06 21:34:58 +0000756static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000757bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000758{
759 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
760 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000761 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000762 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000763 PyErr_Clear();
764 if (_getbuffer(arg, &varg) < 0)
765 return -1;
766 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
767 varg.buf, varg.len, 0);
768 PyBuffer_Release(&varg);
769 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000770 }
771 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
773 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774 }
775
Antoine Pitrou0010d372010-08-15 17:12:55 +0000776 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777}
778
Neal Norwitz6968b052007-02-27 19:02:19 +0000779static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000780bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 if (i < 0 || i >= Py_SIZE(a)) {
783 PyErr_SetString(PyExc_IndexError, "index out of range");
784 return NULL;
785 }
786 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000787}
788
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000789static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000790bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 int c;
793 Py_ssize_t len_a, len_b;
794 Py_ssize_t min_len;
795 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 /* Make sure both arguments are strings. */
798 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
799 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
800 (PyObject_IsInstance((PyObject*)a,
801 (PyObject*)&PyUnicode_Type) ||
802 PyObject_IsInstance((PyObject*)b,
803 (PyObject*)&PyUnicode_Type))) {
804 if (PyErr_WarnEx(PyExc_BytesWarning,
805 "Comparison between bytes and string", 1))
806 return NULL;
807 }
808 result = Py_NotImplemented;
809 goto out;
810 }
811 if (a == b) {
812 switch (op) {
813 case Py_EQ:case Py_LE:case Py_GE:
814 result = Py_True;
815 goto out;
816 case Py_NE:case Py_LT:case Py_GT:
817 result = Py_False;
818 goto out;
819 }
820 }
821 if (op == Py_EQ) {
822 /* Supporting Py_NE here as well does not save
823 much time, since Py_NE is rarely used. */
824 if (Py_SIZE(a) == Py_SIZE(b)
825 && (a->ob_sval[0] == b->ob_sval[0]
826 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
827 result = Py_True;
828 } else {
829 result = Py_False;
830 }
831 goto out;
832 }
833 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
834 min_len = (len_a < len_b) ? len_a : len_b;
835 if (min_len > 0) {
836 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
837 if (c==0)
838 c = memcmp(a->ob_sval, b->ob_sval, min_len);
839 } else
840 c = 0;
841 if (c == 0)
842 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
843 switch (op) {
844 case Py_LT: c = c < 0; break;
845 case Py_LE: c = c <= 0; break;
846 case Py_EQ: assert(0); break; /* unreachable */
847 case Py_NE: c = c != 0; break;
848 case Py_GT: c = c > 0; break;
849 case Py_GE: c = c >= 0; break;
850 default:
851 result = Py_NotImplemented;
852 goto out;
853 }
854 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000855 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_INCREF(result);
857 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000858}
859
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000860static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 register Py_ssize_t len;
864 register unsigned char *p;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100865 register Py_uhash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 if (a->ob_shash != -1)
868 return a->ob_shash;
869 len = Py_SIZE(a);
870 p = (unsigned char *) a->ob_sval;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100871 x = (Py_uhash_t)*p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 while (--len >= 0)
Mark Dickinson57e683e2011-09-24 18:18:40 +0100873 x = (1000003U*x) ^ (Py_uhash_t)*p++;
874 x ^= (Py_uhash_t)Py_SIZE(a);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 if (x == -1)
876 x = -2;
877 a->ob_shash = x;
878 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000879}
880
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000881static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000882bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 if (PyIndex_Check(item)) {
885 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
886 if (i == -1 && PyErr_Occurred())
887 return NULL;
888 if (i < 0)
889 i += PyBytes_GET_SIZE(self);
890 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
891 PyErr_SetString(PyExc_IndexError,
892 "index out of range");
893 return NULL;
894 }
895 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
896 }
897 else if (PySlice_Check(item)) {
898 Py_ssize_t start, stop, step, slicelength, cur, i;
899 char* source_buf;
900 char* result_buf;
901 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000902
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000903 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyBytes_GET_SIZE(self),
905 &start, &stop, &step, &slicelength) < 0) {
906 return NULL;
907 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 if (slicelength <= 0) {
910 return PyBytes_FromStringAndSize("", 0);
911 }
912 else if (start == 0 && step == 1 &&
913 slicelength == PyBytes_GET_SIZE(self) &&
914 PyBytes_CheckExact(self)) {
915 Py_INCREF(self);
916 return (PyObject *)self;
917 }
918 else if (step == 1) {
919 return PyBytes_FromStringAndSize(
920 PyBytes_AS_STRING(self) + start,
921 slicelength);
922 }
923 else {
924 source_buf = PyBytes_AS_STRING(self);
925 result = PyBytes_FromStringAndSize(NULL, slicelength);
926 if (result == NULL)
927 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 result_buf = PyBytes_AS_STRING(result);
930 for (cur = start, i = 0; i < slicelength;
931 cur += step, i++) {
932 result_buf[i] = source_buf[cur];
933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 return result;
936 }
937 }
938 else {
939 PyErr_Format(PyExc_TypeError,
940 "byte indices must be integers, not %.200s",
941 Py_TYPE(item)->tp_name);
942 return NULL;
943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000944}
945
946static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000947bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
950 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 (lenfunc)bytes_length, /*sq_length*/
955 (binaryfunc)bytes_concat, /*sq_concat*/
956 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
957 (ssizeargfunc)bytes_item, /*sq_item*/
958 0, /*sq_slice*/
959 0, /*sq_ass_item*/
960 0, /*sq_ass_slice*/
961 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (lenfunc)bytes_length,
966 (binaryfunc)bytes_subscript,
967 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968};
969
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000970static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 (getbufferproc)bytes_buffer_getbuffer,
972 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
975
/* Selectors for which end(s) of a bytes object get stripped. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name embedded in a format string: skips the leading "|O:". */
#define STRIPNAME(i) (stripformat[i]+3)
984
Neal Norwitz6968b052007-02-27 19:02:19 +0000985PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000986"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000987\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000988Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989If sep is not specified or is None, B is split on ASCII whitespace\n\
990characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000991If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000992
993static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000994bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +0000995{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
997 Py_ssize_t maxsplit = -1;
998 const char *s = PyBytes_AS_STRING(self), *sub;
999 Py_buffer vsub;
1000 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1003 return NULL;
1004 if (maxsplit < 0)
1005 maxsplit = PY_SSIZE_T_MAX;
1006 if (subobj == Py_None)
1007 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1008 if (_getbuffer(subobj, &vsub) < 0)
1009 return NULL;
1010 sub = vsub.buf;
1011 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1014 PyBuffer_Release(&vsub);
1015 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001016}
1017
Neal Norwitz6968b052007-02-27 19:02:19 +00001018PyDoc_STRVAR(partition__doc__,
1019"B.partition(sep) -> (head, sep, tail)\n\
1020\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001021Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001022the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001023found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001024
1025static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001026bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001027{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 const char *sep;
1029 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 if (PyBytes_Check(sep_obj)) {
1032 sep = PyBytes_AS_STRING(sep_obj);
1033 sep_len = PyBytes_GET_SIZE(sep_obj);
1034 }
1035 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1036 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 return stringlib_partition(
1039 (PyObject*) self,
1040 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1041 sep_obj, sep, sep_len
1042 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001043}
1044
1045PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001046"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001047\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001048Search for the separator sep in B, starting at the end of B,\n\
1049and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001050part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001051bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001052
1053static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001054bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001055{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 const char *sep;
1057 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 if (PyBytes_Check(sep_obj)) {
1060 sep = PyBytes_AS_STRING(sep_obj);
1061 sep_len = PyBytes_GET_SIZE(sep_obj);
1062 }
1063 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1064 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 return stringlib_rpartition(
1067 (PyObject*) self,
1068 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1069 sep_obj, sep, sep_len
1070 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001071}
1072
Neal Norwitz6968b052007-02-27 19:02:19 +00001073PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001074"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001075\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001076Return a list of the sections in B, using sep as the delimiter,\n\
1077starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001078If sep is not given, B is split on ASCII whitespace characters\n\
1079(space, tab, return, newline, formfeed, vertical tab).\n\
1080If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001082
Neal Norwitz6968b052007-02-27 19:02:19 +00001083static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001084bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1087 Py_ssize_t maxsplit = -1;
1088 const char *s = PyBytes_AS_STRING(self), *sub;
1089 Py_buffer vsub;
1090 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1093 return NULL;
1094 if (maxsplit < 0)
1095 maxsplit = PY_SSIZE_T_MAX;
1096 if (subobj == Py_None)
1097 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1098 if (_getbuffer(subobj, &vsub) < 0)
1099 return NULL;
1100 sub = vsub.buf;
1101 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1104 PyBuffer_Release(&vsub);
1105 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001106}
1107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108
1109PyDoc_STRVAR(join__doc__,
1110"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001111\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001112Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1114
Neal Norwitz6968b052007-02-27 19:02:19 +00001115static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001116bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 char *sep = PyBytes_AS_STRING(self);
1119 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1120 PyObject *res = NULL;
1121 char *p;
1122 Py_ssize_t seqlen = 0;
1123 size_t sz = 0;
1124 Py_ssize_t i;
1125 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 seq = PySequence_Fast(orig, "");
1128 if (seq == NULL) {
1129 return NULL;
1130 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 seqlen = PySequence_Size(seq);
1133 if (seqlen == 0) {
1134 Py_DECREF(seq);
1135 return PyBytes_FromString("");
1136 }
1137 if (seqlen == 1) {
1138 item = PySequence_Fast_GET_ITEM(seq, 0);
1139 if (PyBytes_CheckExact(item)) {
1140 Py_INCREF(item);
1141 Py_DECREF(seq);
1142 return item;
1143 }
1144 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 /* There are at least two things to join, or else we have a subclass
1147 * of the builtin types in the sequence.
1148 * Do a pre-pass to figure out the total amount of space we'll
1149 * need (sz), and see whether all argument are bytes.
1150 */
1151 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1152 for (i = 0; i < seqlen; i++) {
1153 const size_t old_sz = sz;
1154 item = PySequence_Fast_GET_ITEM(seq, i);
1155 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1156 PyErr_Format(PyExc_TypeError,
1157 "sequence item %zd: expected bytes,"
1158 " %.80s found",
1159 i, Py_TYPE(item)->tp_name);
1160 Py_DECREF(seq);
1161 return NULL;
1162 }
1163 sz += Py_SIZE(item);
1164 if (i != 0)
1165 sz += seplen;
1166 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1167 PyErr_SetString(PyExc_OverflowError,
1168 "join() result is too long for bytes");
1169 Py_DECREF(seq);
1170 return NULL;
1171 }
1172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 /* Allocate result space. */
1175 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1176 if (res == NULL) {
1177 Py_DECREF(seq);
1178 return NULL;
1179 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* Catenate everything. */
1182 /* I'm not worried about a PyByteArray item growing because there's
1183 nowhere in this function where we release the GIL. */
1184 p = PyBytes_AS_STRING(res);
1185 for (i = 0; i < seqlen; ++i) {
1186 size_t n;
1187 char *q;
1188 if (i) {
1189 Py_MEMCPY(p, sep, seplen);
1190 p += seplen;
1191 }
1192 item = PySequence_Fast_GET_ITEM(seq, i);
1193 n = Py_SIZE(item);
1194 if (PyBytes_Check(item))
1195 q = PyBytes_AS_STRING(item);
1196 else
1197 q = PyByteArray_AS_STRING(item);
1198 Py_MEMCPY(p, q, n);
1199 p += n;
1200 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 Py_DECREF(seq);
1203 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204}
1205
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206PyObject *
1207_PyBytes_Join(PyObject *sep, PyObject *x)
1208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 assert(sep != NULL && PyBytes_Check(sep));
1210 assert(x != NULL);
1211 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212}
1213
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clamped to `len`; negative `start`/`end` are taken relative
 * to `len` and clamped at 0.  `start` and `end` must be modifiable
 * lvalues; `len` may be evaluated more than once.
 *
 * Wrapped in do { } while (0) so that it expands to a single statement
 * and can be used safely in an unbraced if/else.  Invoke it with a
 * trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
1229Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001230bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 PyObject *subobj;
1233 const char *sub;
1234 Py_ssize_t sub_len;
1235 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236
Jesus Ceaac451502011-04-20 17:09:23 +02001237 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1238 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 if (PyBytes_Check(subobj)) {
1242 sub = PyBytes_AS_STRING(subobj);
1243 sub_len = PyBytes_GET_SIZE(subobj);
1244 }
1245 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1246 /* XXX - the "expected a character buffer object" is pretty
1247 confusing for a non-expert. remap to something else ? */
1248 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (dir > 0)
1251 return stringlib_find_slice(
1252 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1253 sub, sub_len, start, end);
1254 else
1255 return stringlib_rfind_slice(
1256 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1257 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258}
1259
1260
1261PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001262"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001263\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001264Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001265such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001267\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268Return -1 on failure.");
1269
Neal Norwitz6968b052007-02-27 19:02:19 +00001270static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001271bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 Py_ssize_t result = bytes_find_internal(self, args, +1);
1274 if (result == -2)
1275 return NULL;
1276 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001277}
1278
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
1280PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001281"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001282\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283Like B.find() but raise ValueError when the substring is not found.");
1284
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001285static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001286bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 Py_ssize_t result = bytes_find_internal(self, args, +1);
1289 if (result == -2)
1290 return NULL;
1291 if (result == -1) {
1292 PyErr_SetString(PyExc_ValueError,
1293 "substring not found");
1294 return NULL;
1295 }
1296 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001297}
1298
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001299
1300PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001301"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001302\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001304such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307Return -1 on failure.");
1308
Neal Norwitz6968b052007-02-27 19:02:19 +00001309static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001310bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 Py_ssize_t result = bytes_find_internal(self, args, -1);
1313 if (result == -2)
1314 return NULL;
1315 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001316}
1317
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001318
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001320"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001321\n\
1322Like B.rfind() but raise ValueError when the substring is not found.");
1323
1324static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001325bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001326{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 Py_ssize_t result = bytes_find_internal(self, args, -1);
1328 if (result == -2)
1329 return NULL;
1330 if (result == -1) {
1331 PyErr_SetString(PyExc_ValueError,
1332 "substring not found");
1333 return NULL;
1334 }
1335 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001336}
1337
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338
1339Py_LOCAL_INLINE(PyObject *)
1340do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 Py_buffer vsep;
1343 char *s = PyBytes_AS_STRING(self);
1344 Py_ssize_t len = PyBytes_GET_SIZE(self);
1345 char *sep;
1346 Py_ssize_t seplen;
1347 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 if (_getbuffer(sepobj, &vsep) < 0)
1350 return NULL;
1351 sep = vsep.buf;
1352 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 i = 0;
1355 if (striptype != RIGHTSTRIP) {
1356 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1357 i++;
1358 }
1359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 j = len;
1362 if (striptype != LEFTSTRIP) {
1363 do {
1364 j--;
1365 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1366 j++;
1367 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1372 Py_INCREF(self);
1373 return (PyObject*)self;
1374 }
1375 else
1376 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001377}
1378
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
1380Py_LOCAL_INLINE(PyObject *)
1381do_strip(PyBytesObject *self, int striptype)
1382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 char *s = PyBytes_AS_STRING(self);
1384 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 i = 0;
1387 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001388 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 i++;
1390 }
1391 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 j = len;
1394 if (striptype != LEFTSTRIP) {
1395 do {
1396 j--;
David Malcolm96960882010-11-05 17:23:41 +00001397 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 j++;
1399 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1402 Py_INCREF(self);
1403 return (PyObject*)self;
1404 }
1405 else
1406 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407}
1408
1409
1410Py_LOCAL_INLINE(PyObject *)
1411do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1416 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (sep != NULL && sep != Py_None) {
1419 return do_xstrip(self, striptype, sep);
1420 }
1421 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422}
1423
1424
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001425PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001427\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001428Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001431bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 if (PyTuple_GET_SIZE(args) == 0)
1434 return do_strip(self, BOTHSTRIP); /* Common case */
1435 else
1436 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437}
1438
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001440PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001443Strip leading bytes contained in the argument.\n\
1444If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001445static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001446bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 if (PyTuple_GET_SIZE(args) == 0)
1449 return do_strip(self, LEFTSTRIP); /* Common case */
1450 else
1451 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452}
1453
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001454
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001458Strip trailing bytes contained in the argument.\n\
1459If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001461bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 if (PyTuple_GET_SIZE(args) == 0)
1464 return do_strip(self, RIGHTSTRIP); /* Common case */
1465 else
1466 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467}
Neal Norwitz6968b052007-02-27 19:02:19 +00001468
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469
1470PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001471"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001472\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001474string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001475as in slice notation.");
1476
1477static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001478bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 PyObject *sub_obj;
1481 const char *str = PyBytes_AS_STRING(self), *sub;
1482 Py_ssize_t sub_len;
1483 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484
Jesus Ceaac451502011-04-20 17:09:23 +02001485 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 if (PyBytes_Check(sub_obj)) {
1489 sub = PyBytes_AS_STRING(sub_obj);
1490 sub_len = PyBytes_GET_SIZE(sub_obj);
1491 }
1492 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1493 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 return PyLong_FromSsize_t(
1498 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1499 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001500}
1501
1502
1503PyDoc_STRVAR(translate__doc__,
1504"B.translate(table[, deletechars]) -> bytes\n\
1505\n\
1506Return a copy of B, where all characters occurring in the\n\
1507optional argument deletechars are removed, and the remaining\n\
1508characters have been mapped through the given translation\n\
1509table, which must be a bytes object of length 256.");
1510
1511static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001512bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 register char *input, *output;
1515 const char *table;
1516 register Py_ssize_t i, c, changed = 0;
1517 PyObject *input_obj = (PyObject*)self;
1518 const char *output_start, *del_table=NULL;
1519 Py_ssize_t inlen, tablen, dellen = 0;
1520 PyObject *result;
1521 int trans_table[256];
1522 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1525 &tableobj, &delobj))
1526 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 if (PyBytes_Check(tableobj)) {
1529 table = PyBytes_AS_STRING(tableobj);
1530 tablen = PyBytes_GET_SIZE(tableobj);
1531 }
1532 else if (tableobj == Py_None) {
1533 table = NULL;
1534 tablen = 256;
1535 }
1536 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1537 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (tablen != 256) {
1540 PyErr_SetString(PyExc_ValueError,
1541 "translation table must be 256 characters long");
1542 return NULL;
1543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 if (delobj != NULL) {
1546 if (PyBytes_Check(delobj)) {
1547 del_table = PyBytes_AS_STRING(delobj);
1548 dellen = PyBytes_GET_SIZE(delobj);
1549 }
1550 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1551 return NULL;
1552 }
1553 else {
1554 del_table = NULL;
1555 dellen = 0;
1556 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 inlen = PyBytes_GET_SIZE(input_obj);
1559 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1560 if (result == NULL)
1561 return NULL;
1562 output_start = output = PyBytes_AsString(result);
1563 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 if (dellen == 0 && table != NULL) {
1566 /* If no deletions are required, use faster code */
1567 for (i = inlen; --i >= 0; ) {
1568 c = Py_CHARMASK(*input++);
1569 if (Py_CHARMASK((*output++ = table[c])) != c)
1570 changed = 1;
1571 }
1572 if (changed || !PyBytes_CheckExact(input_obj))
1573 return result;
1574 Py_DECREF(result);
1575 Py_INCREF(input_obj);
1576 return input_obj;
1577 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 if (table == NULL) {
1580 for (i = 0; i < 256; i++)
1581 trans_table[i] = Py_CHARMASK(i);
1582 } else {
1583 for (i = 0; i < 256; i++)
1584 trans_table[i] = Py_CHARMASK(table[i]);
1585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 for (i = 0; i < dellen; i++)
1588 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 for (i = inlen; --i >= 0; ) {
1591 c = Py_CHARMASK(*input++);
1592 if (trans_table[c] != -1)
1593 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1594 continue;
1595 changed = 1;
1596 }
1597 if (!changed && PyBytes_CheckExact(input_obj)) {
1598 Py_DECREF(result);
1599 Py_INCREF(input_obj);
1600 return input_obj;
1601 }
1602 /* Fix the size of the resulting string */
1603 if (inlen > 0)
1604 _PyBytes_Resize(&result, output - output_start);
1605 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606}
1607
1608
Georg Brandlabc38772009-04-12 15:51:51 +00001609static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001610bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001611{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001613}
1614
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615/* find and count characters and substrings */
1616
/* Locate the first byte equal to c within the first target_len bytes of
 * target; evaluates to a char * to that byte, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1619
1620/* String ops must return a string. */
1621/* If the object is subclass of string, create a copy */
1622Py_LOCAL(PyBytesObject *)
1623return_self(PyBytesObject *self)
1624{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 if (PyBytes_CheckExact(self)) {
1626 Py_INCREF(self);
1627 return self;
1628 }
1629 return (PyBytesObject *)PyBytes_FromStringAndSize(
1630 PyBytes_AS_STRING(self),
1631 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632}
1633
1634Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001635countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 Py_ssize_t count=0;
1638 const char *start=target;
1639 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 while ( (start=findchar(start, end-start, c)) != NULL ) {
1642 count++;
1643 if (count >= maxcount)
1644 break;
1645 start += 1;
1646 }
1647 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648}
1649
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650
1651/* Algorithms for different cases of string replacement */
1652
1653/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1654Py_LOCAL(PyBytesObject *)
1655replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 const char *to_s, Py_ssize_t to_len,
1657 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 char *self_s, *result_s;
1660 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001661 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001666 /* 1 at the end plus 1 after every character;
1667 count = min(maxcount, self_len + 1) */
1668 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001670 else
1671 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1672 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 /* Check for overflow */
1675 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001676 assert(count > 0);
1677 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 PyErr_SetString(PyExc_OverflowError,
1679 "replacement bytes are too long");
1680 return NULL;
1681 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001682 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 if (! (result = (PyBytesObject *)
1685 PyBytes_FromStringAndSize(NULL, result_len)) )
1686 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 self_s = PyBytes_AS_STRING(self);
1689 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s, to_s, to_len);
1695 result_s += to_len;
1696 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 for (i=0; i<count; i++) {
1699 *result_s++ = *self_s++;
1700 Py_MEMCPY(result_s, to_s, to_len);
1701 result_s += to_len;
1702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708}
1709
1710/* Special case for deleting a single character */
1711/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1712Py_LOCAL(PyBytesObject *)
1713replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 char *self_s, *result_s;
1717 char *start, *next, *end;
1718 Py_ssize_t self_len, result_len;
1719 Py_ssize_t count;
1720 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 self_len = PyBytes_GET_SIZE(self);
1723 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 count = countchar(self_s, self_len, from_c, maxcount);
1726 if (count == 0) {
1727 return return_self(self);
1728 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 result_len = self_len - count; /* from_len == 1 */
1731 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 if ( (result = (PyBytesObject *)
1734 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1735 return NULL;
1736 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 start = self_s;
1739 end = self_s + self_len;
1740 while (count-- > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 Py_MEMCPY(result_s, start, next-start);
1745 result_s += (next-start);
1746 start = next+1;
1747 }
1748 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751}
1752
1753/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1754
1755Py_LOCAL(PyBytesObject *)
1756replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 const char *from_s, Py_ssize_t from_len,
1758 Py_ssize_t maxcount) {
1759 char *self_s, *result_s;
1760 char *start, *next, *end;
1761 Py_ssize_t self_len, result_len;
1762 Py_ssize_t count, offset;
1763 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 self_len = PyBytes_GET_SIZE(self);
1766 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 count = stringlib_count(self_s, self_len,
1769 from_s, from_len,
1770 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 if (count == 0) {
1773 /* no matches */
1774 return return_self(self);
1775 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001777 result_len = self_len - (count * from_len);
1778 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 if ( (result = (PyBytesObject *)
1781 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1782 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 start = self_s;
1787 end = self_s + self_len;
1788 while (count-- > 0) {
1789 offset = stringlib_find(start, end-start,
1790 from_s, from_len,
1791 0);
1792 if (offset == -1)
1793 break;
1794 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 result_s += (next-start);
1799 start = next+from_len;
1800 }
1801 Py_MEMCPY(result_s, start, end-start);
1802 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803}
1804
1805/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1806Py_LOCAL(PyBytesObject *)
1807replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 char from_c, char to_c,
1809 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 char *self_s, *result_s, *start, *end, *next;
1812 Py_ssize_t self_len;
1813 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 /* The result string will be the same size */
1816 self_s = PyBytes_AS_STRING(self);
1817 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 if (next == NULL) {
1822 /* No matches; return the original string */
1823 return return_self(self);
1824 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 /* Need to make a new string */
1827 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1828 if (result == NULL)
1829 return NULL;
1830 result_s = PyBytes_AS_STRING(result);
1831 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 /* change everything in-place, starting with this one */
1834 start = result_s + (next-self_s);
1835 *start = to_c;
1836 start++;
1837 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 while (--maxcount > 0) {
1840 next = findchar(start, end-start, from_c);
1841 if (next == NULL)
1842 break;
1843 *next = to_c;
1844 start = next+1;
1845 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848}
1849
1850/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1851Py_LOCAL(PyBytesObject *)
1852replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 const char *from_s, Py_ssize_t from_len,
1854 const char *to_s, Py_ssize_t to_len,
1855 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 char *result_s, *start, *end;
1858 char *self_s;
1859 Py_ssize_t self_len, offset;
1860 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 self_s = PyBytes_AS_STRING(self);
1865 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 offset = stringlib_find(self_s, self_len,
1868 from_s, from_len,
1869 0);
1870 if (offset == -1) {
1871 /* No matches; return the original string */
1872 return return_self(self);
1873 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 /* Need to make a new string */
1876 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1877 if (result == NULL)
1878 return NULL;
1879 result_s = PyBytes_AS_STRING(result);
1880 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 /* change everything in-place, starting with this one */
1883 start = result_s + offset;
1884 Py_MEMCPY(start, to_s, from_len);
1885 start += from_len;
1886 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 while ( --maxcount > 0) {
1889 offset = stringlib_find(start, end-start,
1890 from_s, from_len,
1891 0);
1892 if (offset==-1)
1893 break;
1894 Py_MEMCPY(start+offset, to_s, from_len);
1895 start += offset+from_len;
1896 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899}
1900
1901/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1902Py_LOCAL(PyBytesObject *)
1903replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 char from_c,
1905 const char *to_s, Py_ssize_t to_len,
1906 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 char *self_s, *result_s;
1909 char *start, *next, *end;
1910 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001911 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 self_s = PyBytes_AS_STRING(self);
1915 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 count = countchar(self_s, self_len, from_c, maxcount);
1918 if (count == 0) {
1919 /* no matches, return unchanged */
1920 return return_self(self);
1921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 /* use the difference between current and new, hence the "-1" */
1924 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001925 assert(count > 0);
1926 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 PyErr_SetString(PyExc_OverflowError,
1928 "replacement bytes are too long");
1929 return NULL;
1930 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001931 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 if ( (result = (PyBytesObject *)
1934 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1935 return NULL;
1936 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 start = self_s;
1939 end = self_s + self_len;
1940 while (count-- > 0) {
1941 next = findchar(start, end-start, from_c);
1942 if (next == NULL)
1943 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 if (next == start) {
1946 /* replace with the 'to' */
1947 Py_MEMCPY(result_s, to_s, to_len);
1948 result_s += to_len;
1949 start += 1;
1950 } else {
1951 /* copy the unchanged old then the 'to' */
1952 Py_MEMCPY(result_s, start, next-start);
1953 result_s += (next-start);
1954 Py_MEMCPY(result_s, to_s, to_len);
1955 result_s += to_len;
1956 start = next+1;
1957 }
1958 }
1959 /* Copy the remainder of the remaining string */
1960 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963}
1964
1965/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1966Py_LOCAL(PyBytesObject *)
1967replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001968 const char *from_s, Py_ssize_t from_len,
1969 const char *to_s, Py_ssize_t to_len,
1970 Py_ssize_t maxcount) {
1971 char *self_s, *result_s;
1972 char *start, *next, *end;
1973 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001974 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 self_s = PyBytes_AS_STRING(self);
1978 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 count = stringlib_count(self_s, self_len,
1981 from_s, from_len,
1982 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 if (count == 0) {
1985 /* no matches, return unchanged */
1986 return return_self(self);
1987 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 /* Check for overflow */
1990 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001991 assert(count > 0);
1992 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 PyErr_SetString(PyExc_OverflowError,
1994 "replacement bytes are too long");
1995 return NULL;
1996 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001997 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 if ( (result = (PyBytesObject *)
2000 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2001 return NULL;
2002 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 start = self_s;
2005 end = self_s + self_len;
2006 while (count-- > 0) {
2007 offset = stringlib_find(start, end-start,
2008 from_s, from_len,
2009 0);
2010 if (offset == -1)
2011 break;
2012 next = start+offset;
2013 if (next == start) {
2014 /* replace with the 'to' */
2015 Py_MEMCPY(result_s, to_s, to_len);
2016 result_s += to_len;
2017 start += from_len;
2018 } else {
2019 /* copy the unchanged old then the 'to' */
2020 Py_MEMCPY(result_s, start, next-start);
2021 result_s += (next-start);
2022 Py_MEMCPY(result_s, to_s, to_len);
2023 result_s += to_len;
2024 start = next+from_len;
2025 }
2026 }
2027 /* Copy the remainder of the remaining string */
2028 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031}
2032
2033
2034Py_LOCAL(PyBytesObject *)
2035replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 const char *from_s, Py_ssize_t from_len,
2037 const char *to_s, Py_ssize_t to_len,
2038 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 if (maxcount < 0) {
2041 maxcount = PY_SSIZE_T_MAX;
2042 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2043 /* nothing to do; return the original string */
2044 return return_self(self);
2045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (maxcount == 0 ||
2048 (from_len == 0 && to_len == 0)) {
2049 /* nothing to do; return the original string */
2050 return return_self(self);
2051 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 if (from_len == 0) {
2056 /* insert the 'to' string everywhere. */
2057 /* >>> "Python".replace("", ".") */
2058 /* '.P.y.t.h.o.n.' */
2059 return replace_interleave(self, to_s, to_len, maxcount);
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2063 /* point for an empty self string to generate a non-empty string */
2064 /* Special case so the remaining code always gets a non-empty string */
2065 if (PyBytes_GET_SIZE(self) == 0) {
2066 return return_self(self);
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 if (to_len == 0) {
2070 /* delete all occurrences of 'from' string */
2071 if (from_len == 1) {
2072 return replace_delete_single_character(
2073 self, from_s[0], maxcount);
2074 } else {
2075 return replace_delete_substring(self, from_s,
2076 from_len, maxcount);
2077 }
2078 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 if (from_len == to_len) {
2083 if (from_len == 1) {
2084 return replace_single_character_in_place(
2085 self,
2086 from_s[0],
2087 to_s[0],
2088 maxcount);
2089 } else {
2090 return replace_substring_in_place(
2091 self, from_s, from_len, to_s, to_len,
2092 maxcount);
2093 }
2094 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 /* Otherwise use the more generic algorithms */
2097 if (from_len == 1) {
2098 return replace_single_character(self, from_s[0],
2099 to_s, to_len, maxcount);
2100 } else {
2101 /* len('from')>=2, len('to')>=1 */
2102 return replace_substring(self, from_s, from_len, to_s, to_len,
2103 maxcount);
2104 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105}
2106
2107PyDoc_STRVAR(replace__doc__,
2108"B.replace(old, new[, count]) -> bytes\n\
2109\n\
2110Return a copy of B with all occurrences of subsection\n\
2111old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002112given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113
2114static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002115bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 Py_ssize_t count = -1;
2118 PyObject *from, *to;
2119 const char *from_s, *to_s;
2120 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002122 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2123 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 if (PyBytes_Check(from)) {
2126 from_s = PyBytes_AS_STRING(from);
2127 from_len = PyBytes_GET_SIZE(from);
2128 }
2129 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2130 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if (PyBytes_Check(to)) {
2133 to_s = PyBytes_AS_STRING(to);
2134 to_len = PyBytes_GET_SIZE(to);
2135 }
2136 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2137 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 return (PyObject *)replace((PyBytesObject *) self,
2140 from_s, from_len,
2141 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142}
2143
2144/** End DALKE **/
2145
2146/* Matches the end (direction >= 0) or start (direction < 0) of self
2147 * against substr, using the start and end arguments. Returns
2148 * -1 on error, 0 if not found and 1 if found.
2149 */
2150Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002151_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 Py_ssize_t len = PyBytes_GET_SIZE(self);
2155 Py_ssize_t slen;
2156 const char* sub;
2157 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 if (PyBytes_Check(substr)) {
2160 sub = PyBytes_AS_STRING(substr);
2161 slen = PyBytes_GET_SIZE(substr);
2162 }
2163 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2164 return -1;
2165 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 if (direction < 0) {
2170 /* startswith */
2171 if (start+slen > len)
2172 return 0;
2173 } else {
2174 /* endswith */
2175 if (end-start < slen || start > len)
2176 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 if (end-slen > start)
2179 start = end - slen;
2180 }
2181 if (end-start >= slen)
2182 return ! memcmp(str+start, sub, slen);
2183 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184}
2185
2186
2187PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002188"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189\n\
2190Return True if B starts with the specified prefix, False otherwise.\n\
2191With optional start, test B beginning at that position.\n\
2192With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002193prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
2195static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002196bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 Py_ssize_t start = 0;
2199 Py_ssize_t end = PY_SSIZE_T_MAX;
2200 PyObject *subobj;
2201 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Jesus Ceaac451502011-04-20 17:09:23 +02002203 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002204 return NULL;
2205 if (PyTuple_Check(subobj)) {
2206 Py_ssize_t i;
2207 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2208 result = _bytes_tailmatch(self,
2209 PyTuple_GET_ITEM(subobj, i),
2210 start, end, -1);
2211 if (result == -1)
2212 return NULL;
2213 else if (result) {
2214 Py_RETURN_TRUE;
2215 }
2216 }
2217 Py_RETURN_FALSE;
2218 }
2219 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002220 if (result == -1) {
2221 if (PyErr_ExceptionMatches(PyExc_TypeError))
2222 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2223 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002225 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002226 else
2227 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228}
2229
2230
2231PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002232"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002233\n\
2234Return True if B ends with the specified suffix, False otherwise.\n\
2235With optional start, test B beginning at that position.\n\
2236With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002237suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002238
2239static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002240bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002242 Py_ssize_t start = 0;
2243 Py_ssize_t end = PY_SSIZE_T_MAX;
2244 PyObject *subobj;
2245 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002246
Jesus Ceaac451502011-04-20 17:09:23 +02002247 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 return NULL;
2249 if (PyTuple_Check(subobj)) {
2250 Py_ssize_t i;
2251 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2252 result = _bytes_tailmatch(self,
2253 PyTuple_GET_ITEM(subobj, i),
2254 start, end, +1);
2255 if (result == -1)
2256 return NULL;
2257 else if (result) {
2258 Py_RETURN_TRUE;
2259 }
2260 }
2261 Py_RETURN_FALSE;
2262 }
2263 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002264 if (result == -1) {
2265 if (PyErr_ExceptionMatches(PyExc_TypeError))
2266 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2267 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002269 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 else
2271 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272}
2273
2274
2275PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002276"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002277\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002278Decode B using the codec registered for encoding. Default encoding\n\
2279is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002280handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2281a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002282as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002283able to handle UnicodeDecodeErrors.");
2284
2285static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002286bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002288 const char *encoding = NULL;
2289 const char *errors = NULL;
2290 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2293 return NULL;
2294 if (encoding == NULL)
2295 encoding = PyUnicode_GetDefaultEncoding();
2296 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002297}
2298
Guido van Rossum20188312006-05-05 15:15:40 +00002299
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002300PyDoc_STRVAR(splitlines__doc__,
2301"B.splitlines([keepends]) -> list of lines\n\
2302\n\
2303Return a list of the lines in B, breaking at line boundaries.\n\
2304Line breaks are not included in the resulting list unless keepends\n\
2305is given and true.");
2306
2307static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002308bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002309{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002310 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002311 int keepends = 0;
2312
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2314 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002315 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002316
2317 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002318 (PyObject*) self, PyBytes_AS_STRING(self),
2319 PyBytes_GET_SIZE(self), keepends
2320 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321}
2322
2323
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002324PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002326\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002327Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002328Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002330
2331static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002332hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 if (c >= 128)
2335 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002336 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 return c - '0';
2338 else {
David Malcolm96960882010-11-05 17:23:41 +00002339 if (Py_ISUPPER(c))
2340 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 if (c >= 'a' && c <= 'f')
2342 return c - 'a' + 10;
2343 }
2344 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345}
2346
2347static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002348bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 PyObject *newstring, *hexobj;
2351 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 Py_ssize_t hexlen, byteslen, i, j;
2353 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002354 void *data;
2355 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2358 return NULL;
2359 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002360 if (PyUnicode_READY(hexobj))
2361 return NULL;
2362 kind = PyUnicode_KIND(hexobj);
2363 data = PyUnicode_DATA(hexobj);
2364 hexlen = PyUnicode_GET_LENGTH(hexobj);
2365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 byteslen = hexlen/2; /* This overestimates if there are spaces */
2367 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2368 if (!newstring)
2369 return NULL;
2370 buf = PyBytes_AS_STRING(newstring);
2371 for (i = j = 0; i < hexlen; i += 2) {
2372 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002373 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002374 i++;
2375 if (i >= hexlen)
2376 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002377 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2378 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 if (top == -1 || bot == -1) {
2380 PyErr_Format(PyExc_ValueError,
2381 "non-hexadecimal number found in "
2382 "fromhex() arg at position %zd", i);
2383 goto error;
2384 }
2385 buf[j++] = (top << 4) + bot;
2386 }
2387 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2388 goto error;
2389 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002390
2391 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 Py_XDECREF(newstring);
2393 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002394}
2395
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002396PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002397"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002398
2399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002400bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 Py_ssize_t res;
2403 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2404 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002405}
2406
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002407
2408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002409bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002412}
2413
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002414
/* Method table for the bytes type (referenced from PyBytes_Type.tp_methods).
   Each entry is {name, C function, calling-convention flags, docstring};
   entries are kept in alphabetical order apart from the two dunder methods
   at either end, and the table ends with a NULL sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* Class method: called on the type, not on an instance. */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    /* Static method: receives no self/cls argument. */
    {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
     _Py_maketrans__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
     sizeof__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478static PyObject *
2479str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2480
2481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002482bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 PyObject *x = NULL;
2485 const char *encoding = NULL;
2486 const char *errors = NULL;
2487 PyObject *new = NULL;
2488 Py_ssize_t size;
2489 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 if (type != &PyBytes_Type)
2492 return str_subtype_new(type, args, kwds);
2493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2494 &encoding, &errors))
2495 return NULL;
2496 if (x == NULL) {
2497 if (encoding != NULL || errors != NULL) {
2498 PyErr_SetString(PyExc_TypeError,
2499 "encoding or errors without sequence "
2500 "argument");
2501 return NULL;
2502 }
2503 return PyBytes_FromString("");
2504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 if (PyUnicode_Check(x)) {
2507 /* Encode via the codec registry */
2508 if (encoding == NULL) {
2509 PyErr_SetString(PyExc_TypeError,
2510 "string argument without an encoding");
2511 return NULL;
2512 }
2513 new = PyUnicode_AsEncodedString(x, encoding, errors);
2514 if (new == NULL)
2515 return NULL;
2516 assert(PyBytes_Check(new));
2517 return new;
2518 }
2519 /* Is it an integer? */
2520 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2521 if (size == -1 && PyErr_Occurred()) {
2522 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2523 return NULL;
2524 PyErr_Clear();
2525 }
2526 else if (size < 0) {
2527 PyErr_SetString(PyExc_ValueError, "negative count");
2528 return NULL;
2529 }
2530 else {
2531 new = PyBytes_FromStringAndSize(NULL, size);
2532 if (new == NULL) {
2533 return NULL;
2534 }
2535 if (size > 0) {
2536 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2537 }
2538 return new;
2539 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 /* If it's not unicode, there can't be encoding or errors */
2542 if (encoding != NULL || errors != NULL) {
2543 PyErr_SetString(PyExc_TypeError,
2544 "encoding or errors without a string argument");
2545 return NULL;
2546 }
2547 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002548}
2549
2550PyObject *
2551PyBytes_FromObject(PyObject *x)
2552{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 PyObject *new, *it;
2554 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 if (x == NULL) {
2557 PyErr_BadInternalCall();
2558 return NULL;
2559 }
2560 /* Use the modern buffer interface */
2561 if (PyObject_CheckBuffer(x)) {
2562 Py_buffer view;
2563 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2564 return NULL;
2565 new = PyBytes_FromStringAndSize(NULL, view.len);
2566 if (!new)
2567 goto fail;
2568 /* XXX(brett.cannon): Better way to get to internal buffer? */
2569 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2570 &view, view.len, 'C') < 0)
2571 goto fail;
2572 PyBuffer_Release(&view);
2573 return new;
2574 fail:
2575 Py_XDECREF(new);
2576 PyBuffer_Release(&view);
2577 return NULL;
2578 }
2579 if (PyUnicode_Check(x)) {
2580 PyErr_SetString(PyExc_TypeError,
2581 "cannot convert unicode object to bytes");
2582 return NULL;
2583 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if (PyList_CheckExact(x)) {
2586 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2587 if (new == NULL)
2588 return NULL;
2589 for (i = 0; i < Py_SIZE(x); i++) {
2590 Py_ssize_t value = PyNumber_AsSsize_t(
2591 PyList_GET_ITEM(x, i), PyExc_ValueError);
2592 if (value == -1 && PyErr_Occurred()) {
2593 Py_DECREF(new);
2594 return NULL;
2595 }
2596 if (value < 0 || value >= 256) {
2597 PyErr_SetString(PyExc_ValueError,
2598 "bytes must be in range(0, 256)");
2599 Py_DECREF(new);
2600 return NULL;
2601 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002602 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 }
2604 return new;
2605 }
2606 if (PyTuple_CheckExact(x)) {
2607 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2608 if (new == NULL)
2609 return NULL;
2610 for (i = 0; i < Py_SIZE(x); i++) {
2611 Py_ssize_t value = PyNumber_AsSsize_t(
2612 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2613 if (value == -1 && PyErr_Occurred()) {
2614 Py_DECREF(new);
2615 return NULL;
2616 }
2617 if (value < 0 || value >= 256) {
2618 PyErr_SetString(PyExc_ValueError,
2619 "bytes must be in range(0, 256)");
2620 Py_DECREF(new);
2621 return NULL;
2622 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002623 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 }
2625 return new;
2626 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 /* For iterator version, create a string object and resize as needed */
2629 size = _PyObject_LengthHint(x, 64);
2630 if (size == -1 && PyErr_Occurred())
2631 return NULL;
2632 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2633 returning a shared empty bytes string. This required because we
2634 want to call _PyBytes_Resize() the returned object, which we can
2635 only do on bytes objects with refcount == 1. */
2636 size += 1;
2637 new = PyBytes_FromStringAndSize(NULL, size);
2638 if (new == NULL)
2639 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 /* Get the iterator */
2642 it = PyObject_GetIter(x);
2643 if (it == NULL)
2644 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 /* Run the iterator to exhaustion */
2647 for (i = 0; ; i++) {
2648 PyObject *item;
2649 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 /* Get the next item */
2652 item = PyIter_Next(it);
2653 if (item == NULL) {
2654 if (PyErr_Occurred())
2655 goto error;
2656 break;
2657 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* Interpret it as an int (__index__) */
2660 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2661 Py_DECREF(item);
2662 if (value == -1 && PyErr_Occurred())
2663 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 /* Range check */
2666 if (value < 0 || value >= 256) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "bytes must be in range(0, 256)");
2669 goto error;
2670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Append the byte */
2673 if (i >= size) {
2674 size = 2 * size + 1;
2675 if (_PyBytes_Resize(&new, size) < 0)
2676 goto error;
2677 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002678 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 }
2680 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 /* Clean up and return success */
2683 Py_DECREF(it);
2684 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
2686 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Error handling when new != NULL */
2688 Py_XDECREF(it);
2689 Py_DECREF(new);
2690 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691}
2692
2693static PyObject *
2694str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2695{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 PyObject *tmp, *pnew;
2697 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 assert(PyType_IsSubtype(type, &PyBytes_Type));
2700 tmp = bytes_new(&PyBytes_Type, args, kwds);
2701 if (tmp == NULL)
2702 return NULL;
2703 assert(PyBytes_CheckExact(tmp));
2704 n = PyBytes_GET_SIZE(tmp);
2705 pnew = type->tp_alloc(type, n);
2706 if (pnew != NULL) {
2707 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2708 PyBytes_AS_STRING(tmp), n+1);
2709 ((PyBytesObject *)pnew)->ob_shash =
2710 ((PyBytesObject *)tmp)->ob_shash;
2711 }
2712 Py_DECREF(tmp);
2713 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714}
2715
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002716PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002717"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002719bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2720bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002721\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002723 - an iterable yielding integers in range(256)\n\
2724 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725 - a bytes or a buffer object\n\
2726 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727
/* Forward declaration: bytes_iter is referenced from the tp_iter slot
   below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for the built-in bytes type.  Slots are listed in
   PyTypeObject declaration order; a 0 means the slot is left unset. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002772
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773void
2774PyBytes_Concat(register PyObject **pv, register PyObject *w)
2775{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 register PyObject *v;
2777 assert(pv != NULL);
2778 if (*pv == NULL)
2779 return;
2780 if (w == NULL) {
2781 Py_DECREF(*pv);
2782 *pv = NULL;
2783 return;
2784 }
2785 v = bytes_concat(*pv, w);
2786 Py_DECREF(*pv);
2787 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788}
2789
/* Same as PyBytes_Concat(), but additionally releases the caller's
   reference to w (which may be NULL). */
void
PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyBytes_Concat(pv, w);
    /* w is dropped after the concatenation has read it. */
    Py_XDECREF(w);
}
2796
2797
/* The following function breaks the notion that bytes objects are
   immutable: it changes the size of one in place.  We get away with this
   only if there is exactly one reference to the object.  You can also think
   of it as creating a new object and destroying the old one, only more
   efficiently.  In any case, don't use this if the object may already be
   known to some other part of the code...

   Failure modes (both return -1 and leave *pv set to NULL):
     - *pv is not a bytes object, its reference count is not 1, or newsize
       is negative: the reference is dropped and PyErr_BadInternalCall()
       is raised;
     - there is not enough memory: the original object is deallocated and
       an "out of memory" exception is set.
   On success 0 is returned, and the value in *pv may or may not be the
   same as on input.

   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.
*/

int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyBytesObject *sv;
    v = *pv;
    if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Take the object out of the reference bookkeeping before realloc may
       move it; it is re-registered below once the new address is known. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* The old block is still allocated when realloc fails; free it. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
2841
2842/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2843 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2844 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002845 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846 * . *pbuf is set to point into it,
2847 * *plen set to the # of chars following that.
2848 * Caller must decref it when done using pbuf.
2849 * The string starting at *pbuf is of the form
2850 * "-"? ("0x" | "0X")? digit+
2851 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2852 * set in flags. The case of hex digits will be correct,
2853 * There will be at least prec digits, zero-filled on the left if
2854 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 * val object to be converted
2856 * flags bitmask of format flags; only F_ALT is looked at
2857 * prec minimum number of digits; 0-fill on left if needed
2858 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859 *
2860 * CAUTION: o, x and X conversions on regular ints can never
2861 * produce a '-' sign, but can for Python's unbounded ints.
2862 */
2863PyObject*
2864_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002865 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 PyObject *result = NULL;
2868 char *buf;
2869 Py_ssize_t i;
2870 int sign; /* 1 if '-', else 0 */
2871 int len; /* number of characters */
2872 Py_ssize_t llen;
2873 int numdigits; /* len == numnondigits + numdigits */
2874 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 /* Avoid exceeding SSIZE_T_MAX */
2877 if (prec > INT_MAX-3) {
2878 PyErr_SetString(PyExc_OverflowError,
2879 "precision too large");
2880 return NULL;
2881 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 switch (type) {
2884 case 'd':
2885 case 'u':
2886 /* Special-case boolean: we want 0/1 */
2887 if (PyBool_Check(val))
2888 result = PyNumber_ToBase(val, 10);
2889 else
2890 result = Py_TYPE(val)->tp_str(val);
2891 break;
2892 case 'o':
2893 numnondigits = 2;
2894 result = PyNumber_ToBase(val, 8);
2895 break;
2896 case 'x':
2897 case 'X':
2898 numnondigits = 2;
2899 result = PyNumber_ToBase(val, 16);
2900 break;
2901 default:
2902 assert(!"'type' not in [duoxX]");
2903 }
2904 if (!result)
2905 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 buf = _PyUnicode_AsString(result);
2908 if (!buf) {
2909 Py_DECREF(result);
2910 return NULL;
2911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 /* To modify the string in-place, there can only be one reference. */
2914 if (Py_REFCNT(result) != 1) {
2915 PyErr_BadInternalCall();
2916 return NULL;
2917 }
2918 llen = PyUnicode_GetSize(result);
2919 if (llen > INT_MAX) {
2920 PyErr_SetString(PyExc_ValueError,
2921 "string too large in _PyBytes_FormatLong");
2922 return NULL;
2923 }
2924 len = (int)llen;
2925 if (buf[len-1] == 'L') {
2926 --len;
2927 buf[len] = '\0';
2928 }
2929 sign = buf[0] == '-';
2930 numnondigits += sign;
2931 numdigits = len - numnondigits;
2932 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002934 /* Get rid of base marker unless F_ALT */
2935 if (((flags & F_ALT) == 0 &&
2936 (type == 'o' || type == 'x' || type == 'X'))) {
2937 assert(buf[sign] == '0');
2938 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2939 buf[sign+1] == 'o');
2940 numnondigits -= 2;
2941 buf += 2;
2942 len -= 2;
2943 if (sign)
2944 buf[0] = '-';
2945 assert(len == numnondigits + numdigits);
2946 assert(numdigits > 0);
2947 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 /* Fill with leading zeroes to meet minimum width. */
2950 if (prec > numdigits) {
2951 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2952 numnondigits + prec);
2953 char *b1;
2954 if (!r1) {
2955 Py_DECREF(result);
2956 return NULL;
2957 }
2958 b1 = PyBytes_AS_STRING(r1);
2959 for (i = 0; i < numnondigits; ++i)
2960 *b1++ = *buf++;
2961 for (i = 0; i < prec - numdigits; i++)
2962 *b1++ = '0';
2963 for (i = 0; i < numdigits; i++)
2964 *b1++ = *buf++;
2965 *b1 = '\0';
2966 Py_DECREF(result);
2967 result = r1;
2968 buf = PyBytes_AS_STRING(result);
2969 len = numnondigits + prec;
2970 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 /* Fix up case for hex conversions. */
2973 if (type == 'X') {
2974 /* Need to convert all lower case letters to upper case.
2975 and need to convert 0x to 0X (and -0x to -0X). */
2976 for (i = 0; i < len; i++)
2977 if (buf[i] >= 'a' && buf[i] <= 'x')
2978 buf[i] -= 'a'-'A';
2979 }
2980 *pbuf = buf;
2981 *plen = len;
2982 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983}
2984
2985void
2986PyBytes_Fini(void)
2987{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 int i;
2989 for (i = 0; i < UCHAR_MAX + 1; i++) {
2990 Py_XDECREF(characters[i]);
2991 characters[i] = NULL;
2992 }
2993 Py_XDECREF(nullstring);
2994 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995}
2996
Benjamin Peterson4116f362008-05-27 00:36:20 +00002997/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002998
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to produce */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003004
/* Destroy a bytes iterator: stop GC tracking, release the reference to the
   underlying bytes object (if still held) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    /* it_seq is NULL once the iterator has been exhausted. */
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3012
/* GC traversal helper: reports the bytes object referenced by the
   iterator.  Presumably installed as tp_traverse of the iterator type --
   confirm against the type definition.  The parameter names `visit` and
   `arg` are required by the Py_VISIT macro. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3019
3020static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 PyBytesObject *seq;
3024 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 assert(it != NULL);
3027 seq = it->it_seq;
3028 if (seq == NULL)
3029 return NULL;
3030 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003032 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3033 item = PyLong_FromLong(
3034 (unsigned char)seq->ob_sval[it->it_index]);
3035 if (item != NULL)
3036 ++it->it_index;
3037 return item;
3038 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 Py_DECREF(seq);
3041 it->it_seq = NULL;
3042 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043}
3044
3045static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 Py_ssize_t len = 0;
3049 if (it->it_seq)
3050 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3051 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052}
3053
3054PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3059 length_hint_doc},
3060 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061};
3062
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003063PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003064 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3065 "bytes_iterator", /* tp_name */
3066 sizeof(striterobject), /* tp_basicsize */
3067 0, /* tp_itemsize */
3068 /* methods */
3069 (destructor)striter_dealloc, /* tp_dealloc */
3070 0, /* tp_print */
3071 0, /* tp_getattr */
3072 0, /* tp_setattr */
3073 0, /* tp_reserved */
3074 0, /* tp_repr */
3075 0, /* tp_as_number */
3076 0, /* tp_as_sequence */
3077 0, /* tp_as_mapping */
3078 0, /* tp_hash */
3079 0, /* tp_call */
3080 0, /* tp_str */
3081 PyObject_GenericGetAttr, /* tp_getattro */
3082 0, /* tp_setattro */
3083 0, /* tp_as_buffer */
3084 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3085 0, /* tp_doc */
3086 (traverseproc)striter_traverse, /* tp_traverse */
3087 0, /* tp_clear */
3088 0, /* tp_richcompare */
3089 0, /* tp_weaklistoffset */
3090 PyObject_SelfIter, /* tp_iter */
3091 (iternextfunc)striter_next, /* tp_iternext */
3092 striter_methods, /* tp_methods */
3093 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003094};
3095
3096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003097bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003101 if (!PyBytes_Check(seq)) {
3102 PyErr_BadInternalCall();
3103 return NULL;
3104 }
3105 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3106 if (it == NULL)
3107 return NULL;
3108 it->it_index = 0;
3109 Py_INCREF(seq);
3110 it->it_seq = (PyBytesObject *)seq;
3111 _PyObject_GC_TRACK(it);
3112 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113}