blob: a89798a167b409cfe6b114bfaa390bc695cacf3b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* Basic allocation size of a bytes object: everything up to the start of
   the ob_sval payload, plus one byte for the trailing NUL.  An object that
   holds n bytes of data must be allocated with PyBytesObject_SIZE + n bytes.

   This is measured with offsetof() rather than sizeof(PyBytesObject) so
   that trailing struct padding is not counted; on a typical system that
   saves 3 bytes per allocation.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
   For PyBytes_FromStringAndSize(), the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 Py_ssize_t i, length = Py_SIZE(op);
569 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 unsigned char quote, *s, *p;
572
573 /* Compute size of output string */
574 squotes = dquotes = 0;
575 newsize = 3; /* b'' */
576 s = (unsigned char*)op->ob_sval;
577 for (i = 0; i < length; i++) {
578 switch(s[i]) {
579 case '\'': squotes++; newsize++; break;
580 case '"': dquotes++; newsize++; break;
581 case '\\': case '\t': case '\n': case '\r':
582 newsize += 2; break; /* \C */
583 default:
584 if (s[i] < ' ' || s[i] >= 0x7f)
585 newsize += 4; /* \xHH */
586 else
587 newsize++;
588 }
589 }
590 quote = '\'';
591 if (smartquotes && squotes && !dquotes)
592 quote = '"';
593 if (squotes && quote == '\'')
594 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
629 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000630}
631
Neal Norwitz6968b052007-02-27 19:02:19 +0000632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000633bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000634{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 if (Py_BytesWarningFlag) {
642 if (PyErr_WarnEx(PyExc_BytesWarning,
643 "str() on a bytes instance", 1))
644 return NULL;
645 }
646 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000649static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653}
Neal Norwitz6968b052007-02-27 19:02:19 +0000654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655/* This is also used by PyBytes_Concat() */
656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 Py_ssize_t size;
660 Py_buffer va, vb;
661 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 va.len = -1;
664 vb.len = -1;
665 if (_getbuffer(a, &va) < 0 ||
666 _getbuffer(b, &vb) < 0) {
667 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
668 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
669 goto done;
670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 /* Optimize end cases */
673 if (va.len == 0 && PyBytes_CheckExact(b)) {
674 result = b;
675 Py_INCREF(result);
676 goto done;
677 }
678 if (vb.len == 0 && PyBytes_CheckExact(a)) {
679 result = a;
680 Py_INCREF(result);
681 goto done;
682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 size = va.len + vb.len;
685 if (size < 0) {
686 PyErr_NoMemory();
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 result = PyBytes_FromStringAndSize(NULL, size);
691 if (result != NULL) {
692 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
693 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
696 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 if (va.len != -1)
698 PyBuffer_Release(&va);
699 if (vb.len != -1)
700 PyBuffer_Release(&vb);
701 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702}
Neal Norwitz6968b052007-02-27 19:02:19 +0000703
704static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000705bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 register Py_ssize_t i;
708 register Py_ssize_t j;
709 register Py_ssize_t size;
710 register PyBytesObject *op;
711 size_t nbytes;
712 if (n < 0)
713 n = 0;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000717 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 PyErr_SetString(PyExc_OverflowError,
719 "repeated bytes are too long");
720 return NULL;
721 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000722 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
724 Py_INCREF(a);
725 return (PyObject *)a;
726 }
727 nbytes = (size_t)size;
728 if (nbytes + PyBytesObject_SIZE <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
733 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
734 if (op == NULL)
735 return PyErr_NoMemory();
736 PyObject_INIT_VAR(op, &PyBytes_Type, size);
737 op->ob_shash = -1;
738 op->ob_sval[size] = '\0';
739 if (Py_SIZE(a) == 1 && n > 0) {
740 memset(op->ob_sval, a->ob_sval[0] , n);
741 return (PyObject *) op;
742 }
743 i = 0;
744 if (i < size) {
745 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
746 i = Py_SIZE(a);
747 }
748 while (i < size) {
749 j = (i <= size-i) ? i : size-i;
750 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
751 i += j;
752 }
753 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000754}
755
Guido van Rossum98297ee2007-11-06 21:34:58 +0000756static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000757bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000758{
759 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
760 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000761 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000762 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000763 PyErr_Clear();
764 if (_getbuffer(arg, &varg) < 0)
765 return -1;
766 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
767 varg.buf, varg.len, 0);
768 PyBuffer_Release(&varg);
769 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000770 }
771 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
773 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774 }
775
Antoine Pitrou0010d372010-08-15 17:12:55 +0000776 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777}
778
Neal Norwitz6968b052007-02-27 19:02:19 +0000779static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000780bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 if (i < 0 || i >= Py_SIZE(a)) {
783 PyErr_SetString(PyExc_IndexError, "index out of range");
784 return NULL;
785 }
786 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000787}
788
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000789static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000790bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 int c;
793 Py_ssize_t len_a, len_b;
794 Py_ssize_t min_len;
795 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 /* Make sure both arguments are strings. */
798 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
799 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
800 (PyObject_IsInstance((PyObject*)a,
801 (PyObject*)&PyUnicode_Type) ||
802 PyObject_IsInstance((PyObject*)b,
803 (PyObject*)&PyUnicode_Type))) {
804 if (PyErr_WarnEx(PyExc_BytesWarning,
805 "Comparison between bytes and string", 1))
806 return NULL;
807 }
808 result = Py_NotImplemented;
809 goto out;
810 }
811 if (a == b) {
812 switch (op) {
813 case Py_EQ:case Py_LE:case Py_GE:
814 result = Py_True;
815 goto out;
816 case Py_NE:case Py_LT:case Py_GT:
817 result = Py_False;
818 goto out;
819 }
820 }
821 if (op == Py_EQ) {
822 /* Supporting Py_NE here as well does not save
823 much time, since Py_NE is rarely used. */
824 if (Py_SIZE(a) == Py_SIZE(b)
825 && (a->ob_sval[0] == b->ob_sval[0]
826 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
827 result = Py_True;
828 } else {
829 result = Py_False;
830 }
831 goto out;
832 }
833 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
834 min_len = (len_a < len_b) ? len_a : len_b;
835 if (min_len > 0) {
836 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
837 if (c==0)
838 c = memcmp(a->ob_sval, b->ob_sval, min_len);
839 } else
840 c = 0;
841 if (c == 0)
842 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
843 switch (op) {
844 case Py_LT: c = c < 0; break;
845 case Py_LE: c = c <= 0; break;
846 case Py_EQ: assert(0); break; /* unreachable */
847 case Py_NE: c = c != 0; break;
848 case Py_GT: c = c > 0; break;
849 case Py_GE: c = c >= 0; break;
850 default:
851 result = Py_NotImplemented;
852 goto out;
853 }
854 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000855 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_INCREF(result);
857 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000858}
859
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000860static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 register Py_ssize_t len;
864 register unsigned char *p;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100865 register Py_uhash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 if (a->ob_shash != -1)
868 return a->ob_shash;
869 len = Py_SIZE(a);
870 p = (unsigned char *) a->ob_sval;
Mark Dickinson57e683e2011-09-24 18:18:40 +0100871 x = (Py_uhash_t)*p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 while (--len >= 0)
Mark Dickinson57e683e2011-09-24 18:18:40 +0100873 x = (1000003U*x) ^ (Py_uhash_t)*p++;
874 x ^= (Py_uhash_t)Py_SIZE(a);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 if (x == -1)
876 x = -2;
877 a->ob_shash = x;
878 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000879}
880
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000881static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000882bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 if (PyIndex_Check(item)) {
885 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
886 if (i == -1 && PyErr_Occurred())
887 return NULL;
888 if (i < 0)
889 i += PyBytes_GET_SIZE(self);
890 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
891 PyErr_SetString(PyExc_IndexError,
892 "index out of range");
893 return NULL;
894 }
895 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
896 }
897 else if (PySlice_Check(item)) {
898 Py_ssize_t start, stop, step, slicelength, cur, i;
899 char* source_buf;
900 char* result_buf;
901 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000902
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000903 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyBytes_GET_SIZE(self),
905 &start, &stop, &step, &slicelength) < 0) {
906 return NULL;
907 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 if (slicelength <= 0) {
910 return PyBytes_FromStringAndSize("", 0);
911 }
912 else if (start == 0 && step == 1 &&
913 slicelength == PyBytes_GET_SIZE(self) &&
914 PyBytes_CheckExact(self)) {
915 Py_INCREF(self);
916 return (PyObject *)self;
917 }
918 else if (step == 1) {
919 return PyBytes_FromStringAndSize(
920 PyBytes_AS_STRING(self) + start,
921 slicelength);
922 }
923 else {
924 source_buf = PyBytes_AS_STRING(self);
925 result = PyBytes_FromStringAndSize(NULL, slicelength);
926 if (result == NULL)
927 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 result_buf = PyBytes_AS_STRING(result);
930 for (cur = start, i = 0; i < slicelength;
931 cur += step, i++) {
932 result_buf[i] = source_buf[cur];
933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 return result;
936 }
937 }
938 else {
939 PyErr_Format(PyExc_TypeError,
940 "byte indices must be integers, not %.200s",
941 Py_TYPE(item)->tp_name);
942 return NULL;
943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000944}
945
946static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000947bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
950 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 (lenfunc)bytes_length, /*sq_length*/
955 (binaryfunc)bytes_concat, /*sq_concat*/
956 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
957 (ssizeargfunc)bytes_item, /*sq_item*/
958 0, /*sq_slice*/
959 0, /*sq_ass_item*/
960 0, /*sq_ass_slice*/
961 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (lenfunc)bytes_length,
966 (binaryfunc)bytes_subscript,
967 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968};
969
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000970static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 (getbufferproc)bytes_buffer_getbuffer,
972 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
975
/* Selectors for the strip helpers.  They double as indices into
   stripformat[] below, so the numeric values matter. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
984
Neal Norwitz6968b052007-02-27 19:02:19 +0000985PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000986"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000987\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000988Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989If sep is not specified or is None, B is split on ASCII whitespace\n\
990characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000991If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000992
993static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000994bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +0000995{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
997 Py_ssize_t maxsplit = -1;
998 const char *s = PyBytes_AS_STRING(self), *sub;
999 Py_buffer vsub;
1000 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1003 return NULL;
1004 if (maxsplit < 0)
1005 maxsplit = PY_SSIZE_T_MAX;
1006 if (subobj == Py_None)
1007 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1008 if (_getbuffer(subobj, &vsub) < 0)
1009 return NULL;
1010 sub = vsub.buf;
1011 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1014 PyBuffer_Release(&vsub);
1015 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001016}
1017
Neal Norwitz6968b052007-02-27 19:02:19 +00001018PyDoc_STRVAR(partition__doc__,
1019"B.partition(sep) -> (head, sep, tail)\n\
1020\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001021Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001022the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001023found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001024
1025static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001026bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001027{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 const char *sep;
1029 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 if (PyBytes_Check(sep_obj)) {
1032 sep = PyBytes_AS_STRING(sep_obj);
1033 sep_len = PyBytes_GET_SIZE(sep_obj);
1034 }
1035 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1036 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 return stringlib_partition(
1039 (PyObject*) self,
1040 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1041 sep_obj, sep, sep_len
1042 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001043}
1044
1045PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001046"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001047\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001048Search for the separator sep in B, starting at the end of B,\n\
1049and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001050part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001051bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001052
1053static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001054bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001055{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 const char *sep;
1057 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 if (PyBytes_Check(sep_obj)) {
1060 sep = PyBytes_AS_STRING(sep_obj);
1061 sep_len = PyBytes_GET_SIZE(sep_obj);
1062 }
1063 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1064 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 return stringlib_rpartition(
1067 (PyObject*) self,
1068 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1069 sep_obj, sep, sep_len
1070 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001071}
1072
Neal Norwitz6968b052007-02-27 19:02:19 +00001073PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001074"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001075\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001076Return a list of the sections in B, using sep as the delimiter,\n\
1077starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001078If sep is not given, B is split on ASCII whitespace characters\n\
1079(space, tab, return, newline, formfeed, vertical tab).\n\
1080If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001082
Neal Norwitz6968b052007-02-27 19:02:19 +00001083static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001084bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1087 Py_ssize_t maxsplit = -1;
1088 const char *s = PyBytes_AS_STRING(self), *sub;
1089 Py_buffer vsub;
1090 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1093 return NULL;
1094 if (maxsplit < 0)
1095 maxsplit = PY_SSIZE_T_MAX;
1096 if (subobj == Py_None)
1097 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1098 if (_getbuffer(subobj, &vsub) < 0)
1099 return NULL;
1100 sub = vsub.buf;
1101 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1104 PyBuffer_Release(&vsub);
1105 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001106}
1107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108
1109PyDoc_STRVAR(join__doc__,
1110"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001111\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001112Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1114
Neal Norwitz6968b052007-02-27 19:02:19 +00001115static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001116bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 char *sep = PyBytes_AS_STRING(self);
1119 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1120 PyObject *res = NULL;
1121 char *p;
1122 Py_ssize_t seqlen = 0;
1123 size_t sz = 0;
1124 Py_ssize_t i;
1125 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 seq = PySequence_Fast(orig, "");
1128 if (seq == NULL) {
1129 return NULL;
1130 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 seqlen = PySequence_Size(seq);
1133 if (seqlen == 0) {
1134 Py_DECREF(seq);
1135 return PyBytes_FromString("");
1136 }
1137 if (seqlen == 1) {
1138 item = PySequence_Fast_GET_ITEM(seq, 0);
1139 if (PyBytes_CheckExact(item)) {
1140 Py_INCREF(item);
1141 Py_DECREF(seq);
1142 return item;
1143 }
1144 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 /* There are at least two things to join, or else we have a subclass
1147 * of the builtin types in the sequence.
1148 * Do a pre-pass to figure out the total amount of space we'll
1149 * need (sz), and see whether all argument are bytes.
1150 */
1151 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1152 for (i = 0; i < seqlen; i++) {
1153 const size_t old_sz = sz;
1154 item = PySequence_Fast_GET_ITEM(seq, i);
1155 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1156 PyErr_Format(PyExc_TypeError,
1157 "sequence item %zd: expected bytes,"
1158 " %.80s found",
1159 i, Py_TYPE(item)->tp_name);
1160 Py_DECREF(seq);
1161 return NULL;
1162 }
1163 sz += Py_SIZE(item);
1164 if (i != 0)
1165 sz += seplen;
1166 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1167 PyErr_SetString(PyExc_OverflowError,
1168 "join() result is too long for bytes");
1169 Py_DECREF(seq);
1170 return NULL;
1171 }
1172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 /* Allocate result space. */
1175 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1176 if (res == NULL) {
1177 Py_DECREF(seq);
1178 return NULL;
1179 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* Catenate everything. */
1182 /* I'm not worried about a PyByteArray item growing because there's
1183 nowhere in this function where we release the GIL. */
1184 p = PyBytes_AS_STRING(res);
1185 for (i = 0; i < seqlen; ++i) {
1186 size_t n;
1187 char *q;
1188 if (i) {
1189 Py_MEMCPY(p, sep, seplen);
1190 p += seplen;
1191 }
1192 item = PySequence_Fast_GET_ITEM(seq, i);
1193 n = Py_SIZE(item);
1194 if (PyBytes_Check(item))
1195 q = PyBytes_AS_STRING(item);
1196 else
1197 q = PyByteArray_AS_STRING(item);
1198 Py_MEMCPY(p, q, n);
1199 p += n;
1200 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 Py_DECREF(seq);
1203 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204}
1205
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206PyObject *
1207_PyBytes_Join(PyObject *sep, PyObject *x)
1208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 assert(sep != NULL && PyBytes_Check(sep));
1210 assert(x != NULL);
1211 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212}
1213
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when too large; a negative start or end has len
   added to it and is then clamped to 0.  start is not clamped against
   len.  start and end must be modifiable lvalues; len is evaluated more
   than once, so it must be free of side effects.

   Wrapped in do { } while (0) so that the expansion is one statement and
   can be used safely in an unbraced if/else; every use must end with a
   semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
1229Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001230bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001233 char byte;
1234 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 const char *sub;
1236 Py_ssize_t sub_len;
1237 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001238 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239
Antoine Pitrouac65d962011-10-20 23:54:17 +02001240 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1241 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Antoine Pitrouac65d962011-10-20 23:54:17 +02001244 if (subobj) {
1245 if (_getbuffer(subobj, &subbuf) < 0)
1246 return -2;
1247
1248 sub = subbuf.buf;
1249 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001251 else {
1252 sub = &byte;
1253 sub_len = 1;
1254 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001257 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1259 sub, sub_len, start, end);
1260 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001261 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1263 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001264
1265 if (subobj)
1266 PyBuffer_Release(&subbuf);
1267
1268 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269}
1270
1271
1272PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001273"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001274\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001275Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001276such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001278\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279Return -1 on failure.");
1280
Neal Norwitz6968b052007-02-27 19:02:19 +00001281static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001282bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 Py_ssize_t result = bytes_find_internal(self, args, +1);
1285 if (result == -2)
1286 return NULL;
1287 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001288}
1289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290
1291PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001292"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001293\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294Like B.find() but raise ValueError when the substring is not found.");
1295
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001296static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001297bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 Py_ssize_t result = bytes_find_internal(self, args, +1);
1300 if (result == -2)
1301 return NULL;
1302 if (result == -1) {
1303 PyErr_SetString(PyExc_ValueError,
1304 "substring not found");
1305 return NULL;
1306 }
1307 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001308}
1309
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310
1311PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001312"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001313\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001315such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001317\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318Return -1 on failure.");
1319
Neal Norwitz6968b052007-02-27 19:02:19 +00001320static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001321bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001322{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 Py_ssize_t result = bytes_find_internal(self, args, -1);
1324 if (result == -2)
1325 return NULL;
1326 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001327}
1328
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001329
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001331"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332\n\
1333Like B.rfind() but raise ValueError when the substring is not found.");
1334
1335static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001336bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 Py_ssize_t result = bytes_find_internal(self, args, -1);
1339 if (result == -2)
1340 return NULL;
1341 if (result == -1) {
1342 PyErr_SetString(PyExc_ValueError,
1343 "substring not found");
1344 return NULL;
1345 }
1346 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347}
1348
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
1350Py_LOCAL_INLINE(PyObject *)
1351do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 Py_buffer vsep;
1354 char *s = PyBytes_AS_STRING(self);
1355 Py_ssize_t len = PyBytes_GET_SIZE(self);
1356 char *sep;
1357 Py_ssize_t seplen;
1358 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 if (_getbuffer(sepobj, &vsep) < 0)
1361 return NULL;
1362 sep = vsep.buf;
1363 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 i = 0;
1366 if (striptype != RIGHTSTRIP) {
1367 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1368 i++;
1369 }
1370 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 j = len;
1373 if (striptype != LEFTSTRIP) {
1374 do {
1375 j--;
1376 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1377 j++;
1378 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1383 Py_INCREF(self);
1384 return (PyObject*)self;
1385 }
1386 else
1387 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001388}
1389
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390
1391Py_LOCAL_INLINE(PyObject *)
1392do_strip(PyBytesObject *self, int striptype)
1393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 char *s = PyBytes_AS_STRING(self);
1395 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 i = 0;
1398 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001399 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 i++;
1401 }
1402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 j = len;
1405 if (striptype != LEFTSTRIP) {
1406 do {
1407 j--;
David Malcolm96960882010-11-05 17:23:41 +00001408 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 j++;
1410 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1413 Py_INCREF(self);
1414 return (PyObject*)self;
1415 }
1416 else
1417 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418}
1419
1420
1421Py_LOCAL_INLINE(PyObject *)
1422do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1423{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1427 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (sep != NULL && sep != Py_None) {
1430 return do_xstrip(self, striptype, sep);
1431 }
1432 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
1434
1435
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001436PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001438\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001439Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001441static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001442bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (PyTuple_GET_SIZE(args) == 0)
1445 return do_strip(self, BOTHSTRIP); /* Common case */
1446 else
1447 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448}
1449
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001454Strip leading bytes contained in the argument.\n\
1455If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001457bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 if (PyTuple_GET_SIZE(args) == 0)
1460 return do_strip(self, LEFTSTRIP); /* Common case */
1461 else
1462 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463}
1464
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001469Strip trailing bytes contained in the argument.\n\
1470If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001472bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (PyTuple_GET_SIZE(args) == 0)
1475 return do_strip(self, RIGHTSTRIP); /* Common case */
1476 else
1477 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001478}
Neal Norwitz6968b052007-02-27 19:02:19 +00001479
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480
1481PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001482"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001483\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001485string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486as in slice notation.");
1487
1488static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001489bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 PyObject *sub_obj;
1492 const char *str = PyBytes_AS_STRING(self), *sub;
1493 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001494 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496
Antoine Pitrouac65d962011-10-20 23:54:17 +02001497 Py_buffer vsub;
1498 PyObject *count_obj;
1499
1500 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1501 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503
Antoine Pitrouac65d962011-10-20 23:54:17 +02001504 if (sub_obj) {
1505 if (_getbuffer(sub_obj, &vsub) < 0)
1506 return NULL;
1507
1508 sub = vsub.buf;
1509 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001511 else {
1512 sub = &byte;
1513 sub_len = 1;
1514 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001517
Antoine Pitrouac65d962011-10-20 23:54:17 +02001518 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1520 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001521
1522 if (sub_obj)
1523 PyBuffer_Release(&vsub);
1524
1525 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526}
1527
1528
1529PyDoc_STRVAR(translate__doc__,
1530"B.translate(table[, deletechars]) -> bytes\n\
1531\n\
1532Return a copy of B, where all characters occurring in the\n\
1533optional argument deletechars are removed, and the remaining\n\
1534characters have been mapped through the given translation\n\
1535table, which must be a bytes object of length 256.");
1536
1537static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001538bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 register char *input, *output;
1541 const char *table;
1542 register Py_ssize_t i, c, changed = 0;
1543 PyObject *input_obj = (PyObject*)self;
1544 const char *output_start, *del_table=NULL;
1545 Py_ssize_t inlen, tablen, dellen = 0;
1546 PyObject *result;
1547 int trans_table[256];
1548 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1551 &tableobj, &delobj))
1552 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 if (PyBytes_Check(tableobj)) {
1555 table = PyBytes_AS_STRING(tableobj);
1556 tablen = PyBytes_GET_SIZE(tableobj);
1557 }
1558 else if (tableobj == Py_None) {
1559 table = NULL;
1560 tablen = 256;
1561 }
1562 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1563 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 if (tablen != 256) {
1566 PyErr_SetString(PyExc_ValueError,
1567 "translation table must be 256 characters long");
1568 return NULL;
1569 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 if (delobj != NULL) {
1572 if (PyBytes_Check(delobj)) {
1573 del_table = PyBytes_AS_STRING(delobj);
1574 dellen = PyBytes_GET_SIZE(delobj);
1575 }
1576 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1577 return NULL;
1578 }
1579 else {
1580 del_table = NULL;
1581 dellen = 0;
1582 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 inlen = PyBytes_GET_SIZE(input_obj);
1585 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1586 if (result == NULL)
1587 return NULL;
1588 output_start = output = PyBytes_AsString(result);
1589 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 if (dellen == 0 && table != NULL) {
1592 /* If no deletions are required, use faster code */
1593 for (i = inlen; --i >= 0; ) {
1594 c = Py_CHARMASK(*input++);
1595 if (Py_CHARMASK((*output++ = table[c])) != c)
1596 changed = 1;
1597 }
1598 if (changed || !PyBytes_CheckExact(input_obj))
1599 return result;
1600 Py_DECREF(result);
1601 Py_INCREF(input_obj);
1602 return input_obj;
1603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 if (table == NULL) {
1606 for (i = 0; i < 256; i++)
1607 trans_table[i] = Py_CHARMASK(i);
1608 } else {
1609 for (i = 0; i < 256; i++)
1610 trans_table[i] = Py_CHARMASK(table[i]);
1611 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 for (i = 0; i < dellen; i++)
1614 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 for (i = inlen; --i >= 0; ) {
1617 c = Py_CHARMASK(*input++);
1618 if (trans_table[c] != -1)
1619 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1620 continue;
1621 changed = 1;
1622 }
1623 if (!changed && PyBytes_CheckExact(input_obj)) {
1624 Py_DECREF(result);
1625 Py_INCREF(input_obj);
1626 return input_obj;
1627 }
1628 /* Fix the size of the resulting string */
1629 if (inlen > 0)
1630 _PyBytes_Resize(&result, output - output_start);
1631 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632}
1633
1634
/* Method-table entry point for maketrans: hands args straight to
   _Py_bytes_maketrans() and returns whatever that returns.  The first
   parameter is not used. */
Georg Brandlabc38772009-04-12 15:51:51 +00001635static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001636bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001639}
1640
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001641/* find and count characters and substrings */
1642
/* Locate the first byte equal to c within the target_len bytes starting at
   target.  Evaluates to a char * pointing at that byte, or NULL when the
   byte is absent.  Thin wrapper around memchr() that drops the const
   qualifier; every macro argument is parenthesized in the expansion. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1645
1646/* String ops must return a string. */
1647/* If the object is subclass of string, create a copy */
1648Py_LOCAL(PyBytesObject *)
1649return_self(PyBytesObject *self)
1650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 if (PyBytes_CheckExact(self)) {
1652 Py_INCREF(self);
1653 return self;
1654 }
1655 return (PyBytesObject *)PyBytes_FromStringAndSize(
1656 PyBytes_AS_STRING(self),
1657 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658}
1659
1660Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001661countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 Py_ssize_t count=0;
1664 const char *start=target;
1665 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 while ( (start=findchar(start, end-start, c)) != NULL ) {
1668 count++;
1669 if (count >= maxcount)
1670 break;
1671 start += 1;
1672 }
1673 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674}
1675
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676
1677/* Algorithms for different cases of string replacement */
1678
1679/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1680Py_LOCAL(PyBytesObject *)
1681replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 const char *to_s, Py_ssize_t to_len,
1683 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 char *self_s, *result_s;
1686 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001687 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001692 /* 1 at the end plus 1 after every character;
1693 count = min(maxcount, self_len + 1) */
1694 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001696 else
1697 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1698 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 /* Check for overflow */
1701 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001702 assert(count > 0);
1703 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 PyErr_SetString(PyExc_OverflowError,
1705 "replacement bytes are too long");
1706 return NULL;
1707 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001708 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 if (! (result = (PyBytesObject *)
1711 PyBytes_FromStringAndSize(NULL, result_len)) )
1712 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 self_s = PyBytes_AS_STRING(self);
1715 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 /* Lay the first one down (guaranteed this will occur) */
1720 Py_MEMCPY(result_s, to_s, to_len);
1721 result_s += to_len;
1722 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 for (i=0; i<count; i++) {
1725 *result_s++ = *self_s++;
1726 Py_MEMCPY(result_s, to_s, to_len);
1727 result_s += to_len;
1728 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 /* Copy the rest of the original string */
1731 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734}
1735
1736/* Special case for deleting a single character */
1737/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1738Py_LOCAL(PyBytesObject *)
1739replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 char *self_s, *result_s;
1743 char *start, *next, *end;
1744 Py_ssize_t self_len, result_len;
1745 Py_ssize_t count;
1746 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 self_len = PyBytes_GET_SIZE(self);
1749 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 count = countchar(self_s, self_len, from_c, maxcount);
1752 if (count == 0) {
1753 return return_self(self);
1754 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 result_len = self_len - count; /* from_len == 1 */
1757 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 if ( (result = (PyBytesObject *)
1760 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1761 return NULL;
1762 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 start = self_s;
1765 end = self_s + self_len;
1766 while (count-- > 0) {
1767 next = findchar(start, end-start, from_c);
1768 if (next == NULL)
1769 break;
1770 Py_MEMCPY(result_s, start, next-start);
1771 result_s += (next-start);
1772 start = next+1;
1773 }
1774 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777}
1778
1779/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1780
1781Py_LOCAL(PyBytesObject *)
1782replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 const char *from_s, Py_ssize_t from_len,
1784 Py_ssize_t maxcount) {
1785 char *self_s, *result_s;
1786 char *start, *next, *end;
1787 Py_ssize_t self_len, result_len;
1788 Py_ssize_t count, offset;
1789 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 self_len = PyBytes_GET_SIZE(self);
1792 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 count = stringlib_count(self_s, self_len,
1795 from_s, from_len,
1796 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 if (count == 0) {
1799 /* no matches */
1800 return return_self(self);
1801 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 result_len = self_len - (count * from_len);
1804 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 if ( (result = (PyBytesObject *)
1807 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1808 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 start = self_s;
1813 end = self_s + self_len;
1814 while (count-- > 0) {
1815 offset = stringlib_find(start, end-start,
1816 from_s, from_len,
1817 0);
1818 if (offset == -1)
1819 break;
1820 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 result_s += (next-start);
1825 start = next+from_len;
1826 }
1827 Py_MEMCPY(result_s, start, end-start);
1828 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829}
1830
1831/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1832Py_LOCAL(PyBytesObject *)
1833replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 char from_c, char to_c,
1835 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001836{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 char *self_s, *result_s, *start, *end, *next;
1838 Py_ssize_t self_len;
1839 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 /* The result string will be the same size */
1842 self_s = PyBytes_AS_STRING(self);
1843 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 if (next == NULL) {
1848 /* No matches; return the original string */
1849 return return_self(self);
1850 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 /* Need to make a new string */
1853 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1854 if (result == NULL)
1855 return NULL;
1856 result_s = PyBytes_AS_STRING(result);
1857 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 /* change everything in-place, starting with this one */
1860 start = result_s + (next-self_s);
1861 *start = to_c;
1862 start++;
1863 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 while (--maxcount > 0) {
1866 next = findchar(start, end-start, from_c);
1867 if (next == NULL)
1868 break;
1869 *next = to_c;
1870 start = next+1;
1871 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874}
1875
1876/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1877Py_LOCAL(PyBytesObject *)
1878replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 const char *from_s, Py_ssize_t from_len,
1880 const char *to_s, Py_ssize_t to_len,
1881 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 char *result_s, *start, *end;
1884 char *self_s;
1885 Py_ssize_t self_len, offset;
1886 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 self_s = PyBytes_AS_STRING(self);
1891 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 offset = stringlib_find(self_s, self_len,
1894 from_s, from_len,
1895 0);
1896 if (offset == -1) {
1897 /* No matches; return the original string */
1898 return return_self(self);
1899 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001901 /* Need to make a new string */
1902 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1903 if (result == NULL)
1904 return NULL;
1905 result_s = PyBytes_AS_STRING(result);
1906 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 /* change everything in-place, starting with this one */
1909 start = result_s + offset;
1910 Py_MEMCPY(start, to_s, from_len);
1911 start += from_len;
1912 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 while ( --maxcount > 0) {
1915 offset = stringlib_find(start, end-start,
1916 from_s, from_len,
1917 0);
1918 if (offset==-1)
1919 break;
1920 Py_MEMCPY(start+offset, to_s, from_len);
1921 start += offset+from_len;
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925}
1926
1927/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1928Py_LOCAL(PyBytesObject *)
1929replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 char from_c,
1931 const char *to_s, Py_ssize_t to_len,
1932 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 char *self_s, *result_s;
1935 char *start, *next, *end;
1936 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001937 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 self_s = PyBytes_AS_STRING(self);
1941 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 count = countchar(self_s, self_len, from_c, maxcount);
1944 if (count == 0) {
1945 /* no matches, return unchanged */
1946 return return_self(self);
1947 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 /* use the difference between current and new, hence the "-1" */
1950 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001951 assert(count > 0);
1952 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 PyErr_SetString(PyExc_OverflowError,
1954 "replacement bytes are too long");
1955 return NULL;
1956 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001957 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 if ( (result = (PyBytesObject *)
1960 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1961 return NULL;
1962 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 start = self_s;
1965 end = self_s + self_len;
1966 while (count-- > 0) {
1967 next = findchar(start, end-start, from_c);
1968 if (next == NULL)
1969 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 if (next == start) {
1972 /* replace with the 'to' */
1973 Py_MEMCPY(result_s, to_s, to_len);
1974 result_s += to_len;
1975 start += 1;
1976 } else {
1977 /* copy the unchanged old then the 'to' */
1978 Py_MEMCPY(result_s, start, next-start);
1979 result_s += (next-start);
1980 Py_MEMCPY(result_s, to_s, to_len);
1981 result_s += to_len;
1982 start = next+1;
1983 }
1984 }
1985 /* Copy the remainder of the remaining string */
1986 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989}
1990
1991/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1992Py_LOCAL(PyBytesObject *)
1993replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 const char *from_s, Py_ssize_t from_len,
1995 const char *to_s, Py_ssize_t to_len,
1996 Py_ssize_t maxcount) {
1997 char *self_s, *result_s;
1998 char *start, *next, *end;
1999 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002000 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 self_s = PyBytes_AS_STRING(self);
2004 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 count = stringlib_count(self_s, self_len,
2007 from_s, from_len,
2008 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 if (count == 0) {
2011 /* no matches, return unchanged */
2012 return return_self(self);
2013 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002015 /* Check for overflow */
2016 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002017 assert(count > 0);
2018 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 PyErr_SetString(PyExc_OverflowError,
2020 "replacement bytes are too long");
2021 return NULL;
2022 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002023 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 if ( (result = (PyBytesObject *)
2026 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2027 return NULL;
2028 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 start = self_s;
2031 end = self_s + self_len;
2032 while (count-- > 0) {
2033 offset = stringlib_find(start, end-start,
2034 from_s, from_len,
2035 0);
2036 if (offset == -1)
2037 break;
2038 next = start+offset;
2039 if (next == start) {
2040 /* replace with the 'to' */
2041 Py_MEMCPY(result_s, to_s, to_len);
2042 result_s += to_len;
2043 start += from_len;
2044 } else {
2045 /* copy the unchanged old then the 'to' */
2046 Py_MEMCPY(result_s, start, next-start);
2047 result_s += (next-start);
2048 Py_MEMCPY(result_s, to_s, to_len);
2049 result_s += to_len;
2050 start = next+from_len;
2051 }
2052 }
2053 /* Copy the remainder of the remaining string */
2054 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002056 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057}
2058
2059
2060Py_LOCAL(PyBytesObject *)
2061replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 const char *from_s, Py_ssize_t from_len,
2063 const char *to_s, Py_ssize_t to_len,
2064 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 if (maxcount < 0) {
2067 maxcount = PY_SSIZE_T_MAX;
2068 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2069 /* nothing to do; return the original string */
2070 return return_self(self);
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 if (maxcount == 0 ||
2074 (from_len == 0 && to_len == 0)) {
2075 /* nothing to do; return the original string */
2076 return return_self(self);
2077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 if (from_len == 0) {
2082 /* insert the 'to' string everywhere. */
2083 /* >>> "Python".replace("", ".") */
2084 /* '.P.y.t.h.o.n.' */
2085 return replace_interleave(self, to_s, to_len, maxcount);
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2089 /* point for an empty self string to generate a non-empty string */
2090 /* Special case so the remaining code always gets a non-empty string */
2091 if (PyBytes_GET_SIZE(self) == 0) {
2092 return return_self(self);
2093 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 if (to_len == 0) {
2096 /* delete all occurrences of 'from' string */
2097 if (from_len == 1) {
2098 return replace_delete_single_character(
2099 self, from_s[0], maxcount);
2100 } else {
2101 return replace_delete_substring(self, from_s,
2102 from_len, maxcount);
2103 }
2104 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 if (from_len == to_len) {
2109 if (from_len == 1) {
2110 return replace_single_character_in_place(
2111 self,
2112 from_s[0],
2113 to_s[0],
2114 maxcount);
2115 } else {
2116 return replace_substring_in_place(
2117 self, from_s, from_len, to_s, to_len,
2118 maxcount);
2119 }
2120 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002122 /* Otherwise use the more generic algorithms */
2123 if (from_len == 1) {
2124 return replace_single_character(self, from_s[0],
2125 to_s, to_len, maxcount);
2126 } else {
2127 /* len('from')>=2, len('to')>=1 */
2128 return replace_substring(self, from_s, from_len, to_s, to_len,
2129 maxcount);
2130 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131}
2132
2133PyDoc_STRVAR(replace__doc__,
2134"B.replace(old, new[, count]) -> bytes\n\
2135\n\
2136Return a copy of B with all occurrences of subsection\n\
2137old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002138given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
2140static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002141bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 Py_ssize_t count = -1;
2144 PyObject *from, *to;
2145 const char *from_s, *to_s;
2146 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2149 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 if (PyBytes_Check(from)) {
2152 from_s = PyBytes_AS_STRING(from);
2153 from_len = PyBytes_GET_SIZE(from);
2154 }
2155 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2156 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 if (PyBytes_Check(to)) {
2159 to_s = PyBytes_AS_STRING(to);
2160 to_len = PyBytes_GET_SIZE(to);
2161 }
2162 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2163 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 return (PyObject *)replace((PyBytesObject *) self,
2166 from_s, from_len,
2167 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168}
2169
2170/** End DALKE **/
2171
2172/* Matches the end (direction >= 0) or start (direction < 0) of self
2173 * against substr, using the start and end arguments. Returns
2174 * -1 on error, 0 if not found and 1 if found.
2175 */
2176Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002177_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 Py_ssize_t len = PyBytes_GET_SIZE(self);
2181 Py_ssize_t slen;
2182 const char* sub;
2183 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 if (PyBytes_Check(substr)) {
2186 sub = PyBytes_AS_STRING(substr);
2187 slen = PyBytes_GET_SIZE(substr);
2188 }
2189 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2190 return -1;
2191 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002195 if (direction < 0) {
2196 /* startswith */
2197 if (start+slen > len)
2198 return 0;
2199 } else {
2200 /* endswith */
2201 if (end-start < slen || start > len)
2202 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002204 if (end-slen > start)
2205 start = end - slen;
2206 }
2207 if (end-start >= slen)
2208 return ! memcmp(str+start, sub, slen);
2209 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210}
2211
2212
2213PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002214"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215\n\
2216Return True if B starts with the specified prefix, False otherwise.\n\
2217With optional start, test B beginning at that position.\n\
2218With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002219prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
2221static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002222bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 Py_ssize_t start = 0;
2225 Py_ssize_t end = PY_SSIZE_T_MAX;
2226 PyObject *subobj;
2227 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228
Jesus Ceaac451502011-04-20 17:09:23 +02002229 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002230 return NULL;
2231 if (PyTuple_Check(subobj)) {
2232 Py_ssize_t i;
2233 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2234 result = _bytes_tailmatch(self,
2235 PyTuple_GET_ITEM(subobj, i),
2236 start, end, -1);
2237 if (result == -1)
2238 return NULL;
2239 else if (result) {
2240 Py_RETURN_TRUE;
2241 }
2242 }
2243 Py_RETURN_FALSE;
2244 }
2245 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002246 if (result == -1) {
2247 if (PyErr_ExceptionMatches(PyExc_TypeError))
2248 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2249 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002251 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002252 else
2253 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254}
2255
2256
2257PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002258"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259\n\
2260Return True if B ends with the specified suffix, False otherwise.\n\
2261With optional start, test B beginning at that position.\n\
2262With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002263suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264
2265static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002266bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 Py_ssize_t start = 0;
2269 Py_ssize_t end = PY_SSIZE_T_MAX;
2270 PyObject *subobj;
2271 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272
Jesus Ceaac451502011-04-20 17:09:23 +02002273 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 return NULL;
2275 if (PyTuple_Check(subobj)) {
2276 Py_ssize_t i;
2277 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2278 result = _bytes_tailmatch(self,
2279 PyTuple_GET_ITEM(subobj, i),
2280 start, end, +1);
2281 if (result == -1)
2282 return NULL;
2283 else if (result) {
2284 Py_RETURN_TRUE;
2285 }
2286 }
2287 Py_RETURN_FALSE;
2288 }
2289 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002290 if (result == -1) {
2291 if (PyErr_ExceptionMatches(PyExc_TypeError))
2292 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2293 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 else
2297 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002298}
2299
2300
2301PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002302"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002304Decode B using the codec registered for encoding. Default encoding\n\
2305is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002306handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2307a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002308as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002309able to handle UnicodeDecodeErrors.");
2310
2311static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002312bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 const char *encoding = NULL;
2315 const char *errors = NULL;
2316 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2319 return NULL;
2320 if (encoding == NULL)
2321 encoding = PyUnicode_GetDefaultEncoding();
2322 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002323}
2324
Guido van Rossum20188312006-05-05 15:15:40 +00002325
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002326PyDoc_STRVAR(splitlines__doc__,
2327"B.splitlines([keepends]) -> list of lines\n\
2328\n\
2329Return a list of the lines in B, breaking at line boundaries.\n\
2330Line breaks are not included in the resulting list unless keepends\n\
2331is given and true.");
2332
2333static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002334bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002335{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002336 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002337 int keepends = 0;
2338
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002339 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2340 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002341 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002342
2343 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002344 (PyObject*) self, PyBytes_AS_STRING(self),
2345 PyBytes_GET_SIZE(self), keepends
2346 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002347}
2348
2349
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002354Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356
2357static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002358hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 if (c >= 128)
2361 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002362 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 return c - '0';
2364 else {
David Malcolm96960882010-11-05 17:23:41 +00002365 if (Py_ISUPPER(c))
2366 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 if (c >= 'a' && c <= 'f')
2368 return c - 'a' + 10;
2369 }
2370 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002371}
2372
2373static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002374bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 PyObject *newstring, *hexobj;
2377 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 Py_ssize_t hexlen, byteslen, i, j;
2379 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002380 void *data;
2381 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2384 return NULL;
2385 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002386 if (PyUnicode_READY(hexobj))
2387 return NULL;
2388 kind = PyUnicode_KIND(hexobj);
2389 data = PyUnicode_DATA(hexobj);
2390 hexlen = PyUnicode_GET_LENGTH(hexobj);
2391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 byteslen = hexlen/2; /* This overestimates if there are spaces */
2393 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2394 if (!newstring)
2395 return NULL;
2396 buf = PyBytes_AS_STRING(newstring);
2397 for (i = j = 0; i < hexlen; i += 2) {
2398 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002399 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002400 i++;
2401 if (i >= hexlen)
2402 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002403 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2404 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 if (top == -1 || bot == -1) {
2406 PyErr_Format(PyExc_ValueError,
2407 "non-hexadecimal number found in "
2408 "fromhex() arg at position %zd", i);
2409 goto error;
2410 }
2411 buf[j++] = (top << 4) + bot;
2412 }
2413 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2414 goto error;
2415 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416
2417 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 Py_XDECREF(newstring);
2419 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002420}
2421
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002422PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002423"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002424
2425static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002426bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 Py_ssize_t res;
2429 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2430 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002431}
2432
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002433
2434static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002435bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002438}
2439
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002440
2441static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002442bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2444 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2445 _Py_capitalize__doc__},
2446 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2447 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2448 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2449 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2450 endswith__doc__},
2451 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2452 expandtabs__doc__},
2453 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2454 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2455 fromhex_doc},
2456 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2457 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2458 _Py_isalnum__doc__},
2459 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2460 _Py_isalpha__doc__},
2461 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2462 _Py_isdigit__doc__},
2463 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2464 _Py_islower__doc__},
2465 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2466 _Py_isspace__doc__},
2467 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2468 _Py_istitle__doc__},
2469 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2470 _Py_isupper__doc__},
2471 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2472 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2473 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2474 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2475 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2476 _Py_maketrans__doc__},
2477 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2478 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2479 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2480 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2481 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2482 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2483 rpartition__doc__},
2484 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2485 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2486 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002487 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 splitlines__doc__},
2489 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2490 startswith__doc__},
2491 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2492 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2493 _Py_swapcase__doc__},
2494 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2495 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2496 translate__doc__},
2497 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2498 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2499 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2500 sizeof__doc__},
2501 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002502};
2503
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504static PyObject *
2505str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2506
2507static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002508bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 PyObject *x = NULL;
2511 const char *encoding = NULL;
2512 const char *errors = NULL;
2513 PyObject *new = NULL;
2514 Py_ssize_t size;
2515 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 if (type != &PyBytes_Type)
2518 return str_subtype_new(type, args, kwds);
2519 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2520 &encoding, &errors))
2521 return NULL;
2522 if (x == NULL) {
2523 if (encoding != NULL || errors != NULL) {
2524 PyErr_SetString(PyExc_TypeError,
2525 "encoding or errors without sequence "
2526 "argument");
2527 return NULL;
2528 }
2529 return PyBytes_FromString("");
2530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002532 if (PyUnicode_Check(x)) {
2533 /* Encode via the codec registry */
2534 if (encoding == NULL) {
2535 PyErr_SetString(PyExc_TypeError,
2536 "string argument without an encoding");
2537 return NULL;
2538 }
2539 new = PyUnicode_AsEncodedString(x, encoding, errors);
2540 if (new == NULL)
2541 return NULL;
2542 assert(PyBytes_Check(new));
2543 return new;
2544 }
2545 /* Is it an integer? */
2546 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2547 if (size == -1 && PyErr_Occurred()) {
2548 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2549 return NULL;
2550 PyErr_Clear();
2551 }
2552 else if (size < 0) {
2553 PyErr_SetString(PyExc_ValueError, "negative count");
2554 return NULL;
2555 }
2556 else {
2557 new = PyBytes_FromStringAndSize(NULL, size);
2558 if (new == NULL) {
2559 return NULL;
2560 }
2561 if (size > 0) {
2562 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2563 }
2564 return new;
2565 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 /* If it's not unicode, there can't be encoding or errors */
2568 if (encoding != NULL || errors != NULL) {
2569 PyErr_SetString(PyExc_TypeError,
2570 "encoding or errors without a string argument");
2571 return NULL;
2572 }
2573 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002574}
2575
2576PyObject *
2577PyBytes_FromObject(PyObject *x)
2578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 PyObject *new, *it;
2580 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 if (x == NULL) {
2583 PyErr_BadInternalCall();
2584 return NULL;
2585 }
2586 /* Use the modern buffer interface */
2587 if (PyObject_CheckBuffer(x)) {
2588 Py_buffer view;
2589 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2590 return NULL;
2591 new = PyBytes_FromStringAndSize(NULL, view.len);
2592 if (!new)
2593 goto fail;
2594 /* XXX(brett.cannon): Better way to get to internal buffer? */
2595 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2596 &view, view.len, 'C') < 0)
2597 goto fail;
2598 PyBuffer_Release(&view);
2599 return new;
2600 fail:
2601 Py_XDECREF(new);
2602 PyBuffer_Release(&view);
2603 return NULL;
2604 }
2605 if (PyUnicode_Check(x)) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "cannot convert unicode object to bytes");
2608 return NULL;
2609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 if (PyList_CheckExact(x)) {
2612 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2613 if (new == NULL)
2614 return NULL;
2615 for (i = 0; i < Py_SIZE(x); i++) {
2616 Py_ssize_t value = PyNumber_AsSsize_t(
2617 PyList_GET_ITEM(x, i), PyExc_ValueError);
2618 if (value == -1 && PyErr_Occurred()) {
2619 Py_DECREF(new);
2620 return NULL;
2621 }
2622 if (value < 0 || value >= 256) {
2623 PyErr_SetString(PyExc_ValueError,
2624 "bytes must be in range(0, 256)");
2625 Py_DECREF(new);
2626 return NULL;
2627 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002628 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
2630 return new;
2631 }
2632 if (PyTuple_CheckExact(x)) {
2633 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2634 if (new == NULL)
2635 return NULL;
2636 for (i = 0; i < Py_SIZE(x); i++) {
2637 Py_ssize_t value = PyNumber_AsSsize_t(
2638 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2639 if (value == -1 && PyErr_Occurred()) {
2640 Py_DECREF(new);
2641 return NULL;
2642 }
2643 if (value < 0 || value >= 256) {
2644 PyErr_SetString(PyExc_ValueError,
2645 "bytes must be in range(0, 256)");
2646 Py_DECREF(new);
2647 return NULL;
2648 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002649 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 }
2651 return new;
2652 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 /* For iterator version, create a string object and resize as needed */
2655 size = _PyObject_LengthHint(x, 64);
2656 if (size == -1 && PyErr_Occurred())
2657 return NULL;
2658 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2659 returning a shared empty bytes string. This required because we
2660 want to call _PyBytes_Resize() the returned object, which we can
2661 only do on bytes objects with refcount == 1. */
2662 size += 1;
2663 new = PyBytes_FromStringAndSize(NULL, size);
2664 if (new == NULL)
2665 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Get the iterator */
2668 it = PyObject_GetIter(x);
2669 if (it == NULL)
2670 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Run the iterator to exhaustion */
2673 for (i = 0; ; i++) {
2674 PyObject *item;
2675 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Get the next item */
2678 item = PyIter_Next(it);
2679 if (item == NULL) {
2680 if (PyErr_Occurred())
2681 goto error;
2682 break;
2683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 /* Interpret it as an int (__index__) */
2686 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2687 Py_DECREF(item);
2688 if (value == -1 && PyErr_Occurred())
2689 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Range check */
2692 if (value < 0 || value >= 256) {
2693 PyErr_SetString(PyExc_ValueError,
2694 "bytes must be in range(0, 256)");
2695 goto error;
2696 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* Append the byte */
2699 if (i >= size) {
2700 size = 2 * size + 1;
2701 if (_PyBytes_Resize(&new, size) < 0)
2702 goto error;
2703 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002704 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 }
2706 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Clean up and return success */
2709 Py_DECREF(it);
2710 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
2712 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 /* Error handling when new != NULL */
2714 Py_XDECREF(it);
2715 Py_DECREF(new);
2716 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717}
2718
2719static PyObject *
2720str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2721{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 PyObject *tmp, *pnew;
2723 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 assert(PyType_IsSubtype(type, &PyBytes_Type));
2726 tmp = bytes_new(&PyBytes_Type, args, kwds);
2727 if (tmp == NULL)
2728 return NULL;
2729 assert(PyBytes_CheckExact(tmp));
2730 n = PyBytes_GET_SIZE(tmp);
2731 pnew = type->tp_alloc(type, n);
2732 if (pnew != NULL) {
2733 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2734 PyBytes_AS_STRING(tmp), n+1);
2735 ((PyBytesObject *)pnew)->ob_shash =
2736 ((PyBytesObject *)tmp)->ob_shash;
2737 }
2738 Py_DECREF(tmp);
2739 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740}
2741
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002742PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002743"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002745bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2746bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002747\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002749 - an iterable yielding integers in range(256)\n\
2750 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751 - a bytes or a buffer object\n\
2752 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002753
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002754static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002755
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2758 "bytes",
2759 PyBytesObject_SIZE,
2760 sizeof(char),
2761 bytes_dealloc, /* tp_dealloc */
2762 0, /* tp_print */
2763 0, /* tp_getattr */
2764 0, /* tp_setattr */
2765 0, /* tp_reserved */
2766 (reprfunc)bytes_repr, /* tp_repr */
2767 0, /* tp_as_number */
2768 &bytes_as_sequence, /* tp_as_sequence */
2769 &bytes_as_mapping, /* tp_as_mapping */
2770 (hashfunc)bytes_hash, /* tp_hash */
2771 0, /* tp_call */
2772 bytes_str, /* tp_str */
2773 PyObject_GenericGetAttr, /* tp_getattro */
2774 0, /* tp_setattro */
2775 &bytes_as_buffer, /* tp_as_buffer */
2776 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2777 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2778 bytes_doc, /* tp_doc */
2779 0, /* tp_traverse */
2780 0, /* tp_clear */
2781 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2782 0, /* tp_weaklistoffset */
2783 bytes_iter, /* tp_iter */
2784 0, /* tp_iternext */
2785 bytes_methods, /* tp_methods */
2786 0, /* tp_members */
2787 0, /* tp_getset */
2788 &PyBaseObject_Type, /* tp_base */
2789 0, /* tp_dict */
2790 0, /* tp_descr_get */
2791 0, /* tp_descr_set */
2792 0, /* tp_dictoffset */
2793 0, /* tp_init */
2794 0, /* tp_alloc */
2795 bytes_new, /* tp_new */
2796 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002797};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799void
2800PyBytes_Concat(register PyObject **pv, register PyObject *w)
2801{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 register PyObject *v;
2803 assert(pv != NULL);
2804 if (*pv == NULL)
2805 return;
2806 if (w == NULL) {
2807 Py_DECREF(*pv);
2808 *pv = NULL;
2809 return;
2810 }
2811 v = bytes_concat(*pv, w);
2812 Py_DECREF(*pv);
2813 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814}
2815
2816void
2817PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2818{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 PyBytes_Concat(pv, w);
2820 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002821}
2822
2823
2824/* The following function breaks the notion that strings are immutable:
2825 it changes the size of a string. We get away with this only if there
2826 is only one module referencing the object. You can also think of it
2827 as creating a new string object and destroying the old one, only
2828 more efficiently. In any case, don't use this if the string may
2829 already be known to some other part of the code...
2830 Note that if there's not enough memory to resize the string, the original
2831 string object at *pv is deallocated, *pv is set to NULL, an "out of
2832 memory" exception is set, and -1 is returned. Else (on success) 0 is
2833 returned, and the value in *pv may or may not be the same as on input.
2834 As always, an extra byte is allocated for a trailing \0 byte (newsize
2835 does *not* include that), and a trailing \0 byte is stored.
2836*/
2837
2838int
2839_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 register PyObject *v;
2842 register PyBytesObject *sv;
2843 v = *pv;
2844 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2845 *pv = 0;
2846 Py_DECREF(v);
2847 PyErr_BadInternalCall();
2848 return -1;
2849 }
2850 /* XXX UNREF/NEWREF interface should be more symmetrical */
2851 _Py_DEC_REFTOTAL;
2852 _Py_ForgetReference(v);
2853 *pv = (PyObject *)
2854 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2855 if (*pv == NULL) {
2856 PyObject_Del(v);
2857 PyErr_NoMemory();
2858 return -1;
2859 }
2860 _Py_NewReference(*pv);
2861 sv = (PyBytesObject *) *pv;
2862 Py_SIZE(sv) = newsize;
2863 sv->ob_sval[newsize] = '\0';
2864 sv->ob_shash = -1; /* invalidate cached hash value */
2865 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866}
2867
2868/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2869 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2870 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002871 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002872 * . *pbuf is set to point into it,
2873 * *plen set to the # of chars following that.
2874 * Caller must decref it when done using pbuf.
2875 * The string starting at *pbuf is of the form
2876 * "-"? ("0x" | "0X")? digit+
2877 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2878 * set in flags. The case of hex digits will be correct,
2879 * There will be at least prec digits, zero-filled on the left if
2880 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 * val object to be converted
2882 * flags bitmask of format flags; only F_ALT is looked at
2883 * prec minimum number of digits; 0-fill on left if needed
2884 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885 *
2886 * CAUTION: o, x and X conversions on regular ints can never
2887 * produce a '-' sign, but can for Python's unbounded ints.
2888 */
2889PyObject*
2890_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002892{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002893 PyObject *result = NULL;
2894 char *buf;
2895 Py_ssize_t i;
2896 int sign; /* 1 if '-', else 0 */
2897 int len; /* number of characters */
2898 Py_ssize_t llen;
2899 int numdigits; /* len == numnondigits + numdigits */
2900 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 /* Avoid exceeding SSIZE_T_MAX */
2903 if (prec > INT_MAX-3) {
2904 PyErr_SetString(PyExc_OverflowError,
2905 "precision too large");
2906 return NULL;
2907 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002909 switch (type) {
2910 case 'd':
2911 case 'u':
2912 /* Special-case boolean: we want 0/1 */
2913 if (PyBool_Check(val))
2914 result = PyNumber_ToBase(val, 10);
2915 else
2916 result = Py_TYPE(val)->tp_str(val);
2917 break;
2918 case 'o':
2919 numnondigits = 2;
2920 result = PyNumber_ToBase(val, 8);
2921 break;
2922 case 'x':
2923 case 'X':
2924 numnondigits = 2;
2925 result = PyNumber_ToBase(val, 16);
2926 break;
2927 default:
2928 assert(!"'type' not in [duoxX]");
2929 }
2930 if (!result)
2931 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 buf = _PyUnicode_AsString(result);
2934 if (!buf) {
2935 Py_DECREF(result);
2936 return NULL;
2937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 /* To modify the string in-place, there can only be one reference. */
2940 if (Py_REFCNT(result) != 1) {
2941 PyErr_BadInternalCall();
2942 return NULL;
2943 }
Victor Stinner9e30aa52011-11-21 02:49:52 +01002944 llen = PyUnicode_GetLength(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 if (llen > INT_MAX) {
2946 PyErr_SetString(PyExc_ValueError,
2947 "string too large in _PyBytes_FormatLong");
2948 return NULL;
2949 }
2950 len = (int)llen;
2951 if (buf[len-1] == 'L') {
2952 --len;
2953 buf[len] = '\0';
2954 }
2955 sign = buf[0] == '-';
2956 numnondigits += sign;
2957 numdigits = len - numnondigits;
2958 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002960 /* Get rid of base marker unless F_ALT */
2961 if (((flags & F_ALT) == 0 &&
2962 (type == 'o' || type == 'x' || type == 'X'))) {
2963 assert(buf[sign] == '0');
2964 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2965 buf[sign+1] == 'o');
2966 numnondigits -= 2;
2967 buf += 2;
2968 len -= 2;
2969 if (sign)
2970 buf[0] = '-';
2971 assert(len == numnondigits + numdigits);
2972 assert(numdigits > 0);
2973 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 /* Fill with leading zeroes to meet minimum width. */
2976 if (prec > numdigits) {
2977 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2978 numnondigits + prec);
2979 char *b1;
2980 if (!r1) {
2981 Py_DECREF(result);
2982 return NULL;
2983 }
2984 b1 = PyBytes_AS_STRING(r1);
2985 for (i = 0; i < numnondigits; ++i)
2986 *b1++ = *buf++;
2987 for (i = 0; i < prec - numdigits; i++)
2988 *b1++ = '0';
2989 for (i = 0; i < numdigits; i++)
2990 *b1++ = *buf++;
2991 *b1 = '\0';
2992 Py_DECREF(result);
2993 result = r1;
2994 buf = PyBytes_AS_STRING(result);
2995 len = numnondigits + prec;
2996 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 /* Fix up case for hex conversions. */
2999 if (type == 'X') {
3000 /* Need to convert all lower case letters to upper case.
3001 and need to convert 0x to 0X (and -0x to -0X). */
3002 for (i = 0; i < len; i++)
3003 if (buf[i] >= 'a' && buf[i] <= 'x')
3004 buf[i] -= 'a'-'A';
3005 }
3006 *pbuf = buf;
3007 *plen = len;
3008 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003009}
3010
3011void
3012PyBytes_Fini(void)
3013{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003014 int i;
3015 for (i = 0; i < UCHAR_MAX + 1; i++) {
3016 Py_XDECREF(characters[i]);
3017 characters[i] = NULL;
3018 }
3019 Py_XDECREF(nullstring);
3020 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021}
3022
Benjamin Peterson4116f362008-05-27 00:36:20 +00003023/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003024
3025typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 PyObject_HEAD
3027 Py_ssize_t it_index;
3028 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003029} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030
3031static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 _PyObject_GC_UNTRACK(it);
3035 Py_XDECREF(it->it_seq);
3036 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037}
3038
3039static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 Py_VISIT(it->it_seq);
3043 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044}
3045
3046static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003047striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 PyBytesObject *seq;
3050 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 assert(it != NULL);
3053 seq = it->it_seq;
3054 if (seq == NULL)
3055 return NULL;
3056 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3059 item = PyLong_FromLong(
3060 (unsigned char)seq->ob_sval[it->it_index]);
3061 if (item != NULL)
3062 ++it->it_index;
3063 return item;
3064 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 Py_DECREF(seq);
3067 it->it_seq = NULL;
3068 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003069}
3070
3071static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 Py_ssize_t len = 0;
3075 if (it->it_seq)
3076 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3077 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003078}
3079
3080PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003082
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3085 length_hint_doc},
3086 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003087};
3088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3091 "bytes_iterator", /* tp_name */
3092 sizeof(striterobject), /* tp_basicsize */
3093 0, /* tp_itemsize */
3094 /* methods */
3095 (destructor)striter_dealloc, /* tp_dealloc */
3096 0, /* tp_print */
3097 0, /* tp_getattr */
3098 0, /* tp_setattr */
3099 0, /* tp_reserved */
3100 0, /* tp_repr */
3101 0, /* tp_as_number */
3102 0, /* tp_as_sequence */
3103 0, /* tp_as_mapping */
3104 0, /* tp_hash */
3105 0, /* tp_call */
3106 0, /* tp_str */
3107 PyObject_GenericGetAttr, /* tp_getattro */
3108 0, /* tp_setattro */
3109 0, /* tp_as_buffer */
3110 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3111 0, /* tp_doc */
3112 (traverseproc)striter_traverse, /* tp_traverse */
3113 0, /* tp_clear */
3114 0, /* tp_richcompare */
3115 0, /* tp_weaklistoffset */
3116 PyObject_SelfIter, /* tp_iter */
3117 (iternextfunc)striter_next, /* tp_iternext */
3118 striter_methods, /* tp_methods */
3119 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003120};
3121
3122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003123bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 if (!PyBytes_Check(seq)) {
3128 PyErr_BadInternalCall();
3129 return NULL;
3130 }
3131 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3132 if (it == NULL)
3133 return NULL;
3134 it->it_index = 0;
3135 Py_INCREF(seq);
3136 it->it_seq = (PyBytesObject *)seq;
3137 _PyObject_GC_TRACK(it);
3138 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003139}