blob: 88411b799bf8aa8bebdafecaa5784ddfb4f4e81c [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
47 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 Py_ssize_t i, length = Py_SIZE(op);
569 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 unsigned char quote, *s, *p;
572
573 /* Compute size of output string */
574 squotes = dquotes = 0;
575 newsize = 3; /* b'' */
576 s = (unsigned char*)op->ob_sval;
577 for (i = 0; i < length; i++) {
578 switch(s[i]) {
579 case '\'': squotes++; newsize++; break;
580 case '"': dquotes++; newsize++; break;
581 case '\\': case '\t': case '\n': case '\r':
582 newsize += 2; break; /* \C */
583 default:
584 if (s[i] < ' ' || s[i] >= 0x7f)
585 newsize += 4; /* \xHH */
586 else
587 newsize++;
588 }
589 }
590 quote = '\'';
591 if (smartquotes && squotes && !dquotes)
592 quote = '"';
593 if (squotes && quote == '\'')
594 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
629 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000630}
631
Neal Norwitz6968b052007-02-27 19:02:19 +0000632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000633bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000634{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 if (Py_BytesWarningFlag) {
642 if (PyErr_WarnEx(PyExc_BytesWarning,
643 "str() on a bytes instance", 1))
644 return NULL;
645 }
646 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000649static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653}
Neal Norwitz6968b052007-02-27 19:02:19 +0000654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655/* This is also used by PyBytes_Concat() */
656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 Py_ssize_t size;
660 Py_buffer va, vb;
661 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 va.len = -1;
664 vb.len = -1;
665 if (_getbuffer(a, &va) < 0 ||
666 _getbuffer(b, &vb) < 0) {
667 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
668 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
669 goto done;
670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 /* Optimize end cases */
673 if (va.len == 0 && PyBytes_CheckExact(b)) {
674 result = b;
675 Py_INCREF(result);
676 goto done;
677 }
678 if (vb.len == 0 && PyBytes_CheckExact(a)) {
679 result = a;
680 Py_INCREF(result);
681 goto done;
682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 size = va.len + vb.len;
685 if (size < 0) {
686 PyErr_NoMemory();
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 result = PyBytes_FromStringAndSize(NULL, size);
691 if (result != NULL) {
692 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
693 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
696 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 if (va.len != -1)
698 PyBuffer_Release(&va);
699 if (vb.len != -1)
700 PyBuffer_Release(&vb);
701 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702}
Neal Norwitz6968b052007-02-27 19:02:19 +0000703
704static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000705bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 register Py_ssize_t i;
708 register Py_ssize_t j;
709 register Py_ssize_t size;
710 register PyBytesObject *op;
711 size_t nbytes;
712 if (n < 0)
713 n = 0;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000717 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 PyErr_SetString(PyExc_OverflowError,
719 "repeated bytes are too long");
720 return NULL;
721 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000722 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
724 Py_INCREF(a);
725 return (PyObject *)a;
726 }
727 nbytes = (size_t)size;
728 if (nbytes + PyBytesObject_SIZE <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
733 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
734 if (op == NULL)
735 return PyErr_NoMemory();
736 PyObject_INIT_VAR(op, &PyBytes_Type, size);
737 op->ob_shash = -1;
738 op->ob_sval[size] = '\0';
739 if (Py_SIZE(a) == 1 && n > 0) {
740 memset(op->ob_sval, a->ob_sval[0] , n);
741 return (PyObject *) op;
742 }
743 i = 0;
744 if (i < size) {
745 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
746 i = Py_SIZE(a);
747 }
748 while (i < size) {
749 j = (i <= size-i) ? i : size-i;
750 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
751 i += j;
752 }
753 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000754}
755
Guido van Rossum98297ee2007-11-06 21:34:58 +0000756static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000757bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000758{
759 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
760 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000761 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000762 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000763 PyErr_Clear();
764 if (_getbuffer(arg, &varg) < 0)
765 return -1;
766 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
767 varg.buf, varg.len, 0);
768 PyBuffer_Release(&varg);
769 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000770 }
771 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
773 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774 }
775
Antoine Pitrou0010d372010-08-15 17:12:55 +0000776 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777}
778
Neal Norwitz6968b052007-02-27 19:02:19 +0000779static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000780bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 if (i < 0 || i >= Py_SIZE(a)) {
783 PyErr_SetString(PyExc_IndexError, "index out of range");
784 return NULL;
785 }
786 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000787}
788
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000789static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000790bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 int c;
793 Py_ssize_t len_a, len_b;
794 Py_ssize_t min_len;
795 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 /* Make sure both arguments are strings. */
798 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
799 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
800 (PyObject_IsInstance((PyObject*)a,
801 (PyObject*)&PyUnicode_Type) ||
802 PyObject_IsInstance((PyObject*)b,
803 (PyObject*)&PyUnicode_Type))) {
804 if (PyErr_WarnEx(PyExc_BytesWarning,
805 "Comparison between bytes and string", 1))
806 return NULL;
807 }
808 result = Py_NotImplemented;
809 goto out;
810 }
811 if (a == b) {
812 switch (op) {
813 case Py_EQ:case Py_LE:case Py_GE:
814 result = Py_True;
815 goto out;
816 case Py_NE:case Py_LT:case Py_GT:
817 result = Py_False;
818 goto out;
819 }
820 }
821 if (op == Py_EQ) {
822 /* Supporting Py_NE here as well does not save
823 much time, since Py_NE is rarely used. */
824 if (Py_SIZE(a) == Py_SIZE(b)
825 && (a->ob_sval[0] == b->ob_sval[0]
826 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
827 result = Py_True;
828 } else {
829 result = Py_False;
830 }
831 goto out;
832 }
833 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
834 min_len = (len_a < len_b) ? len_a : len_b;
835 if (min_len > 0) {
836 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
837 if (c==0)
838 c = memcmp(a->ob_sval, b->ob_sval, min_len);
839 } else
840 c = 0;
841 if (c == 0)
842 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
843 switch (op) {
844 case Py_LT: c = c < 0; break;
845 case Py_LE: c = c <= 0; break;
846 case Py_EQ: assert(0); break; /* unreachable */
847 case Py_NE: c = c != 0; break;
848 case Py_GT: c = c > 0; break;
849 case Py_GE: c = c >= 0; break;
850 default:
851 result = Py_NotImplemented;
852 goto out;
853 }
854 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000855 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_INCREF(result);
857 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000858}
859
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000860static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100863 if (a->ob_shash == -1) {
864 /* Can't fail */
865 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
866 }
867 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000868}
869
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000870static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000871bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 if (PyIndex_Check(item)) {
874 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
875 if (i == -1 && PyErr_Occurred())
876 return NULL;
877 if (i < 0)
878 i += PyBytes_GET_SIZE(self);
879 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
880 PyErr_SetString(PyExc_IndexError,
881 "index out of range");
882 return NULL;
883 }
884 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
885 }
886 else if (PySlice_Check(item)) {
887 Py_ssize_t start, stop, step, slicelength, cur, i;
888 char* source_buf;
889 char* result_buf;
890 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000891
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000892 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 PyBytes_GET_SIZE(self),
894 &start, &stop, &step, &slicelength) < 0) {
895 return NULL;
896 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 if (slicelength <= 0) {
899 return PyBytes_FromStringAndSize("", 0);
900 }
901 else if (start == 0 && step == 1 &&
902 slicelength == PyBytes_GET_SIZE(self) &&
903 PyBytes_CheckExact(self)) {
904 Py_INCREF(self);
905 return (PyObject *)self;
906 }
907 else if (step == 1) {
908 return PyBytes_FromStringAndSize(
909 PyBytes_AS_STRING(self) + start,
910 slicelength);
911 }
912 else {
913 source_buf = PyBytes_AS_STRING(self);
914 result = PyBytes_FromStringAndSize(NULL, slicelength);
915 if (result == NULL)
916 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 result_buf = PyBytes_AS_STRING(result);
919 for (cur = start, i = 0; i < slicelength;
920 cur += step, i++) {
921 result_buf[i] = source_buf[cur];
922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 return result;
925 }
926 }
927 else {
928 PyErr_Format(PyExc_TypeError,
929 "byte indices must be integers, not %.200s",
930 Py_TYPE(item)->tp_name);
931 return NULL;
932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000933}
934
935static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000936bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000937{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
939 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000940}
941
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000942static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 (lenfunc)bytes_length, /*sq_length*/
944 (binaryfunc)bytes_concat, /*sq_concat*/
945 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
946 (ssizeargfunc)bytes_item, /*sq_item*/
947 0, /*sq_slice*/
948 0, /*sq_ass_item*/
949 0, /*sq_ass_slice*/
950 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951};
952
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 (lenfunc)bytes_length,
955 (binaryfunc)bytes_subscript,
956 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000957};
958
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000959static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 (getbufferproc)bytes_buffer_getbuffer,
961 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
964
/* Selectors telling the strip helpers which end(s) of the object to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
973
Neal Norwitz6968b052007-02-27 19:02:19 +0000974PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000976\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000977Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000978If sep is not specified or is None, B is split on ASCII whitespace\n\
979characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000980If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000981
982static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000983bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +0000984{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
986 Py_ssize_t maxsplit = -1;
987 const char *s = PyBytes_AS_STRING(self), *sub;
988 Py_buffer vsub;
989 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
992 return NULL;
993 if (maxsplit < 0)
994 maxsplit = PY_SSIZE_T_MAX;
995 if (subobj == Py_None)
996 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
997 if (_getbuffer(subobj, &vsub) < 0)
998 return NULL;
999 sub = vsub.buf;
1000 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1003 PyBuffer_Release(&vsub);
1004 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001005}
1006
Neal Norwitz6968b052007-02-27 19:02:19 +00001007PyDoc_STRVAR(partition__doc__,
1008"B.partition(sep) -> (head, sep, tail)\n\
1009\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001010Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001011the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001012found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001013
1014static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001015bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 const char *sep;
1018 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 if (PyBytes_Check(sep_obj)) {
1021 sep = PyBytes_AS_STRING(sep_obj);
1022 sep_len = PyBytes_GET_SIZE(sep_obj);
1023 }
1024 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1025 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 return stringlib_partition(
1028 (PyObject*) self,
1029 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1030 sep_obj, sep, sep_len
1031 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001032}
1033
1034PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001035"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001036\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001037Search for the separator sep in B, starting at the end of B,\n\
1038and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001039part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001040bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001041
1042static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001043bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001044{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 const char *sep;
1046 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 if (PyBytes_Check(sep_obj)) {
1049 sep = PyBytes_AS_STRING(sep_obj);
1050 sep_len = PyBytes_GET_SIZE(sep_obj);
1051 }
1052 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1053 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 return stringlib_rpartition(
1056 (PyObject*) self,
1057 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1058 sep_obj, sep, sep_len
1059 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001060}
1061
Neal Norwitz6968b052007-02-27 19:02:19 +00001062PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001063"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001064\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001065Return a list of the sections in B, using sep as the delimiter,\n\
1066starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001067If sep is not given, B is split on ASCII whitespace characters\n\
1068(space, tab, return, newline, formfeed, vertical tab).\n\
1069If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001071
Neal Norwitz6968b052007-02-27 19:02:19 +00001072static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001073bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001074{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1076 Py_ssize_t maxsplit = -1;
1077 const char *s = PyBytes_AS_STRING(self), *sub;
1078 Py_buffer vsub;
1079 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1082 return NULL;
1083 if (maxsplit < 0)
1084 maxsplit = PY_SSIZE_T_MAX;
1085 if (subobj == Py_None)
1086 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1087 if (_getbuffer(subobj, &vsub) < 0)
1088 return NULL;
1089 sub = vsub.buf;
1090 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1093 PyBuffer_Release(&vsub);
1094 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001095}
1096
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001097
1098PyDoc_STRVAR(join__doc__,
1099"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001100\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001101Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1103
Neal Norwitz6968b052007-02-27 19:02:19 +00001104static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001105bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 char *sep = PyBytes_AS_STRING(self);
1108 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1109 PyObject *res = NULL;
1110 char *p;
1111 Py_ssize_t seqlen = 0;
1112 size_t sz = 0;
1113 Py_ssize_t i;
1114 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 seq = PySequence_Fast(orig, "");
1117 if (seq == NULL) {
1118 return NULL;
1119 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 seqlen = PySequence_Size(seq);
1122 if (seqlen == 0) {
1123 Py_DECREF(seq);
1124 return PyBytes_FromString("");
1125 }
1126 if (seqlen == 1) {
1127 item = PySequence_Fast_GET_ITEM(seq, 0);
1128 if (PyBytes_CheckExact(item)) {
1129 Py_INCREF(item);
1130 Py_DECREF(seq);
1131 return item;
1132 }
1133 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 /* There are at least two things to join, or else we have a subclass
1136 * of the builtin types in the sequence.
1137 * Do a pre-pass to figure out the total amount of space we'll
1138 * need (sz), and see whether all argument are bytes.
1139 */
1140 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1141 for (i = 0; i < seqlen; i++) {
1142 const size_t old_sz = sz;
1143 item = PySequence_Fast_GET_ITEM(seq, i);
1144 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1145 PyErr_Format(PyExc_TypeError,
1146 "sequence item %zd: expected bytes,"
1147 " %.80s found",
1148 i, Py_TYPE(item)->tp_name);
1149 Py_DECREF(seq);
1150 return NULL;
1151 }
1152 sz += Py_SIZE(item);
1153 if (i != 0)
1154 sz += seplen;
1155 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1156 PyErr_SetString(PyExc_OverflowError,
1157 "join() result is too long for bytes");
1158 Py_DECREF(seq);
1159 return NULL;
1160 }
1161 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 /* Allocate result space. */
1164 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1165 if (res == NULL) {
1166 Py_DECREF(seq);
1167 return NULL;
1168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 /* Catenate everything. */
1171 /* I'm not worried about a PyByteArray item growing because there's
1172 nowhere in this function where we release the GIL. */
1173 p = PyBytes_AS_STRING(res);
1174 for (i = 0; i < seqlen; ++i) {
1175 size_t n;
1176 char *q;
1177 if (i) {
1178 Py_MEMCPY(p, sep, seplen);
1179 p += seplen;
1180 }
1181 item = PySequence_Fast_GET_ITEM(seq, i);
1182 n = Py_SIZE(item);
1183 if (PyBytes_Check(item))
1184 q = PyBytes_AS_STRING(item);
1185 else
1186 q = PyByteArray_AS_STRING(item);
1187 Py_MEMCPY(p, q, n);
1188 p += n;
1189 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 Py_DECREF(seq);
1192 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001193}
1194
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195PyObject *
1196_PyBytes_Join(PyObject *sep, PyObject *x)
1197{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 assert(sep != NULL && PyBytes_Check(sep));
1199 assert(x != NULL);
1200 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201}
1202
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of the
   sequence being sliced and is evaluated more than once, so it must be free
   of side effects.  After the macro:
     - end is clamped to at most len; a negative end is taken relative to
       len and clamped to 0;
     - a negative start is taken relative to len and clamped to 0.
   Wrapped in do { } while (0) so it behaves as a single statement (safe in
   an unbraced if/else); callers terminate it with a semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217
1218Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001219bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001222 char byte;
1223 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 const char *sub;
1225 Py_ssize_t sub_len;
1226 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001227 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228
Antoine Pitrouac65d962011-10-20 23:54:17 +02001229 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1230 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232
Antoine Pitrouac65d962011-10-20 23:54:17 +02001233 if (subobj) {
1234 if (_getbuffer(subobj, &subbuf) < 0)
1235 return -2;
1236
1237 sub = subbuf.buf;
1238 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001240 else {
1241 sub = &byte;
1242 sub_len = 1;
1243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001246 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1248 sub, sub_len, start, end);
1249 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001250 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1252 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001253
1254 if (subobj)
1255 PyBuffer_Release(&subbuf);
1256
1257 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258}
1259
1260
1261PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001262"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001263\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001264Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001265such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001267\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268Return -1 on failure.");
1269
Neal Norwitz6968b052007-02-27 19:02:19 +00001270static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001271bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 Py_ssize_t result = bytes_find_internal(self, args, +1);
1274 if (result == -2)
1275 return NULL;
1276 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001277}
1278
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
1280PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001281"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001282\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283Like B.find() but raise ValueError when the substring is not found.");
1284
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001285static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001286bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 Py_ssize_t result = bytes_find_internal(self, args, +1);
1289 if (result == -2)
1290 return NULL;
1291 if (result == -1) {
1292 PyErr_SetString(PyExc_ValueError,
1293 "substring not found");
1294 return NULL;
1295 }
1296 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001297}
1298
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001299
1300PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001301"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001302\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001304such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307Return -1 on failure.");
1308
Neal Norwitz6968b052007-02-27 19:02:19 +00001309static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001310bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 Py_ssize_t result = bytes_find_internal(self, args, -1);
1313 if (result == -2)
1314 return NULL;
1315 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001316}
1317
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001318
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001320"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001321\n\
1322Like B.rfind() but raise ValueError when the substring is not found.");
1323
1324static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001325bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001326{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 Py_ssize_t result = bytes_find_internal(self, args, -1);
1328 if (result == -2)
1329 return NULL;
1330 if (result == -1) {
1331 PyErr_SetString(PyExc_ValueError,
1332 "substring not found");
1333 return NULL;
1334 }
1335 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001336}
1337
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338
1339Py_LOCAL_INLINE(PyObject *)
1340do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 Py_buffer vsep;
1343 char *s = PyBytes_AS_STRING(self);
1344 Py_ssize_t len = PyBytes_GET_SIZE(self);
1345 char *sep;
1346 Py_ssize_t seplen;
1347 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 if (_getbuffer(sepobj, &vsep) < 0)
1350 return NULL;
1351 sep = vsep.buf;
1352 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 i = 0;
1355 if (striptype != RIGHTSTRIP) {
1356 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1357 i++;
1358 }
1359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 j = len;
1362 if (striptype != LEFTSTRIP) {
1363 do {
1364 j--;
1365 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1366 j++;
1367 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1372 Py_INCREF(self);
1373 return (PyObject*)self;
1374 }
1375 else
1376 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001377}
1378
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
1380Py_LOCAL_INLINE(PyObject *)
1381do_strip(PyBytesObject *self, int striptype)
1382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 char *s = PyBytes_AS_STRING(self);
1384 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 i = 0;
1387 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001388 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 i++;
1390 }
1391 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 j = len;
1394 if (striptype != LEFTSTRIP) {
1395 do {
1396 j--;
David Malcolm96960882010-11-05 17:23:41 +00001397 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 j++;
1399 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1402 Py_INCREF(self);
1403 return (PyObject*)self;
1404 }
1405 else
1406 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407}
1408
1409
1410Py_LOCAL_INLINE(PyObject *)
1411do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1416 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (sep != NULL && sep != Py_None) {
1419 return do_xstrip(self, striptype, sep);
1420 }
1421 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422}
1423
1424
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001425PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001427\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001428Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001431bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 if (PyTuple_GET_SIZE(args) == 0)
1434 return do_strip(self, BOTHSTRIP); /* Common case */
1435 else
1436 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437}
1438
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001440PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001443Strip leading bytes contained in the argument.\n\
1444If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001445static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001446bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 if (PyTuple_GET_SIZE(args) == 0)
1449 return do_strip(self, LEFTSTRIP); /* Common case */
1450 else
1451 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452}
1453
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001454
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001458Strip trailing bytes contained in the argument.\n\
1459If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001461bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 if (PyTuple_GET_SIZE(args) == 0)
1464 return do_strip(self, RIGHTSTRIP); /* Common case */
1465 else
1466 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467}
Neal Norwitz6968b052007-02-27 19:02:19 +00001468
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469
1470PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001471"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001472\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001474string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001475as in slice notation.");
1476
1477static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001478bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 PyObject *sub_obj;
1481 const char *str = PyBytes_AS_STRING(self), *sub;
1482 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001483 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485
Antoine Pitrouac65d962011-10-20 23:54:17 +02001486 Py_buffer vsub;
1487 PyObject *count_obj;
1488
1489 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1490 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
Antoine Pitrouac65d962011-10-20 23:54:17 +02001493 if (sub_obj) {
1494 if (_getbuffer(sub_obj, &vsub) < 0)
1495 return NULL;
1496
1497 sub = vsub.buf;
1498 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001500 else {
1501 sub = &byte;
1502 sub_len = 1;
1503 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506
Antoine Pitrouac65d962011-10-20 23:54:17 +02001507 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1509 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001510
1511 if (sub_obj)
1512 PyBuffer_Release(&vsub);
1513
1514 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515}
1516
1517
1518PyDoc_STRVAR(translate__doc__,
1519"B.translate(table[, deletechars]) -> bytes\n\
1520\n\
1521Return a copy of B, where all characters occurring in the\n\
1522optional argument deletechars are removed, and the remaining\n\
1523characters have been mapped through the given translation\n\
1524table, which must be a bytes object of length 256.");
1525
1526static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001527bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 register char *input, *output;
1530 const char *table;
1531 register Py_ssize_t i, c, changed = 0;
1532 PyObject *input_obj = (PyObject*)self;
1533 const char *output_start, *del_table=NULL;
1534 Py_ssize_t inlen, tablen, dellen = 0;
1535 PyObject *result;
1536 int trans_table[256];
1537 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1540 &tableobj, &delobj))
1541 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 if (PyBytes_Check(tableobj)) {
1544 table = PyBytes_AS_STRING(tableobj);
1545 tablen = PyBytes_GET_SIZE(tableobj);
1546 }
1547 else if (tableobj == Py_None) {
1548 table = NULL;
1549 tablen = 256;
1550 }
1551 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1552 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 if (tablen != 256) {
1555 PyErr_SetString(PyExc_ValueError,
1556 "translation table must be 256 characters long");
1557 return NULL;
1558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 if (delobj != NULL) {
1561 if (PyBytes_Check(delobj)) {
1562 del_table = PyBytes_AS_STRING(delobj);
1563 dellen = PyBytes_GET_SIZE(delobj);
1564 }
1565 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1566 return NULL;
1567 }
1568 else {
1569 del_table = NULL;
1570 dellen = 0;
1571 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 inlen = PyBytes_GET_SIZE(input_obj);
1574 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1575 if (result == NULL)
1576 return NULL;
1577 output_start = output = PyBytes_AsString(result);
1578 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 if (dellen == 0 && table != NULL) {
1581 /* If no deletions are required, use faster code */
1582 for (i = inlen; --i >= 0; ) {
1583 c = Py_CHARMASK(*input++);
1584 if (Py_CHARMASK((*output++ = table[c])) != c)
1585 changed = 1;
1586 }
1587 if (changed || !PyBytes_CheckExact(input_obj))
1588 return result;
1589 Py_DECREF(result);
1590 Py_INCREF(input_obj);
1591 return input_obj;
1592 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 if (table == NULL) {
1595 for (i = 0; i < 256; i++)
1596 trans_table[i] = Py_CHARMASK(i);
1597 } else {
1598 for (i = 0; i < 256; i++)
1599 trans_table[i] = Py_CHARMASK(table[i]);
1600 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 for (i = 0; i < dellen; i++)
1603 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 for (i = inlen; --i >= 0; ) {
1606 c = Py_CHARMASK(*input++);
1607 if (trans_table[c] != -1)
1608 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1609 continue;
1610 changed = 1;
1611 }
1612 if (!changed && PyBytes_CheckExact(input_obj)) {
1613 Py_DECREF(result);
1614 Py_INCREF(input_obj);
1615 return input_obj;
1616 }
1617 /* Fix the size of the resulting string */
1618 if (inlen > 0)
1619 _PyBytes_Resize(&result, output - output_start);
1620 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001621}
1622
1623
Georg Brandlabc38772009-04-12 15:51:51 +00001624static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001625bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001626{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001628}
1629
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630/* find and count characters and substrings */
1631
/* Locate the first occurrence of byte c in the target_len bytes starting at
 * target.  Evaluates to a char * pointing at the match, or NULL when the
 * byte is absent.  Every argument is parenthesized so that expressions such
 * as `end - start` or a conditional can be passed safely. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1634
1635/* String ops must return a string. */
1636/* If the object is subclass of string, create a copy */
1637Py_LOCAL(PyBytesObject *)
1638return_self(PyBytesObject *self)
1639{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (PyBytes_CheckExact(self)) {
1641 Py_INCREF(self);
1642 return self;
1643 }
1644 return (PyBytesObject *)PyBytes_FromStringAndSize(
1645 PyBytes_AS_STRING(self),
1646 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647}
1648
1649Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001650countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 Py_ssize_t count=0;
1653 const char *start=target;
1654 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 while ( (start=findchar(start, end-start, c)) != NULL ) {
1657 count++;
1658 if (count >= maxcount)
1659 break;
1660 start += 1;
1661 }
1662 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663}
1664
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
1666/* Algorithms for different cases of string replacement */
1667
1668/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1669Py_LOCAL(PyBytesObject *)
1670replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 const char *to_s, Py_ssize_t to_len,
1672 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 char *self_s, *result_s;
1675 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001676 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001681 /* 1 at the end plus 1 after every character;
1682 count = min(maxcount, self_len + 1) */
1683 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001685 else
1686 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1687 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 /* Check for overflow */
1690 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001691 assert(count > 0);
1692 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 PyErr_SetString(PyExc_OverflowError,
1694 "replacement bytes are too long");
1695 return NULL;
1696 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001697 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 if (! (result = (PyBytesObject *)
1700 PyBytes_FromStringAndSize(NULL, result_len)) )
1701 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 self_s = PyBytes_AS_STRING(self);
1704 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 /* Lay the first one down (guaranteed this will occur) */
1709 Py_MEMCPY(result_s, to_s, to_len);
1710 result_s += to_len;
1711 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 for (i=0; i<count; i++) {
1714 *result_s++ = *self_s++;
1715 Py_MEMCPY(result_s, to_s, to_len);
1716 result_s += to_len;
1717 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 /* Copy the rest of the original string */
1720 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723}
1724
1725/* Special case for deleting a single character */
1726/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1727Py_LOCAL(PyBytesObject *)
1728replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 char *self_s, *result_s;
1732 char *start, *next, *end;
1733 Py_ssize_t self_len, result_len;
1734 Py_ssize_t count;
1735 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 self_len = PyBytes_GET_SIZE(self);
1738 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 count = countchar(self_s, self_len, from_c, maxcount);
1741 if (count == 0) {
1742 return return_self(self);
1743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 result_len = self_len - count; /* from_len == 1 */
1746 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 if ( (result = (PyBytesObject *)
1749 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1750 return NULL;
1751 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 start = self_s;
1754 end = self_s + self_len;
1755 while (count-- > 0) {
1756 next = findchar(start, end-start, from_c);
1757 if (next == NULL)
1758 break;
1759 Py_MEMCPY(result_s, start, next-start);
1760 result_s += (next-start);
1761 start = next+1;
1762 }
1763 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766}
1767
1768/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1769
1770Py_LOCAL(PyBytesObject *)
1771replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 const char *from_s, Py_ssize_t from_len,
1773 Py_ssize_t maxcount) {
1774 char *self_s, *result_s;
1775 char *start, *next, *end;
1776 Py_ssize_t self_len, result_len;
1777 Py_ssize_t count, offset;
1778 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 self_len = PyBytes_GET_SIZE(self);
1781 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 count = stringlib_count(self_s, self_len,
1784 from_s, from_len,
1785 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 if (count == 0) {
1788 /* no matches */
1789 return return_self(self);
1790 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 result_len = self_len - (count * from_len);
1793 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 if ( (result = (PyBytesObject *)
1796 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1797 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 start = self_s;
1802 end = self_s + self_len;
1803 while (count-- > 0) {
1804 offset = stringlib_find(start, end-start,
1805 from_s, from_len,
1806 0);
1807 if (offset == -1)
1808 break;
1809 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001813 result_s += (next-start);
1814 start = next+from_len;
1815 }
1816 Py_MEMCPY(result_s, start, end-start);
1817 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818}
1819
1820/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1821Py_LOCAL(PyBytesObject *)
1822replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 char from_c, char to_c,
1824 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 char *self_s, *result_s, *start, *end, *next;
1827 Py_ssize_t self_len;
1828 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 /* The result string will be the same size */
1831 self_s = PyBytes_AS_STRING(self);
1832 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 if (next == NULL) {
1837 /* No matches; return the original string */
1838 return return_self(self);
1839 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 /* Need to make a new string */
1842 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1843 if (result == NULL)
1844 return NULL;
1845 result_s = PyBytes_AS_STRING(result);
1846 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 /* change everything in-place, starting with this one */
1849 start = result_s + (next-self_s);
1850 *start = to_c;
1851 start++;
1852 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 while (--maxcount > 0) {
1855 next = findchar(start, end-start, from_c);
1856 if (next == NULL)
1857 break;
1858 *next = to_c;
1859 start = next+1;
1860 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863}
1864
1865/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1866Py_LOCAL(PyBytesObject *)
1867replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 const char *from_s, Py_ssize_t from_len,
1869 const char *to_s, Py_ssize_t to_len,
1870 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 char *result_s, *start, *end;
1873 char *self_s;
1874 Py_ssize_t self_len, offset;
1875 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 self_s = PyBytes_AS_STRING(self);
1880 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 offset = stringlib_find(self_s, self_len,
1883 from_s, from_len,
1884 0);
1885 if (offset == -1) {
1886 /* No matches; return the original string */
1887 return return_self(self);
1888 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 /* Need to make a new string */
1891 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1892 if (result == NULL)
1893 return NULL;
1894 result_s = PyBytes_AS_STRING(result);
1895 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 /* change everything in-place, starting with this one */
1898 start = result_s + offset;
1899 Py_MEMCPY(start, to_s, from_len);
1900 start += from_len;
1901 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 while ( --maxcount > 0) {
1904 offset = stringlib_find(start, end-start,
1905 from_s, from_len,
1906 0);
1907 if (offset==-1)
1908 break;
1909 Py_MEMCPY(start+offset, to_s, from_len);
1910 start += offset+from_len;
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914}
1915
1916/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1917Py_LOCAL(PyBytesObject *)
1918replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 char from_c,
1920 const char *to_s, Py_ssize_t to_len,
1921 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 char *self_s, *result_s;
1924 char *start, *next, *end;
1925 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001926 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 self_s = PyBytes_AS_STRING(self);
1930 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 count = countchar(self_s, self_len, from_c, maxcount);
1933 if (count == 0) {
1934 /* no matches, return unchanged */
1935 return return_self(self);
1936 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 /* use the difference between current and new, hence the "-1" */
1939 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001940 assert(count > 0);
1941 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyErr_SetString(PyExc_OverflowError,
1943 "replacement bytes are too long");
1944 return NULL;
1945 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001946 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 if ( (result = (PyBytesObject *)
1949 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1950 return NULL;
1951 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 start = self_s;
1954 end = self_s + self_len;
1955 while (count-- > 0) {
1956 next = findchar(start, end-start, from_c);
1957 if (next == NULL)
1958 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 if (next == start) {
1961 /* replace with the 'to' */
1962 Py_MEMCPY(result_s, to_s, to_len);
1963 result_s += to_len;
1964 start += 1;
1965 } else {
1966 /* copy the unchanged old then the 'to' */
1967 Py_MEMCPY(result_s, start, next-start);
1968 result_s += (next-start);
1969 Py_MEMCPY(result_s, to_s, to_len);
1970 result_s += to_len;
1971 start = next+1;
1972 }
1973 }
1974 /* Copy the remainder of the remaining string */
1975 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978}
1979
1980/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1981Py_LOCAL(PyBytesObject *)
1982replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 const char *from_s, Py_ssize_t from_len,
1984 const char *to_s, Py_ssize_t to_len,
1985 Py_ssize_t maxcount) {
1986 char *self_s, *result_s;
1987 char *start, *next, *end;
1988 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001989 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 self_s = PyBytes_AS_STRING(self);
1993 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 count = stringlib_count(self_s, self_len,
1996 from_s, from_len,
1997 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 if (count == 0) {
2000 /* no matches, return unchanged */
2001 return return_self(self);
2002 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 /* Check for overflow */
2005 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002006 assert(count > 0);
2007 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 PyErr_SetString(PyExc_OverflowError,
2009 "replacement bytes are too long");
2010 return NULL;
2011 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002012 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 if ( (result = (PyBytesObject *)
2015 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2016 return NULL;
2017 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 start = self_s;
2020 end = self_s + self_len;
2021 while (count-- > 0) {
2022 offset = stringlib_find(start, end-start,
2023 from_s, from_len,
2024 0);
2025 if (offset == -1)
2026 break;
2027 next = start+offset;
2028 if (next == start) {
2029 /* replace with the 'to' */
2030 Py_MEMCPY(result_s, to_s, to_len);
2031 result_s += to_len;
2032 start += from_len;
2033 } else {
2034 /* copy the unchanged old then the 'to' */
2035 Py_MEMCPY(result_s, start, next-start);
2036 result_s += (next-start);
2037 Py_MEMCPY(result_s, to_s, to_len);
2038 result_s += to_len;
2039 start = next+from_len;
2040 }
2041 }
2042 /* Copy the remainder of the remaining string */
2043 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046}
2047
2048
2049Py_LOCAL(PyBytesObject *)
2050replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 const char *from_s, Py_ssize_t from_len,
2052 const char *to_s, Py_ssize_t to_len,
2053 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 if (maxcount < 0) {
2056 maxcount = PY_SSIZE_T_MAX;
2057 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2058 /* nothing to do; return the original string */
2059 return return_self(self);
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (maxcount == 0 ||
2063 (from_len == 0 && to_len == 0)) {
2064 /* nothing to do; return the original string */
2065 return return_self(self);
2066 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 if (from_len == 0) {
2071 /* insert the 'to' string everywhere. */
2072 /* >>> "Python".replace("", ".") */
2073 /* '.P.y.t.h.o.n.' */
2074 return replace_interleave(self, to_s, to_len, maxcount);
2075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2078 /* point for an empty self string to generate a non-empty string */
2079 /* Special case so the remaining code always gets a non-empty string */
2080 if (PyBytes_GET_SIZE(self) == 0) {
2081 return return_self(self);
2082 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 if (to_len == 0) {
2085 /* delete all occurrences of 'from' string */
2086 if (from_len == 1) {
2087 return replace_delete_single_character(
2088 self, from_s[0], maxcount);
2089 } else {
2090 return replace_delete_substring(self, from_s,
2091 from_len, maxcount);
2092 }
2093 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 if (from_len == to_len) {
2098 if (from_len == 1) {
2099 return replace_single_character_in_place(
2100 self,
2101 from_s[0],
2102 to_s[0],
2103 maxcount);
2104 } else {
2105 return replace_substring_in_place(
2106 self, from_s, from_len, to_s, to_len,
2107 maxcount);
2108 }
2109 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 /* Otherwise use the more generic algorithms */
2112 if (from_len == 1) {
2113 return replace_single_character(self, from_s[0],
2114 to_s, to_len, maxcount);
2115 } else {
2116 /* len('from')>=2, len('to')>=1 */
2117 return replace_substring(self, from_s, from_len, to_s, to_len,
2118 maxcount);
2119 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120}
2121
2122PyDoc_STRVAR(replace__doc__,
2123"B.replace(old, new[, count]) -> bytes\n\
2124\n\
2125Return a copy of B with all occurrences of subsection\n\
2126old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002127given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
2129static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002130bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 Py_ssize_t count = -1;
2133 PyObject *from, *to;
2134 const char *from_s, *to_s;
2135 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2138 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 if (PyBytes_Check(from)) {
2141 from_s = PyBytes_AS_STRING(from);
2142 from_len = PyBytes_GET_SIZE(from);
2143 }
2144 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2145 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 if (PyBytes_Check(to)) {
2148 to_s = PyBytes_AS_STRING(to);
2149 to_len = PyBytes_GET_SIZE(to);
2150 }
2151 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2152 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 return (PyObject *)replace((PyBytesObject *) self,
2155 from_s, from_len,
2156 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157}
2158
2159/** End DALKE **/
2160
2161/* Matches the end (direction >= 0) or start (direction < 0) of self
2162 * against substr, using the start and end arguments. Returns
2163 * -1 on error, 0 if not found and 1 if found.
2164 */
2165Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002166_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 Py_ssize_t len = PyBytes_GET_SIZE(self);
2170 Py_ssize_t slen;
2171 const char* sub;
2172 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 if (PyBytes_Check(substr)) {
2175 sub = PyBytes_AS_STRING(substr);
2176 slen = PyBytes_GET_SIZE(substr);
2177 }
2178 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2179 return -1;
2180 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 if (direction < 0) {
2185 /* startswith */
2186 if (start+slen > len)
2187 return 0;
2188 } else {
2189 /* endswith */
2190 if (end-start < slen || start > len)
2191 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 if (end-slen > start)
2194 start = end - slen;
2195 }
2196 if (end-start >= slen)
2197 return ! memcmp(str+start, sub, slen);
2198 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199}
2200
2201
2202PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002203"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204\n\
2205Return True if B starts with the specified prefix, False otherwise.\n\
2206With optional start, test B beginning at that position.\n\
2207With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002208prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
2210static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002211bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002213 Py_ssize_t start = 0;
2214 Py_ssize_t end = PY_SSIZE_T_MAX;
2215 PyObject *subobj;
2216 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217
Jesus Ceaac451502011-04-20 17:09:23 +02002218 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 return NULL;
2220 if (PyTuple_Check(subobj)) {
2221 Py_ssize_t i;
2222 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2223 result = _bytes_tailmatch(self,
2224 PyTuple_GET_ITEM(subobj, i),
2225 start, end, -1);
2226 if (result == -1)
2227 return NULL;
2228 else if (result) {
2229 Py_RETURN_TRUE;
2230 }
2231 }
2232 Py_RETURN_FALSE;
2233 }
2234 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002235 if (result == -1) {
2236 if (PyErr_ExceptionMatches(PyExc_TypeError))
2237 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2238 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002239 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002240 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002241 else
2242 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002243}
2244
2245
2246PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002247"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248\n\
2249Return True if B ends with the specified suffix, False otherwise.\n\
2250With optional start, test B beginning at that position.\n\
2251With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002252suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
2254static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002255bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002257 Py_ssize_t start = 0;
2258 Py_ssize_t end = PY_SSIZE_T_MAX;
2259 PyObject *subobj;
2260 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261
Jesus Ceaac451502011-04-20 17:09:23 +02002262 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 return NULL;
2264 if (PyTuple_Check(subobj)) {
2265 Py_ssize_t i;
2266 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2267 result = _bytes_tailmatch(self,
2268 PyTuple_GET_ITEM(subobj, i),
2269 start, end, +1);
2270 if (result == -1)
2271 return NULL;
2272 else if (result) {
2273 Py_RETURN_TRUE;
2274 }
2275 }
2276 Py_RETURN_FALSE;
2277 }
2278 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002279 if (result == -1) {
2280 if (PyErr_ExceptionMatches(PyExc_TypeError))
2281 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2282 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002283 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002284 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 else
2286 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287}
2288
2289
2290PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002291"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002292\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002293Decode B using the codec registered for encoding. Default encoding\n\
2294is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002295handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2296a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002298able to handle UnicodeDecodeErrors.");
2299
2300static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002301bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002302{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002303 const char *encoding = NULL;
2304 const char *errors = NULL;
2305 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002306
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002307 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2308 return NULL;
2309 if (encoding == NULL)
2310 encoding = PyUnicode_GetDefaultEncoding();
2311 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002312}
2313
Guido van Rossum20188312006-05-05 15:15:40 +00002314
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002315PyDoc_STRVAR(splitlines__doc__,
2316"B.splitlines([keepends]) -> list of lines\n\
2317\n\
2318Return a list of the lines in B, breaking at line boundaries.\n\
2319Line breaks are not included in the resulting list unless keepends\n\
2320is given and true.");
2321
2322static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002323bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002324{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002325 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002326 int keepends = 0;
2327
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002328 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2329 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002330 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002331
2332 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002333 (PyObject*) self, PyBytes_AS_STRING(self),
2334 PyBytes_GET_SIZE(self), keepends
2335 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002336}
2337
2338
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002339PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002340"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002341\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002342Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002343Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345
2346static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002347hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 if (c >= 128)
2350 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002351 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 return c - '0';
2353 else {
David Malcolm96960882010-11-05 17:23:41 +00002354 if (Py_ISUPPER(c))
2355 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 if (c >= 'a' && c <= 'f')
2357 return c - 'a' + 10;
2358 }
2359 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002360}
2361
2362static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002363bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 PyObject *newstring, *hexobj;
2366 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 Py_ssize_t hexlen, byteslen, i, j;
2368 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002369 void *data;
2370 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2373 return NULL;
2374 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002375 if (PyUnicode_READY(hexobj))
2376 return NULL;
2377 kind = PyUnicode_KIND(hexobj);
2378 data = PyUnicode_DATA(hexobj);
2379 hexlen = PyUnicode_GET_LENGTH(hexobj);
2380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002381 byteslen = hexlen/2; /* This overestimates if there are spaces */
2382 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2383 if (!newstring)
2384 return NULL;
2385 buf = PyBytes_AS_STRING(newstring);
2386 for (i = j = 0; i < hexlen; i += 2) {
2387 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002388 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 i++;
2390 if (i >= hexlen)
2391 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002392 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2393 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 if (top == -1 || bot == -1) {
2395 PyErr_Format(PyExc_ValueError,
2396 "non-hexadecimal number found in "
2397 "fromhex() arg at position %zd", i);
2398 goto error;
2399 }
2400 buf[j++] = (top << 4) + bot;
2401 }
2402 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2403 goto error;
2404 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002405
2406 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 Py_XDECREF(newstring);
2408 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409}
2410
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002411PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002412"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002413
2414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002415bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 Py_ssize_t res;
2418 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2419 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002420}
2421
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002422
2423static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002424bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002427}
2428
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002429
2430static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002431bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2433 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2434 _Py_capitalize__doc__},
2435 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2436 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2437 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2438 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2439 endswith__doc__},
2440 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2441 expandtabs__doc__},
2442 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2443 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2444 fromhex_doc},
2445 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2446 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2447 _Py_isalnum__doc__},
2448 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2449 _Py_isalpha__doc__},
2450 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2451 _Py_isdigit__doc__},
2452 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2453 _Py_islower__doc__},
2454 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2455 _Py_isspace__doc__},
2456 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2457 _Py_istitle__doc__},
2458 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2459 _Py_isupper__doc__},
2460 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2461 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2462 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2463 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2464 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2465 _Py_maketrans__doc__},
2466 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2467 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2468 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2469 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2470 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2471 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2472 rpartition__doc__},
2473 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2474 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2475 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002476 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 splitlines__doc__},
2478 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2479 startswith__doc__},
2480 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2481 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2482 _Py_swapcase__doc__},
2483 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2484 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2485 translate__doc__},
2486 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2487 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2488 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2489 sizeof__doc__},
2490 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002491};
2492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493static PyObject *
2494str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2495
2496static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002497bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 PyObject *x = NULL;
2500 const char *encoding = NULL;
2501 const char *errors = NULL;
2502 PyObject *new = NULL;
2503 Py_ssize_t size;
2504 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 if (type != &PyBytes_Type)
2507 return str_subtype_new(type, args, kwds);
2508 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2509 &encoding, &errors))
2510 return NULL;
2511 if (x == NULL) {
2512 if (encoding != NULL || errors != NULL) {
2513 PyErr_SetString(PyExc_TypeError,
2514 "encoding or errors without sequence "
2515 "argument");
2516 return NULL;
2517 }
2518 return PyBytes_FromString("");
2519 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002521 if (PyUnicode_Check(x)) {
2522 /* Encode via the codec registry */
2523 if (encoding == NULL) {
2524 PyErr_SetString(PyExc_TypeError,
2525 "string argument without an encoding");
2526 return NULL;
2527 }
2528 new = PyUnicode_AsEncodedString(x, encoding, errors);
2529 if (new == NULL)
2530 return NULL;
2531 assert(PyBytes_Check(new));
2532 return new;
2533 }
2534 /* Is it an integer? */
2535 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2536 if (size == -1 && PyErr_Occurred()) {
2537 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2538 return NULL;
2539 PyErr_Clear();
2540 }
2541 else if (size < 0) {
2542 PyErr_SetString(PyExc_ValueError, "negative count");
2543 return NULL;
2544 }
2545 else {
2546 new = PyBytes_FromStringAndSize(NULL, size);
2547 if (new == NULL) {
2548 return NULL;
2549 }
2550 if (size > 0) {
2551 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2552 }
2553 return new;
2554 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 /* If it's not unicode, there can't be encoding or errors */
2557 if (encoding != NULL || errors != NULL) {
2558 PyErr_SetString(PyExc_TypeError,
2559 "encoding or errors without a string argument");
2560 return NULL;
2561 }
2562 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002563}
2564
2565PyObject *
2566PyBytes_FromObject(PyObject *x)
2567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 PyObject *new, *it;
2569 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 if (x == NULL) {
2572 PyErr_BadInternalCall();
2573 return NULL;
2574 }
2575 /* Use the modern buffer interface */
2576 if (PyObject_CheckBuffer(x)) {
2577 Py_buffer view;
2578 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2579 return NULL;
2580 new = PyBytes_FromStringAndSize(NULL, view.len);
2581 if (!new)
2582 goto fail;
2583 /* XXX(brett.cannon): Better way to get to internal buffer? */
2584 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2585 &view, view.len, 'C') < 0)
2586 goto fail;
2587 PyBuffer_Release(&view);
2588 return new;
2589 fail:
2590 Py_XDECREF(new);
2591 PyBuffer_Release(&view);
2592 return NULL;
2593 }
2594 if (PyUnicode_Check(x)) {
2595 PyErr_SetString(PyExc_TypeError,
2596 "cannot convert unicode object to bytes");
2597 return NULL;
2598 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 if (PyList_CheckExact(x)) {
2601 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2602 if (new == NULL)
2603 return NULL;
2604 for (i = 0; i < Py_SIZE(x); i++) {
2605 Py_ssize_t value = PyNumber_AsSsize_t(
2606 PyList_GET_ITEM(x, i), PyExc_ValueError);
2607 if (value == -1 && PyErr_Occurred()) {
2608 Py_DECREF(new);
2609 return NULL;
2610 }
2611 if (value < 0 || value >= 256) {
2612 PyErr_SetString(PyExc_ValueError,
2613 "bytes must be in range(0, 256)");
2614 Py_DECREF(new);
2615 return NULL;
2616 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002617 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002618 }
2619 return new;
2620 }
2621 if (PyTuple_CheckExact(x)) {
2622 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2623 if (new == NULL)
2624 return NULL;
2625 for (i = 0; i < Py_SIZE(x); i++) {
2626 Py_ssize_t value = PyNumber_AsSsize_t(
2627 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2628 if (value == -1 && PyErr_Occurred()) {
2629 Py_DECREF(new);
2630 return NULL;
2631 }
2632 if (value < 0 || value >= 256) {
2633 PyErr_SetString(PyExc_ValueError,
2634 "bytes must be in range(0, 256)");
2635 Py_DECREF(new);
2636 return NULL;
2637 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002638 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 }
2640 return new;
2641 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 /* For iterator version, create a string object and resize as needed */
2644 size = _PyObject_LengthHint(x, 64);
2645 if (size == -1 && PyErr_Occurred())
2646 return NULL;
2647 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2648 returning a shared empty bytes string. This required because we
2649 want to call _PyBytes_Resize() the returned object, which we can
2650 only do on bytes objects with refcount == 1. */
2651 size += 1;
2652 new = PyBytes_FromStringAndSize(NULL, size);
2653 if (new == NULL)
2654 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 /* Get the iterator */
2657 it = PyObject_GetIter(x);
2658 if (it == NULL)
2659 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 /* Run the iterator to exhaustion */
2662 for (i = 0; ; i++) {
2663 PyObject *item;
2664 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 /* Get the next item */
2667 item = PyIter_Next(it);
2668 if (item == NULL) {
2669 if (PyErr_Occurred())
2670 goto error;
2671 break;
2672 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 /* Interpret it as an int (__index__) */
2675 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2676 Py_DECREF(item);
2677 if (value == -1 && PyErr_Occurred())
2678 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 /* Range check */
2681 if (value < 0 || value >= 256) {
2682 PyErr_SetString(PyExc_ValueError,
2683 "bytes must be in range(0, 256)");
2684 goto error;
2685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Append the byte */
2688 if (i >= size) {
2689 size = 2 * size + 1;
2690 if (_PyBytes_Resize(&new, size) < 0)
2691 goto error;
2692 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002693 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 }
2695 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 /* Clean up and return success */
2698 Py_DECREF(it);
2699 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
2701 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 /* Error handling when new != NULL */
2703 Py_XDECREF(it);
2704 Py_DECREF(new);
2705 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706}
2707
2708static PyObject *
2709str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2710{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 PyObject *tmp, *pnew;
2712 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 assert(PyType_IsSubtype(type, &PyBytes_Type));
2715 tmp = bytes_new(&PyBytes_Type, args, kwds);
2716 if (tmp == NULL)
2717 return NULL;
2718 assert(PyBytes_CheckExact(tmp));
2719 n = PyBytes_GET_SIZE(tmp);
2720 pnew = type->tp_alloc(type, n);
2721 if (pnew != NULL) {
2722 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2723 PyBytes_AS_STRING(tmp), n+1);
2724 ((PyBytesObject *)pnew)->ob_shash =
2725 ((PyBytesObject *)tmp)->ob_shash;
2726 }
2727 Py_DECREF(tmp);
2728 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729}
2730
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002731PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002732"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002734bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2735bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002736\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002738 - an iterable yielding integers in range(256)\n\
2739 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740 - a bytes or a buffer object\n\
2741 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002742
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002743static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002744
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2747 "bytes",
2748 PyBytesObject_SIZE,
2749 sizeof(char),
2750 bytes_dealloc, /* tp_dealloc */
2751 0, /* tp_print */
2752 0, /* tp_getattr */
2753 0, /* tp_setattr */
2754 0, /* tp_reserved */
2755 (reprfunc)bytes_repr, /* tp_repr */
2756 0, /* tp_as_number */
2757 &bytes_as_sequence, /* tp_as_sequence */
2758 &bytes_as_mapping, /* tp_as_mapping */
2759 (hashfunc)bytes_hash, /* tp_hash */
2760 0, /* tp_call */
2761 bytes_str, /* tp_str */
2762 PyObject_GenericGetAttr, /* tp_getattro */
2763 0, /* tp_setattro */
2764 &bytes_as_buffer, /* tp_as_buffer */
2765 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2766 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2767 bytes_doc, /* tp_doc */
2768 0, /* tp_traverse */
2769 0, /* tp_clear */
2770 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2771 0, /* tp_weaklistoffset */
2772 bytes_iter, /* tp_iter */
2773 0, /* tp_iternext */
2774 bytes_methods, /* tp_methods */
2775 0, /* tp_members */
2776 0, /* tp_getset */
2777 &PyBaseObject_Type, /* tp_base */
2778 0, /* tp_dict */
2779 0, /* tp_descr_get */
2780 0, /* tp_descr_set */
2781 0, /* tp_dictoffset */
2782 0, /* tp_init */
2783 0, /* tp_alloc */
2784 bytes_new, /* tp_new */
2785 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002786};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002787
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788void
2789PyBytes_Concat(register PyObject **pv, register PyObject *w)
2790{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 register PyObject *v;
2792 assert(pv != NULL);
2793 if (*pv == NULL)
2794 return;
2795 if (w == NULL) {
2796 Py_DECREF(*pv);
2797 *pv = NULL;
2798 return;
2799 }
2800 v = bytes_concat(*pv, w);
2801 Py_DECREF(*pv);
2802 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803}
2804
2805void
2806PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2807{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 PyBytes_Concat(pv, w);
2809 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810}
2811
2812
2813/* The following function breaks the notion that strings are immutable:
2814 it changes the size of a string. We get away with this only if there
2815 is only one module referencing the object. You can also think of it
2816 as creating a new string object and destroying the old one, only
2817 more efficiently. In any case, don't use this if the string may
2818 already be known to some other part of the code...
2819 Note that if there's not enough memory to resize the string, the original
2820 string object at *pv is deallocated, *pv is set to NULL, an "out of
2821 memory" exception is set, and -1 is returned. Else (on success) 0 is
2822 returned, and the value in *pv may or may not be the same as on input.
2823 As always, an extra byte is allocated for a trailing \0 byte (newsize
2824 does *not* include that), and a trailing \0 byte is stored.
2825*/
2826
2827int
2828_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 register PyObject *v;
2831 register PyBytesObject *sv;
2832 v = *pv;
2833 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2834 *pv = 0;
2835 Py_DECREF(v);
2836 PyErr_BadInternalCall();
2837 return -1;
2838 }
2839 /* XXX UNREF/NEWREF interface should be more symmetrical */
2840 _Py_DEC_REFTOTAL;
2841 _Py_ForgetReference(v);
2842 *pv = (PyObject *)
2843 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2844 if (*pv == NULL) {
2845 PyObject_Del(v);
2846 PyErr_NoMemory();
2847 return -1;
2848 }
2849 _Py_NewReference(*pv);
2850 sv = (PyBytesObject *) *pv;
2851 Py_SIZE(sv) = newsize;
2852 sv->ob_sval[newsize] = '\0';
2853 sv->ob_shash = -1; /* invalidate cached hash value */
2854 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855}
2856
2857/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2858 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2859 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002860 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861 * . *pbuf is set to point into it,
2862 * *plen set to the # of chars following that.
2863 * Caller must decref it when done using pbuf.
2864 * The string starting at *pbuf is of the form
2865 * "-"? ("0x" | "0X")? digit+
2866 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2867 * set in flags. The case of hex digits will be correct,
2868 * There will be at least prec digits, zero-filled on the left if
2869 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002870 * val object to be converted
2871 * flags bitmask of format flags; only F_ALT is looked at
2872 * prec minimum number of digits; 0-fill on left if needed
2873 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874 *
2875 * CAUTION: o, x and X conversions on regular ints can never
2876 * produce a '-' sign, but can for Python's unbounded ints.
2877 */
2878PyObject*
2879_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 PyObject *result = NULL;
2883 char *buf;
2884 Py_ssize_t i;
2885 int sign; /* 1 if '-', else 0 */
2886 int len; /* number of characters */
2887 Py_ssize_t llen;
2888 int numdigits; /* len == numnondigits + numdigits */
2889 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 /* Avoid exceeding SSIZE_T_MAX */
2892 if (prec > INT_MAX-3) {
2893 PyErr_SetString(PyExc_OverflowError,
2894 "precision too large");
2895 return NULL;
2896 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 switch (type) {
2899 case 'd':
2900 case 'u':
2901 /* Special-case boolean: we want 0/1 */
2902 if (PyBool_Check(val))
2903 result = PyNumber_ToBase(val, 10);
2904 else
2905 result = Py_TYPE(val)->tp_str(val);
2906 break;
2907 case 'o':
2908 numnondigits = 2;
2909 result = PyNumber_ToBase(val, 8);
2910 break;
2911 case 'x':
2912 case 'X':
2913 numnondigits = 2;
2914 result = PyNumber_ToBase(val, 16);
2915 break;
2916 default:
2917 assert(!"'type' not in [duoxX]");
2918 }
2919 if (!result)
2920 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 buf = _PyUnicode_AsString(result);
2923 if (!buf) {
2924 Py_DECREF(result);
2925 return NULL;
2926 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 /* To modify the string in-place, there can only be one reference. */
2929 if (Py_REFCNT(result) != 1) {
2930 PyErr_BadInternalCall();
2931 return NULL;
2932 }
Victor Stinner9e30aa52011-11-21 02:49:52 +01002933 llen = PyUnicode_GetLength(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002934 if (llen > INT_MAX) {
2935 PyErr_SetString(PyExc_ValueError,
2936 "string too large in _PyBytes_FormatLong");
2937 return NULL;
2938 }
2939 len = (int)llen;
2940 if (buf[len-1] == 'L') {
2941 --len;
2942 buf[len] = '\0';
2943 }
2944 sign = buf[0] == '-';
2945 numnondigits += sign;
2946 numdigits = len - numnondigits;
2947 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 /* Get rid of base marker unless F_ALT */
2950 if (((flags & F_ALT) == 0 &&
2951 (type == 'o' || type == 'x' || type == 'X'))) {
2952 assert(buf[sign] == '0');
2953 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2954 buf[sign+1] == 'o');
2955 numnondigits -= 2;
2956 buf += 2;
2957 len -= 2;
2958 if (sign)
2959 buf[0] = '-';
2960 assert(len == numnondigits + numdigits);
2961 assert(numdigits > 0);
2962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 /* Fill with leading zeroes to meet minimum width. */
2965 if (prec > numdigits) {
2966 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2967 numnondigits + prec);
2968 char *b1;
2969 if (!r1) {
2970 Py_DECREF(result);
2971 return NULL;
2972 }
2973 b1 = PyBytes_AS_STRING(r1);
2974 for (i = 0; i < numnondigits; ++i)
2975 *b1++ = *buf++;
2976 for (i = 0; i < prec - numdigits; i++)
2977 *b1++ = '0';
2978 for (i = 0; i < numdigits; i++)
2979 *b1++ = *buf++;
2980 *b1 = '\0';
2981 Py_DECREF(result);
2982 result = r1;
2983 buf = PyBytes_AS_STRING(result);
2984 len = numnondigits + prec;
2985 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 /* Fix up case for hex conversions. */
2988 if (type == 'X') {
2989 /* Need to convert all lower case letters to upper case.
2990 and need to convert 0x to 0X (and -0x to -0X). */
2991 for (i = 0; i < len; i++)
2992 if (buf[i] >= 'a' && buf[i] <= 'x')
2993 buf[i] -= 'a'-'A';
2994 }
2995 *pbuf = buf;
2996 *plen = len;
2997 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998}
2999
3000void
3001PyBytes_Fini(void)
3002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 int i;
3004 for (i = 0; i < UCHAR_MAX + 1; i++) {
3005 Py_XDECREF(characters[i]);
3006 characters[i] = NULL;
3007 }
3008 Py_XDECREF(nullstring);
3009 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010}
3011
Benjamin Peterson4116f362008-05-27 00:36:20 +00003012/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003013
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to be returned */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019
3020static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 _PyObject_GC_UNTRACK(it);
3024 Py_XDECREF(it->it_seq);
3025 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026}
3027
3028static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003029striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 Py_VISIT(it->it_seq);
3032 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033}
3034
3035static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003036striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 PyBytesObject *seq;
3039 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 assert(it != NULL);
3042 seq = it->it_seq;
3043 if (seq == NULL)
3044 return NULL;
3045 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3048 item = PyLong_FromLong(
3049 (unsigned char)seq->ob_sval[it->it_index]);
3050 if (item != NULL)
3051 ++it->it_index;
3052 return item;
3053 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 Py_DECREF(seq);
3056 it->it_seq = NULL;
3057 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003058}
3059
3060static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 Py_ssize_t len = 0;
3064 if (it->it_seq)
3065 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3066 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003067}
3068
3069PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003070 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3074 length_hint_doc},
3075 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076};
3077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3080 "bytes_iterator", /* tp_name */
3081 sizeof(striterobject), /* tp_basicsize */
3082 0, /* tp_itemsize */
3083 /* methods */
3084 (destructor)striter_dealloc, /* tp_dealloc */
3085 0, /* tp_print */
3086 0, /* tp_getattr */
3087 0, /* tp_setattr */
3088 0, /* tp_reserved */
3089 0, /* tp_repr */
3090 0, /* tp_as_number */
3091 0, /* tp_as_sequence */
3092 0, /* tp_as_mapping */
3093 0, /* tp_hash */
3094 0, /* tp_call */
3095 0, /* tp_str */
3096 PyObject_GenericGetAttr, /* tp_getattro */
3097 0, /* tp_setattro */
3098 0, /* tp_as_buffer */
3099 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3100 0, /* tp_doc */
3101 (traverseproc)striter_traverse, /* tp_traverse */
3102 0, /* tp_clear */
3103 0, /* tp_richcompare */
3104 0, /* tp_weaklistoffset */
3105 PyObject_SelfIter, /* tp_iter */
3106 (iternextfunc)striter_next, /* tp_iternext */
3107 striter_methods, /* tp_methods */
3108 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003109};
3110
3111static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003112bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 if (!PyBytes_Check(seq)) {
3117 PyErr_BadInternalCall();
3118 return NULL;
3119 }
3120 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3121 if (it == NULL)
3122 return NULL;
3123 it->it_index = 0;
3124 Py_INCREF(seq);
3125 it->it_seq = (PyBytesObject *)seq;
3126 _PyObject_GC_TRACK(it);
3127 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128}