blob: 0b95fdf60ae459018cfe4fe9c96cacc9d68d7d84 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 Py_ssize_t i, length = Py_SIZE(op);
569 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 unsigned char quote, *s, *p;
572
573 /* Compute size of output string */
574 squotes = dquotes = 0;
575 newsize = 3; /* b'' */
576 s = (unsigned char*)op->ob_sval;
577 for (i = 0; i < length; i++) {
578 switch(s[i]) {
579 case '\'': squotes++; newsize++; break;
580 case '"': dquotes++; newsize++; break;
581 case '\\': case '\t': case '\n': case '\r':
582 newsize += 2; break; /* \C */
583 default:
584 if (s[i] < ' ' || s[i] >= 0x7f)
585 newsize += 4; /* \xHH */
586 else
587 newsize++;
588 }
589 }
590 quote = '\'';
591 if (smartquotes && squotes && !dquotes)
592 quote = '"';
593 if (squotes && quote == '\'')
594 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
629 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000630}
631
Neal Norwitz6968b052007-02-27 19:02:19 +0000632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000633bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000634{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 if (Py_BytesWarningFlag) {
642 if (PyErr_WarnEx(PyExc_BytesWarning,
643 "str() on a bytes instance", 1))
644 return NULL;
645 }
646 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000649static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653}
Neal Norwitz6968b052007-02-27 19:02:19 +0000654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655/* This is also used by PyBytes_Concat() */
656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 Py_ssize_t size;
660 Py_buffer va, vb;
661 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 va.len = -1;
664 vb.len = -1;
665 if (_getbuffer(a, &va) < 0 ||
666 _getbuffer(b, &vb) < 0) {
667 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
668 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
669 goto done;
670 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 /* Optimize end cases */
673 if (va.len == 0 && PyBytes_CheckExact(b)) {
674 result = b;
675 Py_INCREF(result);
676 goto done;
677 }
678 if (vb.len == 0 && PyBytes_CheckExact(a)) {
679 result = a;
680 Py_INCREF(result);
681 goto done;
682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 size = va.len + vb.len;
685 if (size < 0) {
686 PyErr_NoMemory();
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 result = PyBytes_FromStringAndSize(NULL, size);
691 if (result != NULL) {
692 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
693 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
696 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 if (va.len != -1)
698 PyBuffer_Release(&va);
699 if (vb.len != -1)
700 PyBuffer_Release(&vb);
701 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702}
Neal Norwitz6968b052007-02-27 19:02:19 +0000703
704static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000705bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 register Py_ssize_t i;
708 register Py_ssize_t j;
709 register Py_ssize_t size;
710 register PyBytesObject *op;
711 size_t nbytes;
712 if (n < 0)
713 n = 0;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000717 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 PyErr_SetString(PyExc_OverflowError,
719 "repeated bytes are too long");
720 return NULL;
721 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000722 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
724 Py_INCREF(a);
725 return (PyObject *)a;
726 }
727 nbytes = (size_t)size;
728 if (nbytes + PyBytesObject_SIZE <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
733 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
734 if (op == NULL)
735 return PyErr_NoMemory();
736 PyObject_INIT_VAR(op, &PyBytes_Type, size);
737 op->ob_shash = -1;
738 op->ob_sval[size] = '\0';
739 if (Py_SIZE(a) == 1 && n > 0) {
740 memset(op->ob_sval, a->ob_sval[0] , n);
741 return (PyObject *) op;
742 }
743 i = 0;
744 if (i < size) {
745 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
746 i = Py_SIZE(a);
747 }
748 while (i < size) {
749 j = (i <= size-i) ? i : size-i;
750 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
751 i += j;
752 }
753 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000754}
755
Guido van Rossum98297ee2007-11-06 21:34:58 +0000756static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000757bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000758{
759 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
760 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000761 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000762 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000763 PyErr_Clear();
764 if (_getbuffer(arg, &varg) < 0)
765 return -1;
766 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
767 varg.buf, varg.len, 0);
768 PyBuffer_Release(&varg);
769 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000770 }
771 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
773 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774 }
775
Antoine Pitrou0010d372010-08-15 17:12:55 +0000776 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777}
778
Neal Norwitz6968b052007-02-27 19:02:19 +0000779static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000780bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 if (i < 0 || i >= Py_SIZE(a)) {
783 PyErr_SetString(PyExc_IndexError, "index out of range");
784 return NULL;
785 }
786 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000787}
788
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000789static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000790bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 int c;
793 Py_ssize_t len_a, len_b;
794 Py_ssize_t min_len;
795 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 /* Make sure both arguments are strings. */
798 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
799 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
800 (PyObject_IsInstance((PyObject*)a,
801 (PyObject*)&PyUnicode_Type) ||
802 PyObject_IsInstance((PyObject*)b,
803 (PyObject*)&PyUnicode_Type))) {
804 if (PyErr_WarnEx(PyExc_BytesWarning,
805 "Comparison between bytes and string", 1))
806 return NULL;
807 }
808 result = Py_NotImplemented;
809 goto out;
810 }
811 if (a == b) {
812 switch (op) {
813 case Py_EQ:case Py_LE:case Py_GE:
814 result = Py_True;
815 goto out;
816 case Py_NE:case Py_LT:case Py_GT:
817 result = Py_False;
818 goto out;
819 }
820 }
821 if (op == Py_EQ) {
822 /* Supporting Py_NE here as well does not save
823 much time, since Py_NE is rarely used. */
824 if (Py_SIZE(a) == Py_SIZE(b)
825 && (a->ob_sval[0] == b->ob_sval[0]
826 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
827 result = Py_True;
828 } else {
829 result = Py_False;
830 }
831 goto out;
832 }
833 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
834 min_len = (len_a < len_b) ? len_a : len_b;
835 if (min_len > 0) {
836 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
837 if (c==0)
838 c = memcmp(a->ob_sval, b->ob_sval, min_len);
839 } else
840 c = 0;
841 if (c == 0)
842 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
843 switch (op) {
844 case Py_LT: c = c < 0; break;
845 case Py_LE: c = c <= 0; break;
846 case Py_EQ: assert(0); break; /* unreachable */
847 case Py_NE: c = c != 0; break;
848 case Py_GT: c = c > 0; break;
849 case Py_GE: c = c >= 0; break;
850 default:
851 result = Py_NotImplemented;
852 goto out;
853 }
854 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000855 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_INCREF(result);
857 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000858}
859
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000860static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100863 if (a->ob_shash == -1) {
864 /* Can't fail */
865 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
866 }
867 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000868}
869
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000870static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000871bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 if (PyIndex_Check(item)) {
874 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
875 if (i == -1 && PyErr_Occurred())
876 return NULL;
877 if (i < 0)
878 i += PyBytes_GET_SIZE(self);
879 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
880 PyErr_SetString(PyExc_IndexError,
881 "index out of range");
882 return NULL;
883 }
884 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
885 }
886 else if (PySlice_Check(item)) {
887 Py_ssize_t start, stop, step, slicelength, cur, i;
888 char* source_buf;
889 char* result_buf;
890 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000891
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000892 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 PyBytes_GET_SIZE(self),
894 &start, &stop, &step, &slicelength) < 0) {
895 return NULL;
896 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 if (slicelength <= 0) {
899 return PyBytes_FromStringAndSize("", 0);
900 }
901 else if (start == 0 && step == 1 &&
902 slicelength == PyBytes_GET_SIZE(self) &&
903 PyBytes_CheckExact(self)) {
904 Py_INCREF(self);
905 return (PyObject *)self;
906 }
907 else if (step == 1) {
908 return PyBytes_FromStringAndSize(
909 PyBytes_AS_STRING(self) + start,
910 slicelength);
911 }
912 else {
913 source_buf = PyBytes_AS_STRING(self);
914 result = PyBytes_FromStringAndSize(NULL, slicelength);
915 if (result == NULL)
916 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 result_buf = PyBytes_AS_STRING(result);
919 for (cur = start, i = 0; i < slicelength;
920 cur += step, i++) {
921 result_buf[i] = source_buf[cur];
922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 return result;
925 }
926 }
927 else {
928 PyErr_Format(PyExc_TypeError,
929 "byte indices must be integers, not %.200s",
930 Py_TYPE(item)->tp_name);
931 return NULL;
932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000933}
934
935static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000936bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000937{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
939 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000940}
941
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000942static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 (lenfunc)bytes_length, /*sq_length*/
944 (binaryfunc)bytes_concat, /*sq_concat*/
945 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
946 (ssizeargfunc)bytes_item, /*sq_item*/
947 0, /*sq_slice*/
948 0, /*sq_ass_item*/
949 0, /*sq_ass_slice*/
950 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951};
952
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 (lenfunc)bytes_length,
955 (binaryfunc)bytes_subscript,
956 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000957};
958
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000959static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 (getbufferproc)bytes_buffer_getbuffer,
961 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
964
/* Selects which end(s) of the object a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for strip type i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
973
Neal Norwitz6968b052007-02-27 19:02:19 +0000974PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200975"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000976\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000977Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000978If sep is not specified or is None, B is split on ASCII whitespace\n\
979characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000980If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000981
982static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200983bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000984{
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200985 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
987 Py_ssize_t maxsplit = -1;
988 const char *s = PyBytes_AS_STRING(self), *sub;
989 Py_buffer vsub;
990 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +0000991
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200992 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
993 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 return NULL;
995 if (maxsplit < 0)
996 maxsplit = PY_SSIZE_T_MAX;
997 if (subobj == Py_None)
998 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
999 if (_getbuffer(subobj, &vsub) < 0)
1000 return NULL;
1001 sub = vsub.buf;
1002 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1005 PyBuffer_Release(&vsub);
1006 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001007}
1008
Neal Norwitz6968b052007-02-27 19:02:19 +00001009PyDoc_STRVAR(partition__doc__,
1010"B.partition(sep) -> (head, sep, tail)\n\
1011\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001012Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001013the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001014found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001015
1016static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001017bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001018{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001019 const char *sep;
1020 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 if (PyBytes_Check(sep_obj)) {
1023 sep = PyBytes_AS_STRING(sep_obj);
1024 sep_len = PyBytes_GET_SIZE(sep_obj);
1025 }
1026 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1027 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 return stringlib_partition(
1030 (PyObject*) self,
1031 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1032 sep_obj, sep, sep_len
1033 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001034}
1035
1036PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001037"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001038\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001039Search for the separator sep in B, starting at the end of B,\n\
1040and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001041part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001042bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
1044static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001045bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001046{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 const char *sep;
1048 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 if (PyBytes_Check(sep_obj)) {
1051 sep = PyBytes_AS_STRING(sep_obj);
1052 sep_len = PyBytes_GET_SIZE(sep_obj);
1053 }
1054 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1055 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 return stringlib_rpartition(
1058 (PyObject*) self,
1059 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1060 sep_obj, sep, sep_len
1061 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001062}
1063
Neal Norwitz6968b052007-02-27 19:02:19 +00001064PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001065"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001066\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001067Return a list of the sections in B, using sep as the delimiter,\n\
1068starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001069If sep is not given, B is split on ASCII whitespace characters\n\
1070(space, tab, return, newline, formfeed, vertical tab).\n\
1071If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001073
Neal Norwitz6968b052007-02-27 19:02:19 +00001074static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001075bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001076{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001077 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1079 Py_ssize_t maxsplit = -1;
1080 const char *s = PyBytes_AS_STRING(self), *sub;
1081 Py_buffer vsub;
1082 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001084 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1085 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 return NULL;
1087 if (maxsplit < 0)
1088 maxsplit = PY_SSIZE_T_MAX;
1089 if (subobj == Py_None)
1090 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1091 if (_getbuffer(subobj, &vsub) < 0)
1092 return NULL;
1093 sub = vsub.buf;
1094 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1097 PyBuffer_Release(&vsub);
1098 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001099}
1100
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001101
1102PyDoc_STRVAR(join__doc__,
1103"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001104\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001105Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1107
Neal Norwitz6968b052007-02-27 19:02:19 +00001108static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001109bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001110{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 char *sep = PyBytes_AS_STRING(self);
1112 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1113 PyObject *res = NULL;
1114 char *p;
1115 Py_ssize_t seqlen = 0;
1116 size_t sz = 0;
1117 Py_ssize_t i;
1118 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 seq = PySequence_Fast(orig, "");
1121 if (seq == NULL) {
1122 return NULL;
1123 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 seqlen = PySequence_Size(seq);
1126 if (seqlen == 0) {
1127 Py_DECREF(seq);
1128 return PyBytes_FromString("");
1129 }
1130 if (seqlen == 1) {
1131 item = PySequence_Fast_GET_ITEM(seq, 0);
1132 if (PyBytes_CheckExact(item)) {
1133 Py_INCREF(item);
1134 Py_DECREF(seq);
1135 return item;
1136 }
1137 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 /* There are at least two things to join, or else we have a subclass
1140 * of the builtin types in the sequence.
1141 * Do a pre-pass to figure out the total amount of space we'll
1142 * need (sz), and see whether all argument are bytes.
1143 */
1144 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1145 for (i = 0; i < seqlen; i++) {
1146 const size_t old_sz = sz;
1147 item = PySequence_Fast_GET_ITEM(seq, i);
1148 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1149 PyErr_Format(PyExc_TypeError,
1150 "sequence item %zd: expected bytes,"
1151 " %.80s found",
1152 i, Py_TYPE(item)->tp_name);
1153 Py_DECREF(seq);
1154 return NULL;
1155 }
1156 sz += Py_SIZE(item);
1157 if (i != 0)
1158 sz += seplen;
1159 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1160 PyErr_SetString(PyExc_OverflowError,
1161 "join() result is too long for bytes");
1162 Py_DECREF(seq);
1163 return NULL;
1164 }
1165 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 /* Allocate result space. */
1168 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1169 if (res == NULL) {
1170 Py_DECREF(seq);
1171 return NULL;
1172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 /* Catenate everything. */
1175 /* I'm not worried about a PyByteArray item growing because there's
1176 nowhere in this function where we release the GIL. */
1177 p = PyBytes_AS_STRING(res);
1178 for (i = 0; i < seqlen; ++i) {
1179 size_t n;
1180 char *q;
1181 if (i) {
1182 Py_MEMCPY(p, sep, seplen);
1183 p += seplen;
1184 }
1185 item = PySequence_Fast_GET_ITEM(seq, i);
1186 n = Py_SIZE(item);
1187 if (PyBytes_Check(item))
1188 q = PyBytes_AS_STRING(item);
1189 else
1190 q = PyByteArray_AS_STRING(item);
1191 Py_MEMCPY(p, q, n);
1192 p += n;
1193 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 Py_DECREF(seq);
1196 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001197}
1198
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199PyObject *
1200_PyBytes_Join(PyObject *sep, PyObject *x)
1201{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 assert(sep != NULL && PyBytes_Check(sep));
1203 assert(x != NULL);
1204 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205}
1206
/* helper macro to fixup start/end slice values
 *
 * end is clamped to at most len; a negative start or end has len added
 * to it and is then clamped to 0 if still negative.  start is NOT clamped
 * to len.
 *
 * start and end must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so pass expressions without side effects.
 * The body is wrapped in do { } while (0) so that the expansion is one
 * statement and stays correct as the body of an unbraced if/else; call it
 * with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221
1222Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001223bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001226 char byte;
1227 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 const char *sub;
1229 Py_ssize_t sub_len;
1230 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001231 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232
Antoine Pitrouac65d962011-10-20 23:54:17 +02001233 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1234 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236
Antoine Pitrouac65d962011-10-20 23:54:17 +02001237 if (subobj) {
1238 if (_getbuffer(subobj, &subbuf) < 0)
1239 return -2;
1240
1241 sub = subbuf.buf;
1242 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001244 else {
1245 sub = &byte;
1246 sub_len = 1;
1247 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001250 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1252 sub, sub_len, start, end);
1253 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001254 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1256 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001257
1258 if (subobj)
1259 PyBuffer_Release(&subbuf);
1260
1261 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262}
1263
1264
1265PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001266"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001267\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001268Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001269such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001271\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272Return -1 on failure.");
1273
Neal Norwitz6968b052007-02-27 19:02:19 +00001274static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001275bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 Py_ssize_t result = bytes_find_internal(self, args, +1);
1278 if (result == -2)
1279 return NULL;
1280 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001281}
1282
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283
1284PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001285"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001286\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287Like B.find() but raise ValueError when the substring is not found.");
1288
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001289static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001290bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001291{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 Py_ssize_t result = bytes_find_internal(self, args, +1);
1293 if (result == -2)
1294 return NULL;
1295 if (result == -1) {
1296 PyErr_SetString(PyExc_ValueError,
1297 "substring not found");
1298 return NULL;
1299 }
1300 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001301}
1302
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
1304PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001305"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001308such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001310\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311Return -1 on failure.");
1312
Neal Norwitz6968b052007-02-27 19:02:19 +00001313static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001314bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 Py_ssize_t result = bytes_find_internal(self, args, -1);
1317 if (result == -2)
1318 return NULL;
1319 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001320}
1321
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001322
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001323PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001324"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325\n\
1326Like B.rfind() but raise ValueError when the substring is not found.");
1327
1328static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001329bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001330{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 Py_ssize_t result = bytes_find_internal(self, args, -1);
1332 if (result == -2)
1333 return NULL;
1334 if (result == -1) {
1335 PyErr_SetString(PyExc_ValueError,
1336 "substring not found");
1337 return NULL;
1338 }
1339 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001340}
1341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342
1343Py_LOCAL_INLINE(PyObject *)
1344do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 Py_buffer vsep;
1347 char *s = PyBytes_AS_STRING(self);
1348 Py_ssize_t len = PyBytes_GET_SIZE(self);
1349 char *sep;
1350 Py_ssize_t seplen;
1351 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 if (_getbuffer(sepobj, &vsep) < 0)
1354 return NULL;
1355 sep = vsep.buf;
1356 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 i = 0;
1359 if (striptype != RIGHTSTRIP) {
1360 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1361 i++;
1362 }
1363 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 j = len;
1366 if (striptype != LEFTSTRIP) {
1367 do {
1368 j--;
1369 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1370 j++;
1371 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1376 Py_INCREF(self);
1377 return (PyObject*)self;
1378 }
1379 else
1380 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001381}
1382
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
1384Py_LOCAL_INLINE(PyObject *)
1385do_strip(PyBytesObject *self, int striptype)
1386{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 char *s = PyBytes_AS_STRING(self);
1388 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 i = 0;
1391 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001392 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 i++;
1394 }
1395 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 j = len;
1398 if (striptype != LEFTSTRIP) {
1399 do {
1400 j--;
David Malcolm96960882010-11-05 17:23:41 +00001401 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 j++;
1403 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1406 Py_INCREF(self);
1407 return (PyObject*)self;
1408 }
1409 else
1410 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411}
1412
1413
1414Py_LOCAL_INLINE(PyObject *)
1415do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1420 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (sep != NULL && sep != Py_None) {
1423 return do_xstrip(self, striptype, sep);
1424 }
1425 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426}
1427
1428
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001429PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001431\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001432Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001433If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001435bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 if (PyTuple_GET_SIZE(args) == 0)
1438 return do_strip(self, BOTHSTRIP); /* Common case */
1439 else
1440 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001441}
1442
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001444PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001446\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001447Strip leading bytes contained in the argument.\n\
1448If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001450bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 if (PyTuple_GET_SIZE(args) == 0)
1453 return do_strip(self, LEFTSTRIP); /* Common case */
1454 else
1455 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456}
1457
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001462Strip trailing bytes contained in the argument.\n\
1463If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001465bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (PyTuple_GET_SIZE(args) == 0)
1468 return do_strip(self, RIGHTSTRIP); /* Common case */
1469 else
1470 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471}
Neal Norwitz6968b052007-02-27 19:02:19 +00001472
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473
1474PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001475"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001476\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001478string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479as in slice notation.");
1480
1481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001482bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 PyObject *sub_obj;
1485 const char *str = PyBytes_AS_STRING(self), *sub;
1486 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001487 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489
Antoine Pitrouac65d962011-10-20 23:54:17 +02001490 Py_buffer vsub;
1491 PyObject *count_obj;
1492
1493 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1494 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496
Antoine Pitrouac65d962011-10-20 23:54:17 +02001497 if (sub_obj) {
1498 if (_getbuffer(sub_obj, &vsub) < 0)
1499 return NULL;
1500
1501 sub = vsub.buf;
1502 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001504 else {
1505 sub = &byte;
1506 sub_len = 1;
1507 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001510
Antoine Pitrouac65d962011-10-20 23:54:17 +02001511 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1513 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001514
1515 if (sub_obj)
1516 PyBuffer_Release(&vsub);
1517
1518 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001519}
1520
1521
1522PyDoc_STRVAR(translate__doc__,
1523"B.translate(table[, deletechars]) -> bytes\n\
1524\n\
1525Return a copy of B, where all characters occurring in the\n\
1526optional argument deletechars are removed, and the remaining\n\
1527characters have been mapped through the given translation\n\
1528table, which must be a bytes object of length 256.");
1529
1530static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001531bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 register char *input, *output;
1534 const char *table;
1535 register Py_ssize_t i, c, changed = 0;
1536 PyObject *input_obj = (PyObject*)self;
1537 const char *output_start, *del_table=NULL;
1538 Py_ssize_t inlen, tablen, dellen = 0;
1539 PyObject *result;
1540 int trans_table[256];
1541 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1544 &tableobj, &delobj))
1545 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 if (PyBytes_Check(tableobj)) {
1548 table = PyBytes_AS_STRING(tableobj);
1549 tablen = PyBytes_GET_SIZE(tableobj);
1550 }
1551 else if (tableobj == Py_None) {
1552 table = NULL;
1553 tablen = 256;
1554 }
1555 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1556 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 if (tablen != 256) {
1559 PyErr_SetString(PyExc_ValueError,
1560 "translation table must be 256 characters long");
1561 return NULL;
1562 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 if (delobj != NULL) {
1565 if (PyBytes_Check(delobj)) {
1566 del_table = PyBytes_AS_STRING(delobj);
1567 dellen = PyBytes_GET_SIZE(delobj);
1568 }
1569 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1570 return NULL;
1571 }
1572 else {
1573 del_table = NULL;
1574 dellen = 0;
1575 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 inlen = PyBytes_GET_SIZE(input_obj);
1578 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1579 if (result == NULL)
1580 return NULL;
1581 output_start = output = PyBytes_AsString(result);
1582 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 if (dellen == 0 && table != NULL) {
1585 /* If no deletions are required, use faster code */
1586 for (i = inlen; --i >= 0; ) {
1587 c = Py_CHARMASK(*input++);
1588 if (Py_CHARMASK((*output++ = table[c])) != c)
1589 changed = 1;
1590 }
1591 if (changed || !PyBytes_CheckExact(input_obj))
1592 return result;
1593 Py_DECREF(result);
1594 Py_INCREF(input_obj);
1595 return input_obj;
1596 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 if (table == NULL) {
1599 for (i = 0; i < 256; i++)
1600 trans_table[i] = Py_CHARMASK(i);
1601 } else {
1602 for (i = 0; i < 256; i++)
1603 trans_table[i] = Py_CHARMASK(table[i]);
1604 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 for (i = 0; i < dellen; i++)
1607 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 for (i = inlen; --i >= 0; ) {
1610 c = Py_CHARMASK(*input++);
1611 if (trans_table[c] != -1)
1612 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1613 continue;
1614 changed = 1;
1615 }
1616 if (!changed && PyBytes_CheckExact(input_obj)) {
1617 Py_DECREF(result);
1618 Py_INCREF(input_obj);
1619 return input_obj;
1620 }
1621 /* Fix the size of the resulting string */
1622 if (inlen > 0)
1623 _PyBytes_Resize(&result, output - output_start);
1624 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001625}
1626
1627
Georg Brandlabc38772009-04-12 15:51:51 +00001628static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001629bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001630{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001632}
1633
/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Expands to a memchr() call whose result is cast to char *,
   so it yields NULL when the byte is absent. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1638
1639/* String ops must return a string. */
1640/* If the object is subclass of string, create a copy */
1641Py_LOCAL(PyBytesObject *)
1642return_self(PyBytesObject *self)
1643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 if (PyBytes_CheckExact(self)) {
1645 Py_INCREF(self);
1646 return self;
1647 }
1648 return (PyBytesObject *)PyBytes_FromStringAndSize(
1649 PyBytes_AS_STRING(self),
1650 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651}
1652
1653Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001654countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 Py_ssize_t count=0;
1657 const char *start=target;
1658 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 while ( (start=findchar(start, end-start, c)) != NULL ) {
1661 count++;
1662 if (count >= maxcount)
1663 break;
1664 start += 1;
1665 }
1666 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001667}
1668
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
1670/* Algorithms for different cases of string replacement */
1671
1672/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1673Py_LOCAL(PyBytesObject *)
1674replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 const char *to_s, Py_ssize_t to_len,
1676 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 char *self_s, *result_s;
1679 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001680 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001685 /* 1 at the end plus 1 after every character;
1686 count = min(maxcount, self_len + 1) */
1687 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001689 else
1690 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1691 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 /* Check for overflow */
1694 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001695 assert(count > 0);
1696 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 PyErr_SetString(PyExc_OverflowError,
1698 "replacement bytes are too long");
1699 return NULL;
1700 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001701 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 if (! (result = (PyBytesObject *)
1704 PyBytes_FromStringAndSize(NULL, result_len)) )
1705 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 self_s = PyBytes_AS_STRING(self);
1708 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 /* Lay the first one down (guaranteed this will occur) */
1713 Py_MEMCPY(result_s, to_s, to_len);
1714 result_s += to_len;
1715 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 for (i=0; i<count; i++) {
1718 *result_s++ = *self_s++;
1719 Py_MEMCPY(result_s, to_s, to_len);
1720 result_s += to_len;
1721 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 /* Copy the rest of the original string */
1724 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727}
1728
1729/* Special case for deleting a single character */
1730/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1731Py_LOCAL(PyBytesObject *)
1732replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 char *self_s, *result_s;
1736 char *start, *next, *end;
1737 Py_ssize_t self_len, result_len;
1738 Py_ssize_t count;
1739 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 self_len = PyBytes_GET_SIZE(self);
1742 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 count = countchar(self_s, self_len, from_c, maxcount);
1745 if (count == 0) {
1746 return return_self(self);
1747 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 result_len = self_len - count; /* from_len == 1 */
1750 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 if ( (result = (PyBytesObject *)
1753 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1754 return NULL;
1755 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 start = self_s;
1758 end = self_s + self_len;
1759 while (count-- > 0) {
1760 next = findchar(start, end-start, from_c);
1761 if (next == NULL)
1762 break;
1763 Py_MEMCPY(result_s, start, next-start);
1764 result_s += (next-start);
1765 start = next+1;
1766 }
1767 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770}
1771
1772/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1773
1774Py_LOCAL(PyBytesObject *)
1775replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 const char *from_s, Py_ssize_t from_len,
1777 Py_ssize_t maxcount) {
1778 char *self_s, *result_s;
1779 char *start, *next, *end;
1780 Py_ssize_t self_len, result_len;
1781 Py_ssize_t count, offset;
1782 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 self_len = PyBytes_GET_SIZE(self);
1785 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 count = stringlib_count(self_s, self_len,
1788 from_s, from_len,
1789 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 if (count == 0) {
1792 /* no matches */
1793 return return_self(self);
1794 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 result_len = self_len - (count * from_len);
1797 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 if ( (result = (PyBytesObject *)
1800 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1801 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 start = self_s;
1806 end = self_s + self_len;
1807 while (count-- > 0) {
1808 offset = stringlib_find(start, end-start,
1809 from_s, from_len,
1810 0);
1811 if (offset == -1)
1812 break;
1813 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 result_s += (next-start);
1818 start = next+from_len;
1819 }
1820 Py_MEMCPY(result_s, start, end-start);
1821 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001822}
1823
1824/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1825Py_LOCAL(PyBytesObject *)
1826replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 char from_c, char to_c,
1828 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 char *self_s, *result_s, *start, *end, *next;
1831 Py_ssize_t self_len;
1832 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 /* The result string will be the same size */
1835 self_s = PyBytes_AS_STRING(self);
1836 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 if (next == NULL) {
1841 /* No matches; return the original string */
1842 return return_self(self);
1843 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 /* Need to make a new string */
1846 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1847 if (result == NULL)
1848 return NULL;
1849 result_s = PyBytes_AS_STRING(result);
1850 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 /* change everything in-place, starting with this one */
1853 start = result_s + (next-self_s);
1854 *start = to_c;
1855 start++;
1856 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 while (--maxcount > 0) {
1859 next = findchar(start, end-start, from_c);
1860 if (next == NULL)
1861 break;
1862 *next = to_c;
1863 start = next+1;
1864 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867}
1868
1869/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1870Py_LOCAL(PyBytesObject *)
1871replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 const char *from_s, Py_ssize_t from_len,
1873 const char *to_s, Py_ssize_t to_len,
1874 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001875{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 char *result_s, *start, *end;
1877 char *self_s;
1878 Py_ssize_t self_len, offset;
1879 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 self_s = PyBytes_AS_STRING(self);
1884 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 offset = stringlib_find(self_s, self_len,
1887 from_s, from_len,
1888 0);
1889 if (offset == -1) {
1890 /* No matches; return the original string */
1891 return return_self(self);
1892 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 /* Need to make a new string */
1895 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1896 if (result == NULL)
1897 return NULL;
1898 result_s = PyBytes_AS_STRING(result);
1899 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001901 /* change everything in-place, starting with this one */
1902 start = result_s + offset;
1903 Py_MEMCPY(start, to_s, from_len);
1904 start += from_len;
1905 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 while ( --maxcount > 0) {
1908 offset = stringlib_find(start, end-start,
1909 from_s, from_len,
1910 0);
1911 if (offset==-1)
1912 break;
1913 Py_MEMCPY(start+offset, to_s, from_len);
1914 start += offset+from_len;
1915 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918}
1919
1920/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1921Py_LOCAL(PyBytesObject *)
1922replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 char from_c,
1924 const char *to_s, Py_ssize_t to_len,
1925 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 char *self_s, *result_s;
1928 char *start, *next, *end;
1929 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001930 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 self_s = PyBytes_AS_STRING(self);
1934 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 count = countchar(self_s, self_len, from_c, maxcount);
1937 if (count == 0) {
1938 /* no matches, return unchanged */
1939 return return_self(self);
1940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 /* use the difference between current and new, hence the "-1" */
1943 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001944 assert(count > 0);
1945 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 PyErr_SetString(PyExc_OverflowError,
1947 "replacement bytes are too long");
1948 return NULL;
1949 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001950 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 if ( (result = (PyBytesObject *)
1953 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1954 return NULL;
1955 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 start = self_s;
1958 end = self_s + self_len;
1959 while (count-- > 0) {
1960 next = findchar(start, end-start, from_c);
1961 if (next == NULL)
1962 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 if (next == start) {
1965 /* replace with the 'to' */
1966 Py_MEMCPY(result_s, to_s, to_len);
1967 result_s += to_len;
1968 start += 1;
1969 } else {
1970 /* copy the unchanged old then the 'to' */
1971 Py_MEMCPY(result_s, start, next-start);
1972 result_s += (next-start);
1973 Py_MEMCPY(result_s, to_s, to_len);
1974 result_s += to_len;
1975 start = next+1;
1976 }
1977 }
1978 /* Copy the remainder of the remaining string */
1979 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001982}
1983
1984/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1985Py_LOCAL(PyBytesObject *)
1986replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 const char *from_s, Py_ssize_t from_len,
1988 const char *to_s, Py_ssize_t to_len,
1989 Py_ssize_t maxcount) {
1990 char *self_s, *result_s;
1991 char *start, *next, *end;
1992 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001993 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 self_s = PyBytes_AS_STRING(self);
1997 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 count = stringlib_count(self_s, self_len,
2000 from_s, from_len,
2001 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002002
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 if (count == 0) {
2004 /* no matches, return unchanged */
2005 return return_self(self);
2006 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 /* Check for overflow */
2009 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002010 assert(count > 0);
2011 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 PyErr_SetString(PyExc_OverflowError,
2013 "replacement bytes are too long");
2014 return NULL;
2015 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002016 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 if ( (result = (PyBytesObject *)
2019 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2020 return NULL;
2021 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 start = self_s;
2024 end = self_s + self_len;
2025 while (count-- > 0) {
2026 offset = stringlib_find(start, end-start,
2027 from_s, from_len,
2028 0);
2029 if (offset == -1)
2030 break;
2031 next = start+offset;
2032 if (next == start) {
2033 /* replace with the 'to' */
2034 Py_MEMCPY(result_s, to_s, to_len);
2035 result_s += to_len;
2036 start += from_len;
2037 } else {
2038 /* copy the unchanged old then the 'to' */
2039 Py_MEMCPY(result_s, start, next-start);
2040 result_s += (next-start);
2041 Py_MEMCPY(result_s, to_s, to_len);
2042 result_s += to_len;
2043 start = next+from_len;
2044 }
2045 }
2046 /* Copy the remainder of the remaining string */
2047 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050}
2051
2052
2053Py_LOCAL(PyBytesObject *)
2054replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 const char *from_s, Py_ssize_t from_len,
2056 const char *to_s, Py_ssize_t to_len,
2057 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 if (maxcount < 0) {
2060 maxcount = PY_SSIZE_T_MAX;
2061 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2062 /* nothing to do; return the original string */
2063 return return_self(self);
2064 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 if (maxcount == 0 ||
2067 (from_len == 0 && to_len == 0)) {
2068 /* nothing to do; return the original string */
2069 return return_self(self);
2070 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 if (from_len == 0) {
2075 /* insert the 'to' string everywhere. */
2076 /* >>> "Python".replace("", ".") */
2077 /* '.P.y.t.h.o.n.' */
2078 return replace_interleave(self, to_s, to_len, maxcount);
2079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2082 /* point for an empty self string to generate a non-empty string */
2083 /* Special case so the remaining code always gets a non-empty string */
2084 if (PyBytes_GET_SIZE(self) == 0) {
2085 return return_self(self);
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 if (to_len == 0) {
2089 /* delete all occurrences of 'from' string */
2090 if (from_len == 1) {
2091 return replace_delete_single_character(
2092 self, from_s[0], maxcount);
2093 } else {
2094 return replace_delete_substring(self, from_s,
2095 from_len, maxcount);
2096 }
2097 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 if (from_len == to_len) {
2102 if (from_len == 1) {
2103 return replace_single_character_in_place(
2104 self,
2105 from_s[0],
2106 to_s[0],
2107 maxcount);
2108 } else {
2109 return replace_substring_in_place(
2110 self, from_s, from_len, to_s, to_len,
2111 maxcount);
2112 }
2113 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002115 /* Otherwise use the more generic algorithms */
2116 if (from_len == 1) {
2117 return replace_single_character(self, from_s[0],
2118 to_s, to_len, maxcount);
2119 } else {
2120 /* len('from')>=2, len('to')>=1 */
2121 return replace_substring(self, from_s, from_len, to_s, to_len,
2122 maxcount);
2123 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124}
2125
2126PyDoc_STRVAR(replace__doc__,
2127"B.replace(old, new[, count]) -> bytes\n\
2128\n\
2129Return a copy of B with all occurrences of subsection\n\
2130old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002131given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
2133static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002134bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 Py_ssize_t count = -1;
2137 PyObject *from, *to;
2138 const char *from_s, *to_s;
2139 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2142 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 if (PyBytes_Check(from)) {
2145 from_s = PyBytes_AS_STRING(from);
2146 from_len = PyBytes_GET_SIZE(from);
2147 }
2148 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2149 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 if (PyBytes_Check(to)) {
2152 to_s = PyBytes_AS_STRING(to);
2153 to_len = PyBytes_GET_SIZE(to);
2154 }
2155 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2156 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 return (PyObject *)replace((PyBytesObject *) self,
2159 from_s, from_len,
2160 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002161}
2162
2163/** End DALKE **/
2164
2165/* Matches the end (direction >= 0) or start (direction < 0) of self
2166 * against substr, using the start and end arguments. Returns
2167 * -1 on error, 0 if not found and 1 if found.
2168 */
2169Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002170_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 Py_ssize_t len = PyBytes_GET_SIZE(self);
2174 Py_ssize_t slen;
2175 const char* sub;
2176 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 if (PyBytes_Check(substr)) {
2179 sub = PyBytes_AS_STRING(substr);
2180 slen = PyBytes_GET_SIZE(substr);
2181 }
2182 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2183 return -1;
2184 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002188 if (direction < 0) {
2189 /* startswith */
2190 if (start+slen > len)
2191 return 0;
2192 } else {
2193 /* endswith */
2194 if (end-start < slen || start > len)
2195 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 if (end-slen > start)
2198 start = end - slen;
2199 }
2200 if (end-start >= slen)
2201 return ! memcmp(str+start, sub, slen);
2202 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203}
2204
2205
2206PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002207"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208\n\
2209Return True if B starts with the specified prefix, False otherwise.\n\
2210With optional start, test B beginning at that position.\n\
2211With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002212prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213
2214static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002215bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002216{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002217 Py_ssize_t start = 0;
2218 Py_ssize_t end = PY_SSIZE_T_MAX;
2219 PyObject *subobj;
2220 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221
Jesus Ceaac451502011-04-20 17:09:23 +02002222 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002223 return NULL;
2224 if (PyTuple_Check(subobj)) {
2225 Py_ssize_t i;
2226 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2227 result = _bytes_tailmatch(self,
2228 PyTuple_GET_ITEM(subobj, i),
2229 start, end, -1);
2230 if (result == -1)
2231 return NULL;
2232 else if (result) {
2233 Py_RETURN_TRUE;
2234 }
2235 }
2236 Py_RETURN_FALSE;
2237 }
2238 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002239 if (result == -1) {
2240 if (PyErr_ExceptionMatches(PyExc_TypeError))
2241 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2242 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002243 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002244 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 else
2246 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002247}
2248
2249
2250PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002251"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252\n\
2253Return True if B ends with the specified suffix, False otherwise.\n\
2254With optional start, test B beginning at that position.\n\
2255With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002256suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
2258static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002259bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 Py_ssize_t start = 0;
2262 Py_ssize_t end = PY_SSIZE_T_MAX;
2263 PyObject *subobj;
2264 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Jesus Ceaac451502011-04-20 17:09:23 +02002266 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002267 return NULL;
2268 if (PyTuple_Check(subobj)) {
2269 Py_ssize_t i;
2270 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2271 result = _bytes_tailmatch(self,
2272 PyTuple_GET_ITEM(subobj, i),
2273 start, end, +1);
2274 if (result == -1)
2275 return NULL;
2276 else if (result) {
2277 Py_RETURN_TRUE;
2278 }
2279 }
2280 Py_RETURN_FALSE;
2281 }
2282 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002283 if (result == -1) {
2284 if (PyErr_ExceptionMatches(PyExc_TypeError))
2285 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2286 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002287 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002288 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 else
2290 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002291}
2292
2293
2294PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002295"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002297Decode B using the codec registered for encoding. Default encoding\n\
2298is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002299handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2300a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002302able to handle UnicodeDecodeErrors.");
2303
2304static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002305bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002306{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002307 const char *encoding = NULL;
2308 const char *errors = NULL;
2309 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002311 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2312 return NULL;
2313 if (encoding == NULL)
2314 encoding = PyUnicode_GetDefaultEncoding();
2315 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002316}
2317
Guido van Rossum20188312006-05-05 15:15:40 +00002318
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002319PyDoc_STRVAR(splitlines__doc__,
2320"B.splitlines([keepends]) -> list of lines\n\
2321\n\
2322Return a list of the lines in B, breaking at line boundaries.\n\
2323Line breaks are not included in the resulting list unless keepends\n\
2324is given and true.");
2325
2326static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002327bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002328{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002329 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002330 int keepends = 0;
2331
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002332 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2333 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002334 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002335
2336 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002337 (PyObject*) self, PyBytes_AS_STRING(self),
2338 PyBytes_GET_SIZE(self), keepends
2339 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002340}
2341
2342
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002343PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002347Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349
2350static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002351hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 if (c >= 128)
2354 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002355 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 return c - '0';
2357 else {
David Malcolm96960882010-11-05 17:23:41 +00002358 if (Py_ISUPPER(c))
2359 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 if (c >= 'a' && c <= 'f')
2361 return c - 'a' + 10;
2362 }
2363 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364}
2365
2366static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002367bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 PyObject *newstring, *hexobj;
2370 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002371 Py_ssize_t hexlen, byteslen, i, j;
2372 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002373 void *data;
2374 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2377 return NULL;
2378 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002379 if (PyUnicode_READY(hexobj))
2380 return NULL;
2381 kind = PyUnicode_KIND(hexobj);
2382 data = PyUnicode_DATA(hexobj);
2383 hexlen = PyUnicode_GET_LENGTH(hexobj);
2384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 byteslen = hexlen/2; /* This overestimates if there are spaces */
2386 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2387 if (!newstring)
2388 return NULL;
2389 buf = PyBytes_AS_STRING(newstring);
2390 for (i = j = 0; i < hexlen; i += 2) {
2391 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002392 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 i++;
2394 if (i >= hexlen)
2395 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002396 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2397 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002398 if (top == -1 || bot == -1) {
2399 PyErr_Format(PyExc_ValueError,
2400 "non-hexadecimal number found in "
2401 "fromhex() arg at position %zd", i);
2402 goto error;
2403 }
2404 buf[j++] = (top << 4) + bot;
2405 }
2406 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2407 goto error;
2408 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409
2410 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 Py_XDECREF(newstring);
2412 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002413}
2414
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002415PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002416"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002417
2418static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002419bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002420{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 Py_ssize_t res;
2422 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2423 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002424}
2425
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002426
2427static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002428bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002430 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002431}
2432
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002433
2434static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002435bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2437 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2438 _Py_capitalize__doc__},
2439 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2440 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2441 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2442 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2443 endswith__doc__},
2444 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2445 expandtabs__doc__},
2446 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2447 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2448 fromhex_doc},
2449 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2450 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2451 _Py_isalnum__doc__},
2452 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2453 _Py_isalpha__doc__},
2454 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2455 _Py_isdigit__doc__},
2456 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2457 _Py_islower__doc__},
2458 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2459 _Py_isspace__doc__},
2460 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2461 _Py_istitle__doc__},
2462 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2463 _Py_isupper__doc__},
2464 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2465 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2466 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2467 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2468 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2469 _Py_maketrans__doc__},
2470 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2471 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2472 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2473 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2474 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2475 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2476 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002477 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002479 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002480 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 splitlines__doc__},
2482 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2483 startswith__doc__},
2484 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2485 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2486 _Py_swapcase__doc__},
2487 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2488 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2489 translate__doc__},
2490 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2491 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2492 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2493 sizeof__doc__},
2494 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002495};
2496
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497static PyObject *
2498str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2499
2500static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002501bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 PyObject *x = NULL;
2504 const char *encoding = NULL;
2505 const char *errors = NULL;
2506 PyObject *new = NULL;
2507 Py_ssize_t size;
2508 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 if (type != &PyBytes_Type)
2511 return str_subtype_new(type, args, kwds);
2512 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2513 &encoding, &errors))
2514 return NULL;
2515 if (x == NULL) {
2516 if (encoding != NULL || errors != NULL) {
2517 PyErr_SetString(PyExc_TypeError,
2518 "encoding or errors without sequence "
2519 "argument");
2520 return NULL;
2521 }
2522 return PyBytes_FromString("");
2523 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002525 if (PyUnicode_Check(x)) {
2526 /* Encode via the codec registry */
2527 if (encoding == NULL) {
2528 PyErr_SetString(PyExc_TypeError,
2529 "string argument without an encoding");
2530 return NULL;
2531 }
2532 new = PyUnicode_AsEncodedString(x, encoding, errors);
2533 if (new == NULL)
2534 return NULL;
2535 assert(PyBytes_Check(new));
2536 return new;
2537 }
2538 /* Is it an integer? */
2539 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2540 if (size == -1 && PyErr_Occurred()) {
2541 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2542 return NULL;
2543 PyErr_Clear();
2544 }
2545 else if (size < 0) {
2546 PyErr_SetString(PyExc_ValueError, "negative count");
2547 return NULL;
2548 }
2549 else {
2550 new = PyBytes_FromStringAndSize(NULL, size);
2551 if (new == NULL) {
2552 return NULL;
2553 }
2554 if (size > 0) {
2555 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2556 }
2557 return new;
2558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 /* If it's not unicode, there can't be encoding or errors */
2561 if (encoding != NULL || errors != NULL) {
2562 PyErr_SetString(PyExc_TypeError,
2563 "encoding or errors without a string argument");
2564 return NULL;
2565 }
2566 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002567}
2568
2569PyObject *
2570PyBytes_FromObject(PyObject *x)
2571{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 PyObject *new, *it;
2573 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 if (x == NULL) {
2576 PyErr_BadInternalCall();
2577 return NULL;
2578 }
2579 /* Use the modern buffer interface */
2580 if (PyObject_CheckBuffer(x)) {
2581 Py_buffer view;
2582 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2583 return NULL;
2584 new = PyBytes_FromStringAndSize(NULL, view.len);
2585 if (!new)
2586 goto fail;
2587 /* XXX(brett.cannon): Better way to get to internal buffer? */
2588 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2589 &view, view.len, 'C') < 0)
2590 goto fail;
2591 PyBuffer_Release(&view);
2592 return new;
2593 fail:
2594 Py_XDECREF(new);
2595 PyBuffer_Release(&view);
2596 return NULL;
2597 }
2598 if (PyUnicode_Check(x)) {
2599 PyErr_SetString(PyExc_TypeError,
2600 "cannot convert unicode object to bytes");
2601 return NULL;
2602 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 if (PyList_CheckExact(x)) {
2605 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2606 if (new == NULL)
2607 return NULL;
2608 for (i = 0; i < Py_SIZE(x); i++) {
2609 Py_ssize_t value = PyNumber_AsSsize_t(
2610 PyList_GET_ITEM(x, i), PyExc_ValueError);
2611 if (value == -1 && PyErr_Occurred()) {
2612 Py_DECREF(new);
2613 return NULL;
2614 }
2615 if (value < 0 || value >= 256) {
2616 PyErr_SetString(PyExc_ValueError,
2617 "bytes must be in range(0, 256)");
2618 Py_DECREF(new);
2619 return NULL;
2620 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002621 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 }
2623 return new;
2624 }
2625 if (PyTuple_CheckExact(x)) {
2626 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2627 if (new == NULL)
2628 return NULL;
2629 for (i = 0; i < Py_SIZE(x); i++) {
2630 Py_ssize_t value = PyNumber_AsSsize_t(
2631 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2632 if (value == -1 && PyErr_Occurred()) {
2633 Py_DECREF(new);
2634 return NULL;
2635 }
2636 if (value < 0 || value >= 256) {
2637 PyErr_SetString(PyExc_ValueError,
2638 "bytes must be in range(0, 256)");
2639 Py_DECREF(new);
2640 return NULL;
2641 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002642 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 }
2644 return new;
2645 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 /* For iterator version, create a string object and resize as needed */
2648 size = _PyObject_LengthHint(x, 64);
2649 if (size == -1 && PyErr_Occurred())
2650 return NULL;
2651 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2652 returning a shared empty bytes string. This required because we
2653 want to call _PyBytes_Resize() the returned object, which we can
2654 only do on bytes objects with refcount == 1. */
2655 size += 1;
2656 new = PyBytes_FromStringAndSize(NULL, size);
2657 if (new == NULL)
2658 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 /* Get the iterator */
2661 it = PyObject_GetIter(x);
2662 if (it == NULL)
2663 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 /* Run the iterator to exhaustion */
2666 for (i = 0; ; i++) {
2667 PyObject *item;
2668 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 /* Get the next item */
2671 item = PyIter_Next(it);
2672 if (item == NULL) {
2673 if (PyErr_Occurred())
2674 goto error;
2675 break;
2676 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002678 /* Interpret it as an int (__index__) */
2679 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2680 Py_DECREF(item);
2681 if (value == -1 && PyErr_Occurred())
2682 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Range check */
2685 if (value < 0 || value >= 256) {
2686 PyErr_SetString(PyExc_ValueError,
2687 "bytes must be in range(0, 256)");
2688 goto error;
2689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Append the byte */
2692 if (i >= size) {
2693 size = 2 * size + 1;
2694 if (_PyBytes_Resize(&new, size) < 0)
2695 goto error;
2696 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002697 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 }
2699 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 /* Clean up and return success */
2702 Py_DECREF(it);
2703 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002704
2705 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 /* Error handling when new != NULL */
2707 Py_XDECREF(it);
2708 Py_DECREF(new);
2709 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710}
2711
2712static PyObject *
2713str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2714{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 PyObject *tmp, *pnew;
2716 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 assert(PyType_IsSubtype(type, &PyBytes_Type));
2719 tmp = bytes_new(&PyBytes_Type, args, kwds);
2720 if (tmp == NULL)
2721 return NULL;
2722 assert(PyBytes_CheckExact(tmp));
2723 n = PyBytes_GET_SIZE(tmp);
2724 pnew = type->tp_alloc(type, n);
2725 if (pnew != NULL) {
2726 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2727 PyBytes_AS_STRING(tmp), n+1);
2728 ((PyBytesObject *)pnew)->ob_shash =
2729 ((PyBytesObject *)tmp)->ob_shash;
2730 }
2731 Py_DECREF(tmp);
2732 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733}
2734
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002735PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002736"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002738bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002739bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2740bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002741\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002742Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002743 - an iterable yielding integers in range(256)\n\
2744 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002745 - any object implementing the buffer API.\n\
2746 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002747
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002748static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002749
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2752 "bytes",
2753 PyBytesObject_SIZE,
2754 sizeof(char),
2755 bytes_dealloc, /* tp_dealloc */
2756 0, /* tp_print */
2757 0, /* tp_getattr */
2758 0, /* tp_setattr */
2759 0, /* tp_reserved */
2760 (reprfunc)bytes_repr, /* tp_repr */
2761 0, /* tp_as_number */
2762 &bytes_as_sequence, /* tp_as_sequence */
2763 &bytes_as_mapping, /* tp_as_mapping */
2764 (hashfunc)bytes_hash, /* tp_hash */
2765 0, /* tp_call */
2766 bytes_str, /* tp_str */
2767 PyObject_GenericGetAttr, /* tp_getattro */
2768 0, /* tp_setattro */
2769 &bytes_as_buffer, /* tp_as_buffer */
2770 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2771 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2772 bytes_doc, /* tp_doc */
2773 0, /* tp_traverse */
2774 0, /* tp_clear */
2775 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2776 0, /* tp_weaklistoffset */
2777 bytes_iter, /* tp_iter */
2778 0, /* tp_iternext */
2779 bytes_methods, /* tp_methods */
2780 0, /* tp_members */
2781 0, /* tp_getset */
2782 &PyBaseObject_Type, /* tp_base */
2783 0, /* tp_dict */
2784 0, /* tp_descr_get */
2785 0, /* tp_descr_set */
2786 0, /* tp_dictoffset */
2787 0, /* tp_init */
2788 0, /* tp_alloc */
2789 bytes_new, /* tp_new */
2790 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002791};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002792
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793void
2794PyBytes_Concat(register PyObject **pv, register PyObject *w)
2795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 register PyObject *v;
2797 assert(pv != NULL);
2798 if (*pv == NULL)
2799 return;
2800 if (w == NULL) {
2801 Py_DECREF(*pv);
2802 *pv = NULL;
2803 return;
2804 }
2805 v = bytes_concat(*pv, w);
2806 Py_DECREF(*pv);
2807 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002808}
2809
2810void
2811PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2812{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 PyBytes_Concat(pv, w);
2814 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815}
2816
2817
2818/* The following function breaks the notion that strings are immutable:
2819 it changes the size of a string. We get away with this only if there
2820 is only one module referencing the object. You can also think of it
2821 as creating a new string object and destroying the old one, only
2822 more efficiently. In any case, don't use this if the string may
2823 already be known to some other part of the code...
2824 Note that if there's not enough memory to resize the string, the original
2825 string object at *pv is deallocated, *pv is set to NULL, an "out of
2826 memory" exception is set, and -1 is returned. Else (on success) 0 is
2827 returned, and the value in *pv may or may not be the same as on input.
2828 As always, an extra byte is allocated for a trailing \0 byte (newsize
2829 does *not* include that), and a trailing \0 byte is stored.
2830*/
2831
2832int
2833_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2834{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 register PyObject *v;
2836 register PyBytesObject *sv;
2837 v = *pv;
2838 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2839 *pv = 0;
2840 Py_DECREF(v);
2841 PyErr_BadInternalCall();
2842 return -1;
2843 }
2844 /* XXX UNREF/NEWREF interface should be more symmetrical */
2845 _Py_DEC_REFTOTAL;
2846 _Py_ForgetReference(v);
2847 *pv = (PyObject *)
2848 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2849 if (*pv == NULL) {
2850 PyObject_Del(v);
2851 PyErr_NoMemory();
2852 return -1;
2853 }
2854 _Py_NewReference(*pv);
2855 sv = (PyBytesObject *) *pv;
2856 Py_SIZE(sv) = newsize;
2857 sv->ob_sval[newsize] = '\0';
2858 sv->ob_shash = -1; /* invalidate cached hash value */
2859 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860}
2861
2862/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2863 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2864 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002865 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866 * . *pbuf is set to point into it,
2867 * *plen set to the # of chars following that.
2868 * Caller must decref it when done using pbuf.
2869 * The string starting at *pbuf is of the form
2870 * "-"? ("0x" | "0X")? digit+
2871 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2872 * set in flags. The case of hex digits will be correct,
2873 * There will be at least prec digits, zero-filled on the left if
2874 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002875 * val object to be converted
2876 * flags bitmask of format flags; only F_ALT is looked at
2877 * prec minimum number of digits; 0-fill on left if needed
2878 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879 *
2880 * CAUTION: o, x and X conversions on regular ints can never
2881 * produce a '-' sign, but can for Python's unbounded ints.
2882 */
2883PyObject*
2884_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 PyObject *result = NULL;
2888 char *buf;
2889 Py_ssize_t i;
2890 int sign; /* 1 if '-', else 0 */
2891 int len; /* number of characters */
2892 Py_ssize_t llen;
2893 int numdigits; /* len == numnondigits + numdigits */
2894 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 /* Avoid exceeding SSIZE_T_MAX */
2897 if (prec > INT_MAX-3) {
2898 PyErr_SetString(PyExc_OverflowError,
2899 "precision too large");
2900 return NULL;
2901 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 switch (type) {
2904 case 'd':
2905 case 'u':
2906 /* Special-case boolean: we want 0/1 */
2907 if (PyBool_Check(val))
2908 result = PyNumber_ToBase(val, 10);
2909 else
2910 result = Py_TYPE(val)->tp_str(val);
2911 break;
2912 case 'o':
2913 numnondigits = 2;
2914 result = PyNumber_ToBase(val, 8);
2915 break;
2916 case 'x':
2917 case 'X':
2918 numnondigits = 2;
2919 result = PyNumber_ToBase(val, 16);
2920 break;
2921 default:
2922 assert(!"'type' not in [duoxX]");
2923 }
2924 if (!result)
2925 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 buf = _PyUnicode_AsString(result);
2928 if (!buf) {
2929 Py_DECREF(result);
2930 return NULL;
2931 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 /* To modify the string in-place, there can only be one reference. */
2934 if (Py_REFCNT(result) != 1) {
2935 PyErr_BadInternalCall();
2936 return NULL;
2937 }
Victor Stinner9e30aa52011-11-21 02:49:52 +01002938 llen = PyUnicode_GetLength(result);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 if (llen > INT_MAX) {
2940 PyErr_SetString(PyExc_ValueError,
2941 "string too large in _PyBytes_FormatLong");
2942 return NULL;
2943 }
2944 len = (int)llen;
2945 if (buf[len-1] == 'L') {
2946 --len;
2947 buf[len] = '\0';
2948 }
2949 sign = buf[0] == '-';
2950 numnondigits += sign;
2951 numdigits = len - numnondigits;
2952 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 /* Get rid of base marker unless F_ALT */
2955 if (((flags & F_ALT) == 0 &&
2956 (type == 'o' || type == 'x' || type == 'X'))) {
2957 assert(buf[sign] == '0');
2958 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2959 buf[sign+1] == 'o');
2960 numnondigits -= 2;
2961 buf += 2;
2962 len -= 2;
2963 if (sign)
2964 buf[0] = '-';
2965 assert(len == numnondigits + numdigits);
2966 assert(numdigits > 0);
2967 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 /* Fill with leading zeroes to meet minimum width. */
2970 if (prec > numdigits) {
2971 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2972 numnondigits + prec);
2973 char *b1;
2974 if (!r1) {
2975 Py_DECREF(result);
2976 return NULL;
2977 }
2978 b1 = PyBytes_AS_STRING(r1);
2979 for (i = 0; i < numnondigits; ++i)
2980 *b1++ = *buf++;
2981 for (i = 0; i < prec - numdigits; i++)
2982 *b1++ = '0';
2983 for (i = 0; i < numdigits; i++)
2984 *b1++ = *buf++;
2985 *b1 = '\0';
2986 Py_DECREF(result);
2987 result = r1;
2988 buf = PyBytes_AS_STRING(result);
2989 len = numnondigits + prec;
2990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 /* Fix up case for hex conversions. */
2993 if (type == 'X') {
2994 /* Need to convert all lower case letters to upper case.
2995 and need to convert 0x to 0X (and -0x to -0X). */
2996 for (i = 0; i < len; i++)
2997 if (buf[i] >= 'a' && buf[i] <= 'x')
2998 buf[i] -= 'a'-'A';
2999 }
3000 *pbuf = buf;
3001 *plen = len;
3002 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003}
3004
3005void
3006PyBytes_Fini(void)
3007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 int i;
3009 for (i = 0; i < UCHAR_MAX + 1; i++) {
3010 Py_XDECREF(characters[i]);
3011 characters[i] = NULL;
3012 }
3013 Py_XDECREF(nullstring);
3014 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015}
3016
Benjamin Peterson4116f362008-05-27 00:36:20 +00003017/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003018
3019typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 PyObject_HEAD
3021 Py_ssize_t it_index;
3022 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003023} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003024
3025static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003026striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003027{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 _PyObject_GC_UNTRACK(it);
3029 Py_XDECREF(it->it_seq);
3030 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003031}
3032
3033static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003034striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003035{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 Py_VISIT(it->it_seq);
3037 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003038}
3039
3040static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003041striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003042{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 PyBytesObject *seq;
3044 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003046 assert(it != NULL);
3047 seq = it->it_seq;
3048 if (seq == NULL)
3049 return NULL;
3050 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3053 item = PyLong_FromLong(
3054 (unsigned char)seq->ob_sval[it->it_index]);
3055 if (item != NULL)
3056 ++it->it_index;
3057 return item;
3058 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 Py_DECREF(seq);
3061 it->it_seq = NULL;
3062 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003063}
3064
3065static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003066striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003068 Py_ssize_t len = 0;
3069 if (it->it_seq)
3070 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3071 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003072}
3073
3074PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003075 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003077static PyObject *
3078striter_reduce(striterobject *it)
3079{
3080 if (it->it_seq != NULL) {
3081 return Py_BuildValue("N(O)n", _PyIter_GetBuiltin("iter"),
3082 it->it_seq, it->it_index);
3083 } else {
3084 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3085 if (u == NULL)
3086 return NULL;
3087 return Py_BuildValue("N(N)", _PyIter_GetBuiltin("iter"), u);
3088 }
3089}
3090
3091PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3092
3093static PyObject *
3094striter_setstate(striterobject *it, PyObject *state)
3095{
3096 Py_ssize_t index = PyLong_AsSsize_t(state);
3097 if (index == -1 && PyErr_Occurred())
3098 return NULL;
3099 if (index < 0)
3100 index = 0;
3101 it->it_index = index;
3102 Py_RETURN_NONE;
3103}
3104
3105PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003107static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003108 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3109 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003110 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3111 reduce_doc},
3112 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3113 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115};
3116
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003117PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003118 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3119 "bytes_iterator", /* tp_name */
3120 sizeof(striterobject), /* tp_basicsize */
3121 0, /* tp_itemsize */
3122 /* methods */
3123 (destructor)striter_dealloc, /* tp_dealloc */
3124 0, /* tp_print */
3125 0, /* tp_getattr */
3126 0, /* tp_setattr */
3127 0, /* tp_reserved */
3128 0, /* tp_repr */
3129 0, /* tp_as_number */
3130 0, /* tp_as_sequence */
3131 0, /* tp_as_mapping */
3132 0, /* tp_hash */
3133 0, /* tp_call */
3134 0, /* tp_str */
3135 PyObject_GenericGetAttr, /* tp_getattro */
3136 0, /* tp_setattro */
3137 0, /* tp_as_buffer */
3138 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3139 0, /* tp_doc */
3140 (traverseproc)striter_traverse, /* tp_traverse */
3141 0, /* tp_clear */
3142 0, /* tp_richcompare */
3143 0, /* tp_weaklistoffset */
3144 PyObject_SelfIter, /* tp_iter */
3145 (iternextfunc)striter_next, /* tp_iternext */
3146 striter_methods, /* tp_methods */
3147 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003148};
3149
3150static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003151bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003152{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003153 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003155 if (!PyBytes_Check(seq)) {
3156 PyErr_BadInternalCall();
3157 return NULL;
3158 }
3159 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3160 if (it == NULL)
3161 return NULL;
3162 it->it_index = 0;
3163 Py_INCREF(seq);
3164 it->it_seq = (PyBytesObject *)seq;
3165 _PyObject_GC_TRACK(it);
3166 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003167}