blob: ba1fefd89708a26155fd6154220a320b90f9fe21 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
56 For PyBytes_FromStringAndSize(), the parameter `str' is
57 either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 register PyBytesObject *op;
78 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
110 PyObject_INIT_VAR(op, &PyBytes_Type, size);
111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 register size_t size;
130 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
158 PyObject_INIT_VAR(op, &PyBytes_Type, size);
159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
198 (void)va_arg(count, int);
199 /* fall through... */
200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
279 *s++ = va_arg(vargs, int);
280 break;
281 case 'd':
282 if (longflag)
283 sprintf(s, "%ld", va_arg(vargs, long));
284 else if (size_tflag)
285 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
286 va_arg(vargs, Py_ssize_t));
287 else
288 sprintf(s, "%d", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 'u':
292 if (longflag)
293 sprintf(s, "%lu",
294 va_arg(vargs, unsigned long));
295 else if (size_tflag)
296 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
297 va_arg(vargs, size_t));
298 else
299 sprintf(s, "%u",
300 va_arg(vargs, unsigned int));
301 s += strlen(s);
302 break;
303 case 'i':
304 sprintf(s, "%i", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 'x':
308 sprintf(s, "%x", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 's':
312 p = va_arg(vargs, char*);
313 i = strlen(p);
314 if (n > 0 && i > n)
315 i = n;
316 Py_MEMCPY(s, p, i);
317 s += i;
318 break;
319 case 'p':
320 sprintf(s, "%p", va_arg(vargs, void*));
321 /* %p is ill-defined: ensure leading 0x. */
322 if (s[1] == 'X')
323 s[1] = 'x';
324 else if (s[1] != 'x') {
325 memmove(s+2, s, strlen(s)+1);
326 s[0] = '0';
327 s[1] = 'x';
328 }
329 s += strlen(s);
330 break;
331 case '%':
332 *s++ = '%';
333 break;
334 default:
335 strcpy(s, p);
336 s += strlen(s);
337 goto end;
338 }
339 } else
340 *s++ = *f;
341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000342
343 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
345 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346}
347
348PyObject *
349PyBytes_FromFormat(const char *format, ...)
350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject* ret;
352 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
354#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 ret = PyBytes_FromFormatV(format, vargs);
360 va_end(vargs);
361 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000362}
363
364static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000365bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000368}
369
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370/* Unescape a backslash-escaped string. If unicode is non-zero,
371 the string is a u-literal. If recode_encoding is non-zero,
372 the string is UTF-8 encoded and should be re-encoded in the
373 specified encoding. */
374
375PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 Py_ssize_t len,
377 const char *errors,
378 Py_ssize_t unicode,
379 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 int c;
382 char *p, *buf;
383 const char *end;
384 PyObject *v;
385 Py_ssize_t newlen = recode_encoding ? 4*len:len;
386 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
387 if (v == NULL)
388 return NULL;
389 p = buf = PyBytes_AsString(v);
390 end = s + len;
391 while (s < end) {
392 if (*s != '\\') {
393 non_esc:
394 if (recode_encoding && (*s & 0x80)) {
395 PyObject *u, *w;
396 char *r;
397 const char* t;
398 Py_ssize_t rn;
399 t = s;
400 /* Decode non-ASCII bytes as UTF-8. */
401 while (t < end && (*t & 0x80)) t++;
402 u = PyUnicode_DecodeUTF8(s, t - s, errors);
403 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Recode them in target encoding. */
406 w = PyUnicode_AsEncodedString(
407 u, recode_encoding, errors);
408 Py_DECREF(u);
409 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* Append bytes to output buffer. */
412 assert(PyBytes_Check(w));
413 r = PyBytes_AS_STRING(w);
414 rn = PyBytes_GET_SIZE(w);
415 Py_MEMCPY(p, r, rn);
416 p += rn;
417 Py_DECREF(w);
418 s = t;
419 } else {
420 *p++ = *s++;
421 }
422 continue;
423 }
424 s++;
425 if (s==end) {
426 PyErr_SetString(PyExc_ValueError,
427 "Trailing \\ in string");
428 goto failed;
429 }
430 switch (*s++) {
431 /* XXX This assumes ASCII! */
432 case '\n': break;
433 case '\\': *p++ = '\\'; break;
434 case '\'': *p++ = '\''; break;
435 case '\"': *p++ = '\"'; break;
436 case 'b': *p++ = '\b'; break;
437 case 'f': *p++ = '\014'; break; /* FF */
438 case 't': *p++ = '\t'; break;
439 case 'n': *p++ = '\n'; break;
440 case 'r': *p++ = '\r'; break;
441 case 'v': *p++ = '\013'; break; /* VT */
442 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
443 case '0': case '1': case '2': case '3':
444 case '4': case '5': case '6': case '7':
445 c = s[-1] - '0';
446 if (s < end && '0' <= *s && *s <= '7') {
447 c = (c<<3) + *s++ - '0';
448 if (s < end && '0' <= *s && *s <= '7')
449 c = (c<<3) + *s++ - '0';
450 }
451 *p++ = c;
452 break;
453 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000454 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 unsigned int x = 0;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x = 10 + c - 'a';
462 else
463 x = 10 + c - 'A';
464 x = x << 4;
465 c = Py_CHARMASK(*s);
466 s++;
David Malcolm96960882010-11-05 17:23:41 +0000467 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000469 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 x += 10 + c - 'a';
471 else
472 x += 10 + c - 'A';
473 *p++ = x;
474 break;
475 }
476 if (!errors || strcmp(errors, "strict") == 0) {
477 PyErr_SetString(PyExc_ValueError,
478 "invalid \\x escape");
479 goto failed;
480 }
481 if (strcmp(errors, "replace") == 0) {
482 *p++ = '?';
483 } else if (strcmp(errors, "ignore") == 0)
484 /* do nothing */;
485 else {
486 PyErr_Format(PyExc_ValueError,
487 "decoding error; unknown "
488 "error handling code: %.400s",
489 errors);
490 goto failed;
491 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200492 /* skip \x */
493 if (s < end && Py_ISXDIGIT(s[0]))
494 s++; /* and a hexdigit */
495 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 default:
497 *p++ = '\\';
498 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200499 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 UTF-8 bytes may follow. */
501 }
502 }
503 if (p-buf < newlen)
504 _PyBytes_Resize(&v, p - buf);
505 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000506 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 Py_DECREF(v);
508 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000509}
510
511/* -------------------------------------------------------------------- */
512/* object api */
513
514Py_ssize_t
515PyBytes_Size(register PyObject *op)
516{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 if (!PyBytes_Check(op)) {
518 PyErr_Format(PyExc_TypeError,
519 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
520 return -1;
521 }
522 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000523}
524
525char *
526PyBytes_AsString(register PyObject *op)
527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (!PyBytes_Check(op)) {
529 PyErr_Format(PyExc_TypeError,
530 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
531 return NULL;
532 }
533 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000534}
535
536int
537PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 register char **s,
539 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000540{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 if (s == NULL) {
542 PyErr_BadInternalCall();
543 return -1;
544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 if (!PyBytes_Check(obj)) {
547 PyErr_Format(PyExc_TypeError,
548 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
549 return -1;
550 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 *s = PyBytes_AS_STRING(obj);
553 if (len != NULL)
554 *len = PyBytes_GET_SIZE(obj);
555 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
556 PyErr_SetString(PyExc_TypeError,
557 "expected bytes with no null");
558 return -1;
559 }
560 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000561}
Neal Norwitz6968b052007-02-27 19:02:19 +0000562
563/* -------------------------------------------------------------------- */
564/* Methods */
565
Eric Smith0923d1d2009-04-16 20:16:10 +0000566#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
568#include "stringlib/fastsearch.h"
569#include "stringlib/count.h"
570#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200571#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000573#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000574#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Eric Smith0f78bff2009-11-30 01:01:42 +0000576#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000577
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000578PyObject *
579PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200582 Py_ssize_t i, length = Py_SIZE(op);
583 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200585 unsigned char quote, *s, *p;
586
587 /* Compute size of output string */
588 squotes = dquotes = 0;
589 newsize = 3; /* b'' */
590 s = (unsigned char*)op->ob_sval;
591 for (i = 0; i < length; i++) {
592 switch(s[i]) {
593 case '\'': squotes++; newsize++; break;
594 case '"': dquotes++; newsize++; break;
595 case '\\': case '\t': case '\n': case '\r':
596 newsize += 2; break; /* \C */
597 default:
598 if (s[i] < ' ' || s[i] >= 0x7f)
599 newsize += 4; /* \xHH */
600 else
601 newsize++;
602 }
603 }
604 quote = '\'';
605 if (smartquotes && squotes && !dquotes)
606 quote = '"';
607 if (squotes && quote == '\'')
608 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200609
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 PyErr_SetString(PyExc_OverflowError,
612 "bytes object is too large to make repr");
613 return NULL;
614 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615
616 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 if (v == NULL) {
618 return NULL;
619 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200620 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000621
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200622 *p++ = 'b', *p++ = quote;
623 for (i = 0; i < length; i++) {
624 unsigned char c = op->ob_sval[i];
625 if (c == quote || c == '\\')
626 *p++ = '\\', *p++ = c;
627 else if (c == '\t')
628 *p++ = '\\', *p++ = 't';
629 else if (c == '\n')
630 *p++ = '\\', *p++ = 'n';
631 else if (c == '\r')
632 *p++ = '\\', *p++ = 'r';
633 else if (c < ' ' || c >= 0x7f) {
634 *p++ = '\\';
635 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200636 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
637 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200639 else
640 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200643 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200644 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000645}
646
Neal Norwitz6968b052007-02-27 19:02:19 +0000647static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000648bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000654bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (Py_BytesWarningFlag) {
657 if (PyErr_WarnEx(PyExc_BytesWarning,
658 "str() on a bytes instance", 1))
659 return NULL;
660 }
661 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668}
Neal Norwitz6968b052007-02-27 19:02:19 +0000669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670/* This is also used by PyBytes_Concat() */
671static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000672bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 Py_ssize_t size;
675 Py_buffer va, vb;
676 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 va.len = -1;
679 vb.len = -1;
680 if (_getbuffer(a, &va) < 0 ||
681 _getbuffer(b, &vb) < 0) {
682 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
683 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
684 goto done;
685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 /* Optimize end cases */
688 if (va.len == 0 && PyBytes_CheckExact(b)) {
689 result = b;
690 Py_INCREF(result);
691 goto done;
692 }
693 if (vb.len == 0 && PyBytes_CheckExact(a)) {
694 result = a;
695 Py_INCREF(result);
696 goto done;
697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 size = va.len + vb.len;
700 if (size < 0) {
701 PyErr_NoMemory();
702 goto done;
703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 result = PyBytes_FromStringAndSize(NULL, size);
706 if (result != NULL) {
707 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
708 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000710
711 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 if (va.len != -1)
713 PyBuffer_Release(&va);
714 if (vb.len != -1)
715 PyBuffer_Release(&vb);
716 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000717}
Neal Norwitz6968b052007-02-27 19:02:19 +0000718
719static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000720bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000721{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 register Py_ssize_t i;
723 register Py_ssize_t j;
724 register Py_ssize_t size;
725 register PyBytesObject *op;
726 size_t nbytes;
727 if (n < 0)
728 n = 0;
729 /* watch out for overflows: the size can overflow int,
730 * and the # of bytes needed can overflow size_t
731 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000732 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 PyErr_SetString(PyExc_OverflowError,
734 "repeated bytes are too long");
735 return NULL;
736 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000737 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
739 Py_INCREF(a);
740 return (PyObject *)a;
741 }
742 nbytes = (size_t)size;
743 if (nbytes + PyBytesObject_SIZE <= nbytes) {
744 PyErr_SetString(PyExc_OverflowError,
745 "repeated bytes are too long");
746 return NULL;
747 }
748 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
749 if (op == NULL)
750 return PyErr_NoMemory();
751 PyObject_INIT_VAR(op, &PyBytes_Type, size);
752 op->ob_shash = -1;
753 op->ob_sval[size] = '\0';
754 if (Py_SIZE(a) == 1 && n > 0) {
755 memset(op->ob_sval, a->ob_sval[0] , n);
756 return (PyObject *) op;
757 }
758 i = 0;
759 if (i < size) {
760 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
761 i = Py_SIZE(a);
762 }
763 while (i < size) {
764 j = (i <= size-i) ? i : size-i;
765 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
766 i += j;
767 }
768 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000769}
770
Guido van Rossum98297ee2007-11-06 21:34:58 +0000771static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000772bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000773{
774 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
775 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000776 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000777 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000778 PyErr_Clear();
779 if (_getbuffer(arg, &varg) < 0)
780 return -1;
781 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
782 varg.buf, varg.len, 0);
783 PyBuffer_Release(&varg);
784 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000787 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
788 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789 }
790
Antoine Pitrou0010d372010-08-15 17:12:55 +0000791 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000792}
793
Neal Norwitz6968b052007-02-27 19:02:19 +0000794static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000795bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 if (i < 0 || i >= Py_SIZE(a)) {
798 PyErr_SetString(PyExc_IndexError, "index out of range");
799 return NULL;
800 }
801 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000802}
803
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000804static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000805bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 int c;
808 Py_ssize_t len_a, len_b;
809 Py_ssize_t min_len;
810 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 /* Make sure both arguments are strings. */
813 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
814 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
815 (PyObject_IsInstance((PyObject*)a,
816 (PyObject*)&PyUnicode_Type) ||
817 PyObject_IsInstance((PyObject*)b,
818 (PyObject*)&PyUnicode_Type))) {
819 if (PyErr_WarnEx(PyExc_BytesWarning,
820 "Comparison between bytes and string", 1))
821 return NULL;
822 }
823 result = Py_NotImplemented;
824 goto out;
825 }
826 if (a == b) {
827 switch (op) {
828 case Py_EQ:case Py_LE:case Py_GE:
829 result = Py_True;
830 goto out;
831 case Py_NE:case Py_LT:case Py_GT:
832 result = Py_False;
833 goto out;
834 }
835 }
836 if (op == Py_EQ) {
837 /* Supporting Py_NE here as well does not save
838 much time, since Py_NE is rarely used. */
839 if (Py_SIZE(a) == Py_SIZE(b)
840 && (a->ob_sval[0] == b->ob_sval[0]
841 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
842 result = Py_True;
843 } else {
844 result = Py_False;
845 }
846 goto out;
847 }
848 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
849 min_len = (len_a < len_b) ? len_a : len_b;
850 if (min_len > 0) {
851 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
852 if (c==0)
853 c = memcmp(a->ob_sval, b->ob_sval, min_len);
854 } else
855 c = 0;
856 if (c == 0)
857 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
858 switch (op) {
859 case Py_LT: c = c < 0; break;
860 case Py_LE: c = c <= 0; break;
861 case Py_EQ: assert(0); break; /* unreachable */
862 case Py_NE: c = c != 0; break;
863 case Py_GT: c = c > 0; break;
864 case Py_GE: c = c >= 0; break;
865 default:
866 result = Py_NotImplemented;
867 goto out;
868 }
869 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000870 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 Py_INCREF(result);
872 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873}
874
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000875static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000876bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000877{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100878 if (a->ob_shash == -1) {
879 /* Can't fail */
880 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
881 }
882 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000883}
884
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000885static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000886bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 if (PyIndex_Check(item)) {
889 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
890 if (i == -1 && PyErr_Occurred())
891 return NULL;
892 if (i < 0)
893 i += PyBytes_GET_SIZE(self);
894 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
895 PyErr_SetString(PyExc_IndexError,
896 "index out of range");
897 return NULL;
898 }
899 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
900 }
901 else if (PySlice_Check(item)) {
902 Py_ssize_t start, stop, step, slicelength, cur, i;
903 char* source_buf;
904 char* result_buf;
905 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000906
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000907 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 PyBytes_GET_SIZE(self),
909 &start, &stop, &step, &slicelength) < 0) {
910 return NULL;
911 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 if (slicelength <= 0) {
914 return PyBytes_FromStringAndSize("", 0);
915 }
916 else if (start == 0 && step == 1 &&
917 slicelength == PyBytes_GET_SIZE(self) &&
918 PyBytes_CheckExact(self)) {
919 Py_INCREF(self);
920 return (PyObject *)self;
921 }
922 else if (step == 1) {
923 return PyBytes_FromStringAndSize(
924 PyBytes_AS_STRING(self) + start,
925 slicelength);
926 }
927 else {
928 source_buf = PyBytes_AS_STRING(self);
929 result = PyBytes_FromStringAndSize(NULL, slicelength);
930 if (result == NULL)
931 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 result_buf = PyBytes_AS_STRING(result);
934 for (cur = start, i = 0; i < slicelength;
935 cur += step, i++) {
936 result_buf[i] = source_buf[cur];
937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 return result;
940 }
941 }
942 else {
943 PyErr_Format(PyExc_TypeError,
944 "byte indices must be integers, not %.200s",
945 Py_TYPE(item)->tp_name);
946 return NULL;
947 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948}
949
950static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000951bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000952{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
954 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955}
956
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000957static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 (lenfunc)bytes_length, /*sq_length*/
959 (binaryfunc)bytes_concat, /*sq_concat*/
960 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
961 (ssizeargfunc)bytes_item, /*sq_item*/
962 0, /*sq_slice*/
963 0, /*sq_ass_item*/
964 0, /*sq_ass_slice*/
965 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000966};
967
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000968static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 (lenfunc)bytes_length,
970 (binaryfunc)bytes_subscript,
971 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972};
973
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000974static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 (getbufferproc)bytes_buffer_getbuffer,
976 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000977};
978
979
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Argument-parsing format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its three
   leading characters ("|O:"). */
#define STRIPNAME(i) (&stripformat[i][3])
988
Neal Norwitz6968b052007-02-27 19:02:19 +0000989PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200990"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000991\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000992Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993If sep is not specified or is None, B is split on ASCII whitespace\n\
994characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000995If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000996
997static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200998bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000999{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001000 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1002 Py_ssize_t maxsplit = -1;
1003 const char *s = PyBytes_AS_STRING(self), *sub;
1004 Py_buffer vsub;
1005 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001006
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001007 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1008 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 return NULL;
1010 if (maxsplit < 0)
1011 maxsplit = PY_SSIZE_T_MAX;
1012 if (subobj == Py_None)
1013 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1014 if (_getbuffer(subobj, &vsub) < 0)
1015 return NULL;
1016 sub = vsub.buf;
1017 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001019 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1020 PyBuffer_Release(&vsub);
1021 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001022}
1023
Neal Norwitz6968b052007-02-27 19:02:19 +00001024PyDoc_STRVAR(partition__doc__,
1025"B.partition(sep) -> (head, sep, tail)\n\
1026\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001027Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001028the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001029found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001030
1031static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001032bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 const char *sep;
1035 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 if (PyBytes_Check(sep_obj)) {
1038 sep = PyBytes_AS_STRING(sep_obj);
1039 sep_len = PyBytes_GET_SIZE(sep_obj);
1040 }
1041 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1042 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 return stringlib_partition(
1045 (PyObject*) self,
1046 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1047 sep_obj, sep, sep_len
1048 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001049}
1050
1051PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001052"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001053\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001054Search for the separator sep in B, starting at the end of B,\n\
1055and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001056part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001058
1059static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001060bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001061{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 const char *sep;
1063 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 if (PyBytes_Check(sep_obj)) {
1066 sep = PyBytes_AS_STRING(sep_obj);
1067 sep_len = PyBytes_GET_SIZE(sep_obj);
1068 }
1069 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1070 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 return stringlib_rpartition(
1073 (PyObject*) self,
1074 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1075 sep_obj, sep, sep_len
1076 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001077}
1078
Neal Norwitz6968b052007-02-27 19:02:19 +00001079PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001080"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001081\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001082Return a list of the sections in B, using sep as the delimiter,\n\
1083starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001084If sep is not given, B is split on ASCII whitespace characters\n\
1085(space, tab, return, newline, formfeed, vertical tab).\n\
1086If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001088
Neal Norwitz6968b052007-02-27 19:02:19 +00001089static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001090bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001091{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001092 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1094 Py_ssize_t maxsplit = -1;
1095 const char *s = PyBytes_AS_STRING(self), *sub;
1096 Py_buffer vsub;
1097 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001098
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001099 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1100 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 return NULL;
1102 if (maxsplit < 0)
1103 maxsplit = PY_SSIZE_T_MAX;
1104 if (subobj == Py_None)
1105 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1106 if (_getbuffer(subobj, &vsub) < 0)
1107 return NULL;
1108 sub = vsub.buf;
1109 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1112 PyBuffer_Release(&vsub);
1113 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001114}
1115
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116
1117PyDoc_STRVAR(join__doc__,
1118"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001119\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001120Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001121Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1122
Neal Norwitz6968b052007-02-27 19:02:19 +00001123static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001124bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001125{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001126 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001127}
1128
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129PyObject *
1130_PyBytes_Join(PyObject *sep, PyObject *x)
1131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 assert(sep != NULL && PyBytes_Check(sep));
1133 assert(x != NULL);
1134 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135}
1136
/* Helper macro to fix up start/end slice values, in place.

   end   > len : clamped to len
   end   < 0   : len is added, then the value is clamped to 0 from below
   start < 0   : len is added, then the value is clamped to 0 from below
   (a start greater than len is left untouched)

   `start` and `end` must be modifiable lvalues; every argument may be
   evaluated more than once.  The body is wrapped in do { } while (0) so
   that the macro expands to exactly one statement and can be used safely
   inside an unbraced if/else; arguments are parenthesized so expressions
   can be passed for `len`. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151
1152Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001153bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001156 char byte;
1157 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 const char *sub;
1159 Py_ssize_t sub_len;
1160 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001161 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001162
Antoine Pitrouac65d962011-10-20 23:54:17 +02001163 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1164 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166
Antoine Pitrouac65d962011-10-20 23:54:17 +02001167 if (subobj) {
1168 if (_getbuffer(subobj, &subbuf) < 0)
1169 return -2;
1170
1171 sub = subbuf.buf;
1172 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001174 else {
1175 sub = &byte;
1176 sub_len = 1;
1177 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001180 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1182 sub, sub_len, start, end);
1183 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001184 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1186 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001187
1188 if (subobj)
1189 PyBuffer_Release(&subbuf);
1190
1191 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001192}
1193
1194
1195PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001196"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001197\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001198Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001199such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001201\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202Return -1 on failure.");
1203
Neal Norwitz6968b052007-02-27 19:02:19 +00001204static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001205bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 Py_ssize_t result = bytes_find_internal(self, args, +1);
1208 if (result == -2)
1209 return NULL;
1210 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001211}
1212
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213
1214PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001215"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001216\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217Like B.find() but raise ValueError when the substring is not found.");
1218
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001219static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001220bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 Py_ssize_t result = bytes_find_internal(self, args, +1);
1223 if (result == -2)
1224 return NULL;
1225 if (result == -1) {
1226 PyErr_SetString(PyExc_ValueError,
1227 "substring not found");
1228 return NULL;
1229 }
1230 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001231}
1232
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
1234PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001235"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001236\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001238such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001240\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241Return -1 on failure.");
1242
Neal Norwitz6968b052007-02-27 19:02:19 +00001243static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001244bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001245{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 Py_ssize_t result = bytes_find_internal(self, args, -1);
1247 if (result == -2)
1248 return NULL;
1249 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001250}
1251
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001252
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001254"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255\n\
1256Like B.rfind() but raise ValueError when the substring is not found.");
1257
1258static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 Py_ssize_t result = bytes_find_internal(self, args, -1);
1262 if (result == -2)
1263 return NULL;
1264 if (result == -1) {
1265 PyErr_SetString(PyExc_ValueError,
1266 "substring not found");
1267 return NULL;
1268 }
1269 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001270}
1271
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272
1273Py_LOCAL_INLINE(PyObject *)
1274do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 Py_buffer vsep;
1277 char *s = PyBytes_AS_STRING(self);
1278 Py_ssize_t len = PyBytes_GET_SIZE(self);
1279 char *sep;
1280 Py_ssize_t seplen;
1281 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 if (_getbuffer(sepobj, &vsep) < 0)
1284 return NULL;
1285 sep = vsep.buf;
1286 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 i = 0;
1289 if (striptype != RIGHTSTRIP) {
1290 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1291 i++;
1292 }
1293 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 j = len;
1296 if (striptype != LEFTSTRIP) {
1297 do {
1298 j--;
1299 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1300 j++;
1301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1306 Py_INCREF(self);
1307 return (PyObject*)self;
1308 }
1309 else
1310 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001311}
1312
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313
1314Py_LOCAL_INLINE(PyObject *)
1315do_strip(PyBytesObject *self, int striptype)
1316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 char *s = PyBytes_AS_STRING(self);
1318 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 i = 0;
1321 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001322 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 i++;
1324 }
1325 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 j = len;
1328 if (striptype != LEFTSTRIP) {
1329 do {
1330 j--;
David Malcolm96960882010-11-05 17:23:41 +00001331 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 j++;
1333 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1336 Py_INCREF(self);
1337 return (PyObject*)self;
1338 }
1339 else
1340 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001341}
1342
1343
1344Py_LOCAL_INLINE(PyObject *)
1345do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1350 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 if (sep != NULL && sep != Py_None) {
1353 return do_xstrip(self, striptype, sep);
1354 }
1355 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356}
1357
1358
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001359PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001361\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001362Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001363If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001364static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 if (PyTuple_GET_SIZE(args) == 0)
1368 return do_strip(self, BOTHSTRIP); /* Common case */
1369 else
1370 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001371}
1372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001374PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001376\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001377Strip leading bytes contained in the argument.\n\
1378If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001379static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001380bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 if (PyTuple_GET_SIZE(args) == 0)
1383 return do_strip(self, LEFTSTRIP); /* Common case */
1384 else
1385 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001386}
1387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001389PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001391\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001392Strip trailing bytes contained in the argument.\n\
1393If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001394static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001395bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 if (PyTuple_GET_SIZE(args) == 0)
1398 return do_strip(self, RIGHTSTRIP); /* Common case */
1399 else
1400 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001401}
Neal Norwitz6968b052007-02-27 19:02:19 +00001402
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
1404PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001405"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001406\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001408string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409as in slice notation.");
1410
1411static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001412bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 PyObject *sub_obj;
1415 const char *str = PyBytes_AS_STRING(self), *sub;
1416 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001417 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419
Antoine Pitrouac65d962011-10-20 23:54:17 +02001420 Py_buffer vsub;
1421 PyObject *count_obj;
1422
1423 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1424 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
Antoine Pitrouac65d962011-10-20 23:54:17 +02001427 if (sub_obj) {
1428 if (_getbuffer(sub_obj, &vsub) < 0)
1429 return NULL;
1430
1431 sub = vsub.buf;
1432 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001434 else {
1435 sub = &byte;
1436 sub_len = 1;
1437 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440
Antoine Pitrouac65d962011-10-20 23:54:17 +02001441 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1443 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001444
1445 if (sub_obj)
1446 PyBuffer_Release(&vsub);
1447
1448 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449}
1450
1451
1452PyDoc_STRVAR(translate__doc__,
1453"B.translate(table[, deletechars]) -> bytes\n\
1454\n\
1455Return a copy of B, where all characters occurring in the\n\
1456optional argument deletechars are removed, and the remaining\n\
1457characters have been mapped through the given translation\n\
1458table, which must be a bytes object of length 256.");
1459
1460static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001461bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001462{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 register char *input, *output;
1464 const char *table;
1465 register Py_ssize_t i, c, changed = 0;
1466 PyObject *input_obj = (PyObject*)self;
1467 const char *output_start, *del_table=NULL;
1468 Py_ssize_t inlen, tablen, dellen = 0;
1469 PyObject *result;
1470 int trans_table[256];
1471 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1474 &tableobj, &delobj))
1475 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 if (PyBytes_Check(tableobj)) {
1478 table = PyBytes_AS_STRING(tableobj);
1479 tablen = PyBytes_GET_SIZE(tableobj);
1480 }
1481 else if (tableobj == Py_None) {
1482 table = NULL;
1483 tablen = 256;
1484 }
1485 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1486 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 if (tablen != 256) {
1489 PyErr_SetString(PyExc_ValueError,
1490 "translation table must be 256 characters long");
1491 return NULL;
1492 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 if (delobj != NULL) {
1495 if (PyBytes_Check(delobj)) {
1496 del_table = PyBytes_AS_STRING(delobj);
1497 dellen = PyBytes_GET_SIZE(delobj);
1498 }
1499 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1500 return NULL;
1501 }
1502 else {
1503 del_table = NULL;
1504 dellen = 0;
1505 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 inlen = PyBytes_GET_SIZE(input_obj);
1508 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1509 if (result == NULL)
1510 return NULL;
1511 output_start = output = PyBytes_AsString(result);
1512 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 if (dellen == 0 && table != NULL) {
1515 /* If no deletions are required, use faster code */
1516 for (i = inlen; --i >= 0; ) {
1517 c = Py_CHARMASK(*input++);
1518 if (Py_CHARMASK((*output++ = table[c])) != c)
1519 changed = 1;
1520 }
1521 if (changed || !PyBytes_CheckExact(input_obj))
1522 return result;
1523 Py_DECREF(result);
1524 Py_INCREF(input_obj);
1525 return input_obj;
1526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 if (table == NULL) {
1529 for (i = 0; i < 256; i++)
1530 trans_table[i] = Py_CHARMASK(i);
1531 } else {
1532 for (i = 0; i < 256; i++)
1533 trans_table[i] = Py_CHARMASK(table[i]);
1534 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 for (i = 0; i < dellen; i++)
1537 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 for (i = inlen; --i >= 0; ) {
1540 c = Py_CHARMASK(*input++);
1541 if (trans_table[c] != -1)
1542 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1543 continue;
1544 changed = 1;
1545 }
1546 if (!changed && PyBytes_CheckExact(input_obj)) {
1547 Py_DECREF(result);
1548 Py_INCREF(input_obj);
1549 return input_obj;
1550 }
1551 /* Fix the size of the resulting string */
1552 if (inlen > 0)
1553 _PyBytes_Resize(&result, output - output_start);
1554 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001555}
1556
1557
Georg Brandlabc38772009-04-12 15:51:51 +00001558static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001559bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001562}
1563
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564/* find and count characters and substrings */
1565
/* Pointer to the first byte equal to c within target[0:target_len], or NULL
   when there is none.  Thin wrapper around memchr() that yields a char *. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1568
1569/* String ops must return a string. */
1570/* If the object is subclass of string, create a copy */
1571Py_LOCAL(PyBytesObject *)
1572return_self(PyBytesObject *self)
1573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 if (PyBytes_CheckExact(self)) {
1575 Py_INCREF(self);
1576 return self;
1577 }
1578 return (PyBytesObject *)PyBytes_FromStringAndSize(
1579 PyBytes_AS_STRING(self),
1580 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581}
1582
1583Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001584countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 Py_ssize_t count=0;
1587 const char *start=target;
1588 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 while ( (start=findchar(start, end-start, c)) != NULL ) {
1591 count++;
1592 if (count >= maxcount)
1593 break;
1594 start += 1;
1595 }
1596 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597}
1598
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599
1600/* Algorithms for different cases of string replacement */
1601
1602/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1603Py_LOCAL(PyBytesObject *)
1604replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 const char *to_s, Py_ssize_t to_len,
1606 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 char *self_s, *result_s;
1609 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001610 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001615 /* 1 at the end plus 1 after every character;
1616 count = min(maxcount, self_len + 1) */
1617 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001619 else
1620 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1621 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 /* Check for overflow */
1624 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001625 assert(count > 0);
1626 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 PyErr_SetString(PyExc_OverflowError,
1628 "replacement bytes are too long");
1629 return NULL;
1630 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001631 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 if (! (result = (PyBytesObject *)
1634 PyBytes_FromStringAndSize(NULL, result_len)) )
1635 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 self_s = PyBytes_AS_STRING(self);
1638 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 /* Lay the first one down (guaranteed this will occur) */
1643 Py_MEMCPY(result_s, to_s, to_len);
1644 result_s += to_len;
1645 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 for (i=0; i<count; i++) {
1648 *result_s++ = *self_s++;
1649 Py_MEMCPY(result_s, to_s, to_len);
1650 result_s += to_len;
1651 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 /* Copy the rest of the original string */
1654 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657}
1658
1659/* Special case for deleting a single character */
1660/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1661Py_LOCAL(PyBytesObject *)
1662replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 char *self_s, *result_s;
1666 char *start, *next, *end;
1667 Py_ssize_t self_len, result_len;
1668 Py_ssize_t count;
1669 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 self_len = PyBytes_GET_SIZE(self);
1672 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 count = countchar(self_s, self_len, from_c, maxcount);
1675 if (count == 0) {
1676 return return_self(self);
1677 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 result_len = self_len - count; /* from_len == 1 */
1680 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 if ( (result = (PyBytesObject *)
1683 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1684 return NULL;
1685 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 start = self_s;
1688 end = self_s + self_len;
1689 while (count-- > 0) {
1690 next = findchar(start, end-start, from_c);
1691 if (next == NULL)
1692 break;
1693 Py_MEMCPY(result_s, start, next-start);
1694 result_s += (next-start);
1695 start = next+1;
1696 }
1697 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700}
1701
1702/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1703
1704Py_LOCAL(PyBytesObject *)
1705replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 const char *from_s, Py_ssize_t from_len,
1707 Py_ssize_t maxcount) {
1708 char *self_s, *result_s;
1709 char *start, *next, *end;
1710 Py_ssize_t self_len, result_len;
1711 Py_ssize_t count, offset;
1712 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 self_len = PyBytes_GET_SIZE(self);
1715 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 count = stringlib_count(self_s, self_len,
1718 from_s, from_len,
1719 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 if (count == 0) {
1722 /* no matches */
1723 return return_self(self);
1724 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 result_len = self_len - (count * from_len);
1727 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 if ( (result = (PyBytesObject *)
1730 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1731 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 start = self_s;
1736 end = self_s + self_len;
1737 while (count-- > 0) {
1738 offset = stringlib_find(start, end-start,
1739 from_s, from_len,
1740 0);
1741 if (offset == -1)
1742 break;
1743 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 result_s += (next-start);
1748 start = next+from_len;
1749 }
1750 Py_MEMCPY(result_s, start, end-start);
1751 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752}
1753
1754/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1755Py_LOCAL(PyBytesObject *)
1756replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 char from_c, char to_c,
1758 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 char *self_s, *result_s, *start, *end, *next;
1761 Py_ssize_t self_len;
1762 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 /* The result string will be the same size */
1765 self_s = PyBytes_AS_STRING(self);
1766 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 if (next == NULL) {
1771 /* No matches; return the original string */
1772 return return_self(self);
1773 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 /* Need to make a new string */
1776 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1777 if (result == NULL)
1778 return NULL;
1779 result_s = PyBytes_AS_STRING(result);
1780 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 /* change everything in-place, starting with this one */
1783 start = result_s + (next-self_s);
1784 *start = to_c;
1785 start++;
1786 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 while (--maxcount > 0) {
1789 next = findchar(start, end-start, from_c);
1790 if (next == NULL)
1791 break;
1792 *next = to_c;
1793 start = next+1;
1794 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797}
1798
1799/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1800Py_LOCAL(PyBytesObject *)
1801replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 const char *from_s, Py_ssize_t from_len,
1803 const char *to_s, Py_ssize_t to_len,
1804 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 char *result_s, *start, *end;
1807 char *self_s;
1808 Py_ssize_t self_len, offset;
1809 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001813 self_s = PyBytes_AS_STRING(self);
1814 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 offset = stringlib_find(self_s, self_len,
1817 from_s, from_len,
1818 0);
1819 if (offset == -1) {
1820 /* No matches; return the original string */
1821 return return_self(self);
1822 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 /* Need to make a new string */
1825 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1826 if (result == NULL)
1827 return NULL;
1828 result_s = PyBytes_AS_STRING(result);
1829 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 /* change everything in-place, starting with this one */
1832 start = result_s + offset;
1833 Py_MEMCPY(start, to_s, from_len);
1834 start += from_len;
1835 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 while ( --maxcount > 0) {
1838 offset = stringlib_find(start, end-start,
1839 from_s, from_len,
1840 0);
1841 if (offset==-1)
1842 break;
1843 Py_MEMCPY(start+offset, to_s, from_len);
1844 start += offset+from_len;
1845 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848}
1849
1850/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1851Py_LOCAL(PyBytesObject *)
1852replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 char from_c,
1854 const char *to_s, Py_ssize_t to_len,
1855 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 char *self_s, *result_s;
1858 char *start, *next, *end;
1859 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001860 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 self_s = PyBytes_AS_STRING(self);
1864 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 count = countchar(self_s, self_len, from_c, maxcount);
1867 if (count == 0) {
1868 /* no matches, return unchanged */
1869 return return_self(self);
1870 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 /* use the difference between current and new, hence the "-1" */
1873 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001874 assert(count > 0);
1875 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 PyErr_SetString(PyExc_OverflowError,
1877 "replacement bytes are too long");
1878 return NULL;
1879 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001880 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 if ( (result = (PyBytesObject *)
1883 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1884 return NULL;
1885 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001887 start = self_s;
1888 end = self_s + self_len;
1889 while (count-- > 0) {
1890 next = findchar(start, end-start, from_c);
1891 if (next == NULL)
1892 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 if (next == start) {
1895 /* replace with the 'to' */
1896 Py_MEMCPY(result_s, to_s, to_len);
1897 result_s += to_len;
1898 start += 1;
1899 } else {
1900 /* copy the unchanged old then the 'to' */
1901 Py_MEMCPY(result_s, start, next-start);
1902 result_s += (next-start);
1903 Py_MEMCPY(result_s, to_s, to_len);
1904 result_s += to_len;
1905 start = next+1;
1906 }
1907 }
1908 /* Copy the remainder of the remaining string */
1909 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912}
1913
1914/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1915Py_LOCAL(PyBytesObject *)
1916replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 const char *from_s, Py_ssize_t from_len,
1918 const char *to_s, Py_ssize_t to_len,
1919 Py_ssize_t maxcount) {
1920 char *self_s, *result_s;
1921 char *start, *next, *end;
1922 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001923 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 self_s = PyBytes_AS_STRING(self);
1927 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 count = stringlib_count(self_s, self_len,
1930 from_s, from_len,
1931 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 if (count == 0) {
1934 /* no matches, return unchanged */
1935 return return_self(self);
1936 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 /* Check for overflow */
1939 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001940 assert(count > 0);
1941 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyErr_SetString(PyExc_OverflowError,
1943 "replacement bytes are too long");
1944 return NULL;
1945 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001946 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 if ( (result = (PyBytesObject *)
1949 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1950 return NULL;
1951 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 start = self_s;
1954 end = self_s + self_len;
1955 while (count-- > 0) {
1956 offset = stringlib_find(start, end-start,
1957 from_s, from_len,
1958 0);
1959 if (offset == -1)
1960 break;
1961 next = start+offset;
1962 if (next == start) {
1963 /* replace with the 'to' */
1964 Py_MEMCPY(result_s, to_s, to_len);
1965 result_s += to_len;
1966 start += from_len;
1967 } else {
1968 /* copy the unchanged old then the 'to' */
1969 Py_MEMCPY(result_s, start, next-start);
1970 result_s += (next-start);
1971 Py_MEMCPY(result_s, to_s, to_len);
1972 result_s += to_len;
1973 start = next+from_len;
1974 }
1975 }
1976 /* Copy the remainder of the remaining string */
1977 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
1982
1983Py_LOCAL(PyBytesObject *)
1984replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001985 const char *from_s, Py_ssize_t from_len,
1986 const char *to_s, Py_ssize_t to_len,
1987 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 if (maxcount < 0) {
1990 maxcount = PY_SSIZE_T_MAX;
1991 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1992 /* nothing to do; return the original string */
1993 return return_self(self);
1994 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 if (maxcount == 0 ||
1997 (from_len == 0 && to_len == 0)) {
1998 /* nothing to do; return the original string */
1999 return return_self(self);
2000 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 if (from_len == 0) {
2005 /* insert the 'to' string everywhere. */
2006 /* >>> "Python".replace("", ".") */
2007 /* '.P.y.t.h.o.n.' */
2008 return replace_interleave(self, to_s, to_len, maxcount);
2009 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2012 /* point for an empty self string to generate a non-empty string */
2013 /* Special case so the remaining code always gets a non-empty string */
2014 if (PyBytes_GET_SIZE(self) == 0) {
2015 return return_self(self);
2016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 if (to_len == 0) {
2019 /* delete all occurrences of 'from' string */
2020 if (from_len == 1) {
2021 return replace_delete_single_character(
2022 self, from_s[0], maxcount);
2023 } else {
2024 return replace_delete_substring(self, from_s,
2025 from_len, maxcount);
2026 }
2027 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 if (from_len == to_len) {
2032 if (from_len == 1) {
2033 return replace_single_character_in_place(
2034 self,
2035 from_s[0],
2036 to_s[0],
2037 maxcount);
2038 } else {
2039 return replace_substring_in_place(
2040 self, from_s, from_len, to_s, to_len,
2041 maxcount);
2042 }
2043 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 /* Otherwise use the more generic algorithms */
2046 if (from_len == 1) {
2047 return replace_single_character(self, from_s[0],
2048 to_s, to_len, maxcount);
2049 } else {
2050 /* len('from')>=2, len('to')>=1 */
2051 return replace_substring(self, from_s, from_len, to_s, to_len,
2052 maxcount);
2053 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054}
2055
2056PyDoc_STRVAR(replace__doc__,
2057"B.replace(old, new[, count]) -> bytes\n\
2058\n\
2059Return a copy of B with all occurrences of subsection\n\
2060old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002061given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
2063static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002064bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 Py_ssize_t count = -1;
2067 PyObject *from, *to;
2068 const char *from_s, *to_s;
2069 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2072 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 if (PyBytes_Check(from)) {
2075 from_s = PyBytes_AS_STRING(from);
2076 from_len = PyBytes_GET_SIZE(from);
2077 }
2078 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2079 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 if (PyBytes_Check(to)) {
2082 to_s = PyBytes_AS_STRING(to);
2083 to_len = PyBytes_GET_SIZE(to);
2084 }
2085 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2086 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 return (PyObject *)replace((PyBytesObject *) self,
2089 from_s, from_len,
2090 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091}
2092
2093/** End DALKE **/
2094
2095/* Matches the end (direction >= 0) or start (direction < 0) of self
2096 * against substr, using the start and end arguments. Returns
2097 * -1 on error, 0 if not found and 1 if found.
2098 */
2099Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002100_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 Py_ssize_t len = PyBytes_GET_SIZE(self);
2104 Py_ssize_t slen;
2105 const char* sub;
2106 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 if (PyBytes_Check(substr)) {
2109 sub = PyBytes_AS_STRING(substr);
2110 slen = PyBytes_GET_SIZE(substr);
2111 }
2112 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2113 return -1;
2114 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 if (direction < 0) {
2119 /* startswith */
2120 if (start+slen > len)
2121 return 0;
2122 } else {
2123 /* endswith */
2124 if (end-start < slen || start > len)
2125 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 if (end-slen > start)
2128 start = end - slen;
2129 }
2130 if (end-start >= slen)
2131 return ! memcmp(str+start, sub, slen);
2132 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133}
2134
2135
2136PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002137"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138\n\
2139Return True if B starts with the specified prefix, False otherwise.\n\
2140With optional start, test B beginning at that position.\n\
2141With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002142prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
2144static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002145bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 Py_ssize_t start = 0;
2148 Py_ssize_t end = PY_SSIZE_T_MAX;
2149 PyObject *subobj;
2150 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Jesus Ceaac451502011-04-20 17:09:23 +02002152 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 return NULL;
2154 if (PyTuple_Check(subobj)) {
2155 Py_ssize_t i;
2156 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2157 result = _bytes_tailmatch(self,
2158 PyTuple_GET_ITEM(subobj, i),
2159 start, end, -1);
2160 if (result == -1)
2161 return NULL;
2162 else if (result) {
2163 Py_RETURN_TRUE;
2164 }
2165 }
2166 Py_RETURN_FALSE;
2167 }
2168 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002169 if (result == -1) {
2170 if (PyErr_ExceptionMatches(PyExc_TypeError))
2171 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2172 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002174 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002175 else
2176 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177}
2178
2179
2180PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002181"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182\n\
2183Return True if B ends with the specified suffix, False otherwise.\n\
2184With optional start, test B beginning at that position.\n\
2185With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002186suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
2188static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002189bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 Py_ssize_t start = 0;
2192 Py_ssize_t end = PY_SSIZE_T_MAX;
2193 PyObject *subobj;
2194 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Jesus Ceaac451502011-04-20 17:09:23 +02002196 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 return NULL;
2198 if (PyTuple_Check(subobj)) {
2199 Py_ssize_t i;
2200 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2201 result = _bytes_tailmatch(self,
2202 PyTuple_GET_ITEM(subobj, i),
2203 start, end, +1);
2204 if (result == -1)
2205 return NULL;
2206 else if (result) {
2207 Py_RETURN_TRUE;
2208 }
2209 }
2210 Py_RETURN_FALSE;
2211 }
2212 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002213 if (result == -1) {
2214 if (PyErr_ExceptionMatches(PyExc_TypeError))
2215 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2216 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002217 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002218 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 else
2220 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221}
2222
2223
2224PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002225"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002227Decode B using the codec registered for encoding. Default encoding\n\
2228is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002229handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2230a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002232able to handle UnicodeDecodeErrors.");
2233
2234static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002235bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002237 const char *encoding = NULL;
2238 const char *errors = NULL;
2239 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002241 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2242 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002243 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002244}
2245
Guido van Rossum20188312006-05-05 15:15:40 +00002246
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002247PyDoc_STRVAR(splitlines__doc__,
2248"B.splitlines([keepends]) -> list of lines\n\
2249\n\
2250Return a list of the lines in B, breaking at line boundaries.\n\
2251Line breaks are not included in the resulting list unless keepends\n\
2252is given and true.");
2253
2254static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002255bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002256{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002257 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002258 int keepends = 0;
2259
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002260 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2261 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002262 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002263
2264 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002265 (PyObject*) self, PyBytes_AS_STRING(self),
2266 PyBytes_GET_SIZE(self), keepends
2267 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002268}
2269
2270
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002271PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002273\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002275Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002277
2278static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002279hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 if (c >= 128)
2282 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002283 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 return c - '0';
2285 else {
David Malcolm96960882010-11-05 17:23:41 +00002286 if (Py_ISUPPER(c))
2287 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002288 if (c >= 'a' && c <= 'f')
2289 return c - 'a' + 10;
2290 }
2291 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002292}
2293
2294static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002295bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002296{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 PyObject *newstring, *hexobj;
2298 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 Py_ssize_t hexlen, byteslen, i, j;
2300 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002301 void *data;
2302 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2305 return NULL;
2306 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002307 if (PyUnicode_READY(hexobj))
2308 return NULL;
2309 kind = PyUnicode_KIND(hexobj);
2310 data = PyUnicode_DATA(hexobj);
2311 hexlen = PyUnicode_GET_LENGTH(hexobj);
2312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 byteslen = hexlen/2; /* This overestimates if there are spaces */
2314 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2315 if (!newstring)
2316 return NULL;
2317 buf = PyBytes_AS_STRING(newstring);
2318 for (i = j = 0; i < hexlen; i += 2) {
2319 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002320 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 i++;
2322 if (i >= hexlen)
2323 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002324 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2325 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 if (top == -1 || bot == -1) {
2327 PyErr_Format(PyExc_ValueError,
2328 "non-hexadecimal number found in "
2329 "fromhex() arg at position %zd", i);
2330 goto error;
2331 }
2332 buf[j++] = (top << 4) + bot;
2333 }
2334 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2335 goto error;
2336 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002337
2338 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002339 Py_XDECREF(newstring);
2340 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002341}
2342
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002343PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002344"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002345
2346static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002347bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 Py_ssize_t res;
2350 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2351 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002352}
2353
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002354
2355static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002356bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002358 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002359}
2360
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002361
2362static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002363bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002364 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2365 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2366 _Py_capitalize__doc__},
2367 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2368 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2369 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2370 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2371 endswith__doc__},
2372 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2373 expandtabs__doc__},
2374 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2375 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2376 fromhex_doc},
2377 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2378 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2379 _Py_isalnum__doc__},
2380 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2381 _Py_isalpha__doc__},
2382 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2383 _Py_isdigit__doc__},
2384 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2385 _Py_islower__doc__},
2386 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2387 _Py_isspace__doc__},
2388 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2389 _Py_istitle__doc__},
2390 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2391 _Py_isupper__doc__},
2392 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2393 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2394 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2395 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2396 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2397 _Py_maketrans__doc__},
2398 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2399 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2400 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2401 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2402 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2403 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2404 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002405 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002407 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002408 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002409 splitlines__doc__},
2410 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2411 startswith__doc__},
2412 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2413 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2414 _Py_swapcase__doc__},
2415 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2416 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2417 translate__doc__},
2418 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2419 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2420 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2421 sizeof__doc__},
2422 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002423};
2424
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425static PyObject *
2426str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2427
2428static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002429bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 PyObject *x = NULL;
2432 const char *encoding = NULL;
2433 const char *errors = NULL;
2434 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002435 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 Py_ssize_t size;
2437 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002438 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 if (type != &PyBytes_Type)
2441 return str_subtype_new(type, args, kwds);
2442 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2443 &encoding, &errors))
2444 return NULL;
2445 if (x == NULL) {
2446 if (encoding != NULL || errors != NULL) {
2447 PyErr_SetString(PyExc_TypeError,
2448 "encoding or errors without sequence "
2449 "argument");
2450 return NULL;
2451 }
2452 return PyBytes_FromString("");
2453 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 if (PyUnicode_Check(x)) {
2456 /* Encode via the codec registry */
2457 if (encoding == NULL) {
2458 PyErr_SetString(PyExc_TypeError,
2459 "string argument without an encoding");
2460 return NULL;
2461 }
2462 new = PyUnicode_AsEncodedString(x, encoding, errors);
2463 if (new == NULL)
2464 return NULL;
2465 assert(PyBytes_Check(new));
2466 return new;
2467 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002468
2469 /* We'd like to call PyObject_Bytes here, but we need to check for an
2470 integer argument before deferring to PyBytes_FromObject, something
2471 PyObject_Bytes doesn't do. */
2472 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2473 if (func != NULL) {
2474 new = PyObject_CallFunctionObjArgs(func, NULL);
2475 Py_DECREF(func);
2476 if (new == NULL)
2477 return NULL;
2478 if (!PyBytes_Check(new)) {
2479 PyErr_Format(PyExc_TypeError,
2480 "__bytes__ returned non-bytes (type %.200s)",
2481 Py_TYPE(new)->tp_name);
2482 Py_DECREF(new);
2483 return NULL;
2484 }
2485 return new;
2486 }
2487 else if (PyErr_Occurred())
2488 return NULL;
2489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 /* Is it an integer? */
2491 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2492 if (size == -1 && PyErr_Occurred()) {
2493 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2494 return NULL;
2495 PyErr_Clear();
2496 }
2497 else if (size < 0) {
2498 PyErr_SetString(PyExc_ValueError, "negative count");
2499 return NULL;
2500 }
2501 else {
2502 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002503 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002505 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 return new;
2508 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 /* If it's not unicode, there can't be encoding or errors */
2511 if (encoding != NULL || errors != NULL) {
2512 PyErr_SetString(PyExc_TypeError,
2513 "encoding or errors without a string argument");
2514 return NULL;
2515 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002516
2517 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002518}
2519
2520PyObject *
2521PyBytes_FromObject(PyObject *x)
2522{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 PyObject *new, *it;
2524 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 if (x == NULL) {
2527 PyErr_BadInternalCall();
2528 return NULL;
2529 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002530
2531 if (PyBytes_CheckExact(x)) {
2532 Py_INCREF(x);
2533 return x;
2534 }
2535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 /* Use the modern buffer interface */
2537 if (PyObject_CheckBuffer(x)) {
2538 Py_buffer view;
2539 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2540 return NULL;
2541 new = PyBytes_FromStringAndSize(NULL, view.len);
2542 if (!new)
2543 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2545 &view, view.len, 'C') < 0)
2546 goto fail;
2547 PyBuffer_Release(&view);
2548 return new;
2549 fail:
2550 Py_XDECREF(new);
2551 PyBuffer_Release(&view);
2552 return NULL;
2553 }
2554 if (PyUnicode_Check(x)) {
2555 PyErr_SetString(PyExc_TypeError,
2556 "cannot convert unicode object to bytes");
2557 return NULL;
2558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 if (PyList_CheckExact(x)) {
2561 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2562 if (new == NULL)
2563 return NULL;
2564 for (i = 0; i < Py_SIZE(x); i++) {
2565 Py_ssize_t value = PyNumber_AsSsize_t(
2566 PyList_GET_ITEM(x, i), PyExc_ValueError);
2567 if (value == -1 && PyErr_Occurred()) {
2568 Py_DECREF(new);
2569 return NULL;
2570 }
2571 if (value < 0 || value >= 256) {
2572 PyErr_SetString(PyExc_ValueError,
2573 "bytes must be in range(0, 256)");
2574 Py_DECREF(new);
2575 return NULL;
2576 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002577 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 }
2579 return new;
2580 }
2581 if (PyTuple_CheckExact(x)) {
2582 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2583 if (new == NULL)
2584 return NULL;
2585 for (i = 0; i < Py_SIZE(x); i++) {
2586 Py_ssize_t value = PyNumber_AsSsize_t(
2587 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2588 if (value == -1 && PyErr_Occurred()) {
2589 Py_DECREF(new);
2590 return NULL;
2591 }
2592 if (value < 0 || value >= 256) {
2593 PyErr_SetString(PyExc_ValueError,
2594 "bytes must be in range(0, 256)");
2595 Py_DECREF(new);
2596 return NULL;
2597 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002598 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002599 }
2600 return new;
2601 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002604 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 if (size == -1 && PyErr_Occurred())
2606 return NULL;
2607 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2608 returning a shared empty bytes string. This required because we
2609 want to call _PyBytes_Resize() the returned object, which we can
2610 only do on bytes objects with refcount == 1. */
2611 size += 1;
2612 new = PyBytes_FromStringAndSize(NULL, size);
2613 if (new == NULL)
2614 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 /* Get the iterator */
2617 it = PyObject_GetIter(x);
2618 if (it == NULL)
2619 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 /* Run the iterator to exhaustion */
2622 for (i = 0; ; i++) {
2623 PyObject *item;
2624 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 /* Get the next item */
2627 item = PyIter_Next(it);
2628 if (item == NULL) {
2629 if (PyErr_Occurred())
2630 goto error;
2631 break;
2632 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002634 /* Interpret it as an int (__index__) */
2635 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2636 Py_DECREF(item);
2637 if (value == -1 && PyErr_Occurred())
2638 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* Range check */
2641 if (value < 0 || value >= 256) {
2642 PyErr_SetString(PyExc_ValueError,
2643 "bytes must be in range(0, 256)");
2644 goto error;
2645 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 /* Append the byte */
2648 if (i >= size) {
2649 size = 2 * size + 1;
2650 if (_PyBytes_Resize(&new, size) < 0)
2651 goto error;
2652 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002653 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 }
2655 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002657 /* Clean up and return success */
2658 Py_DECREF(it);
2659 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
2661 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 /* Error handling when new != NULL */
2663 Py_XDECREF(it);
2664 Py_DECREF(new);
2665 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666}
2667
2668static PyObject *
2669str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 PyObject *tmp, *pnew;
2672 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 assert(PyType_IsSubtype(type, &PyBytes_Type));
2675 tmp = bytes_new(&PyBytes_Type, args, kwds);
2676 if (tmp == NULL)
2677 return NULL;
2678 assert(PyBytes_CheckExact(tmp));
2679 n = PyBytes_GET_SIZE(tmp);
2680 pnew = type->tp_alloc(type, n);
2681 if (pnew != NULL) {
2682 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2683 PyBytes_AS_STRING(tmp), n+1);
2684 ((PyBytesObject *)pnew)->ob_shash =
2685 ((PyBytesObject *)tmp)->ob_shash;
2686 }
2687 Py_DECREF(tmp);
2688 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689}
2690
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002691PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002692"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002694bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002695bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2696bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002697\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002699 - an iterable yielding integers in range(256)\n\
2700 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002701 - any object implementing the buffer API.\n\
2702 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002703
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002704static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002705
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2708 "bytes",
2709 PyBytesObject_SIZE,
2710 sizeof(char),
2711 bytes_dealloc, /* tp_dealloc */
2712 0, /* tp_print */
2713 0, /* tp_getattr */
2714 0, /* tp_setattr */
2715 0, /* tp_reserved */
2716 (reprfunc)bytes_repr, /* tp_repr */
2717 0, /* tp_as_number */
2718 &bytes_as_sequence, /* tp_as_sequence */
2719 &bytes_as_mapping, /* tp_as_mapping */
2720 (hashfunc)bytes_hash, /* tp_hash */
2721 0, /* tp_call */
2722 bytes_str, /* tp_str */
2723 PyObject_GenericGetAttr, /* tp_getattro */
2724 0, /* tp_setattro */
2725 &bytes_as_buffer, /* tp_as_buffer */
2726 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2727 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2728 bytes_doc, /* tp_doc */
2729 0, /* tp_traverse */
2730 0, /* tp_clear */
2731 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2732 0, /* tp_weaklistoffset */
2733 bytes_iter, /* tp_iter */
2734 0, /* tp_iternext */
2735 bytes_methods, /* tp_methods */
2736 0, /* tp_members */
2737 0, /* tp_getset */
2738 &PyBaseObject_Type, /* tp_base */
2739 0, /* tp_dict */
2740 0, /* tp_descr_get */
2741 0, /* tp_descr_set */
2742 0, /* tp_dictoffset */
2743 0, /* tp_init */
2744 0, /* tp_alloc */
2745 bytes_new, /* tp_new */
2746 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002747};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002748
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749void
2750PyBytes_Concat(register PyObject **pv, register PyObject *w)
2751{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 register PyObject *v;
2753 assert(pv != NULL);
2754 if (*pv == NULL)
2755 return;
2756 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002757 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 return;
2759 }
2760 v = bytes_concat(*pv, w);
2761 Py_DECREF(*pv);
2762 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763}
2764
2765void
2766PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2767{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002768 PyBytes_Concat(pv, w);
2769 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770}
2771
2772
2773/* The following function breaks the notion that strings are immutable:
2774 it changes the size of a string. We get away with this only if there
2775 is only one module referencing the object. You can also think of it
2776 as creating a new string object and destroying the old one, only
2777 more efficiently. In any case, don't use this if the string may
2778 already be known to some other part of the code...
2779 Note that if there's not enough memory to resize the string, the original
2780 string object at *pv is deallocated, *pv is set to NULL, an "out of
2781 memory" exception is set, and -1 is returned. Else (on success) 0 is
2782 returned, and the value in *pv may or may not be the same as on input.
2783 As always, an extra byte is allocated for a trailing \0 byte (newsize
2784 does *not* include that), and a trailing \0 byte is stored.
2785*/
2786
2787int
2788_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 register PyObject *v;
2791 register PyBytesObject *sv;
2792 v = *pv;
2793 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2794 *pv = 0;
2795 Py_DECREF(v);
2796 PyErr_BadInternalCall();
2797 return -1;
2798 }
2799 /* XXX UNREF/NEWREF interface should be more symmetrical */
2800 _Py_DEC_REFTOTAL;
2801 _Py_ForgetReference(v);
2802 *pv = (PyObject *)
2803 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2804 if (*pv == NULL) {
2805 PyObject_Del(v);
2806 PyErr_NoMemory();
2807 return -1;
2808 }
2809 _Py_NewReference(*pv);
2810 sv = (PyBytesObject *) *pv;
2811 Py_SIZE(sv) = newsize;
2812 sv->ob_sval[newsize] = '\0';
2813 sv->ob_shash = -1; /* invalidate cached hash value */
2814 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815}
2816
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817void
2818PyBytes_Fini(void)
2819{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002821 for (i = 0; i < UCHAR_MAX + 1; i++)
2822 Py_CLEAR(characters[i]);
2823 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824}
2825
Benjamin Peterson4116f362008-05-27 00:36:20 +00002826/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002827
2828typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002829 PyObject_HEAD
2830 Py_ssize_t it_index;
2831 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002833
2834static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002835striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002836{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 _PyObject_GC_UNTRACK(it);
2838 Py_XDECREF(it->it_seq);
2839 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002840}
2841
2842static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002844{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 Py_VISIT(it->it_seq);
2846 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002847}
2848
2849static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002850striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 PyBytesObject *seq;
2853 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 assert(it != NULL);
2856 seq = it->it_seq;
2857 if (seq == NULL)
2858 return NULL;
2859 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002861 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2862 item = PyLong_FromLong(
2863 (unsigned char)seq->ob_sval[it->it_index]);
2864 if (item != NULL)
2865 ++it->it_index;
2866 return item;
2867 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 Py_DECREF(seq);
2870 it->it_seq = NULL;
2871 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002872}
2873
2874static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 Py_ssize_t len = 0;
2878 if (it->it_seq)
2879 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2880 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002881}
2882
2883PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002885
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002886static PyObject *
2887striter_reduce(striterobject *it)
2888{
2889 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002890 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002891 it->it_seq, it->it_index);
2892 } else {
2893 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2894 if (u == NULL)
2895 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002896 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002897 }
2898}
2899
2900PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2901
2902static PyObject *
2903striter_setstate(striterobject *it, PyObject *state)
2904{
2905 Py_ssize_t index = PyLong_AsSsize_t(state);
2906 if (index == -1 && PyErr_Occurred())
2907 return NULL;
2908 if (index < 0)
2909 index = 0;
2910 it->it_index = index;
2911 Py_RETURN_NONE;
2912}
2913
2914PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2915
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002917 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2918 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002919 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2920 reduce_doc},
2921 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2922 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002924};
2925
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2928 "bytes_iterator", /* tp_name */
2929 sizeof(striterobject), /* tp_basicsize */
2930 0, /* tp_itemsize */
2931 /* methods */
2932 (destructor)striter_dealloc, /* tp_dealloc */
2933 0, /* tp_print */
2934 0, /* tp_getattr */
2935 0, /* tp_setattr */
2936 0, /* tp_reserved */
2937 0, /* tp_repr */
2938 0, /* tp_as_number */
2939 0, /* tp_as_sequence */
2940 0, /* tp_as_mapping */
2941 0, /* tp_hash */
2942 0, /* tp_call */
2943 0, /* tp_str */
2944 PyObject_GenericGetAttr, /* tp_getattro */
2945 0, /* tp_setattro */
2946 0, /* tp_as_buffer */
2947 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2948 0, /* tp_doc */
2949 (traverseproc)striter_traverse, /* tp_traverse */
2950 0, /* tp_clear */
2951 0, /* tp_richcompare */
2952 0, /* tp_weaklistoffset */
2953 PyObject_SelfIter, /* tp_iter */
2954 (iternextfunc)striter_next, /* tp_iternext */
2955 striter_methods, /* tp_methods */
2956 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002957};
2958
2959static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002960bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002961{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 if (!PyBytes_Check(seq)) {
2965 PyErr_BadInternalCall();
2966 return NULL;
2967 }
2968 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
2969 if (it == NULL)
2970 return NULL;
2971 it->it_index = 0;
2972 Py_INCREF(seq);
2973 it->it_seq = (PyBytesObject *)seq;
2974 _PyObject_GC_TRACK(it);
2975 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002976}