blob: b64178d623db143f661cc78193020ba1b1c8070d [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz2bad9702007-08-27 06:19:22 +00009static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000010_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000011{
Christian Heimes90aa7642007-12-19 02:45:37 +000012 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000013
Gregory P. Smith60d241f2007-10-16 06:31:30 +000014 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000015 {
16 PyErr_Format(PyExc_TypeError,
17 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000018 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000019 return -1;
20 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000021
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000022 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
23 return -1;
24 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000025}
26
#ifdef COUNT_ALLOCS
/* Incremented each time a cached empty / one-byte object is handed out
   by the constructors in this file. */
int null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value.  Slots start
   out NULL and are filled in by the constructors in this file. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first one is created. */
static PyBytesObject *nullstring;
33
34/*
35 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
39 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
 For PyBytes_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
 PyBytes object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
 is therefore equal to the `size' parameter (for
57 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000060PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063 register PyBytesObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyBytes_FromStringAndSize");
67 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
86 /* Inline PyObject_NewVar */
87 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
88 if (op == NULL)
89 return PyErr_NoMemory();
90 PyObject_INIT_VAR(op, &PyBytes_Type, size);
91 op->ob_shash = -1;
92 if (str != NULL)
93 Py_MEMCPY(op->ob_sval, str, size);
94 op->ob_sval[size] = '\0';
95 /* share short strings */
96 if (size == 0) {
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 characters[*str & UCHAR_MAX] = op;
101 Py_INCREF(op);
102 }
103 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000104}
105
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000106PyObject *
107PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000109 register size_t size;
110 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000111
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000112 assert(str != NULL);
113 size = strlen(str);
114 if (size > PY_SSIZE_T_MAX) {
115 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000116 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117 return NULL;
118 }
119 if (size == 0 && (op = nullstring) != NULL) {
120#ifdef COUNT_ALLOCS
121 null_strings++;
122#endif
123 Py_INCREF(op);
124 return (PyObject *)op;
125 }
126 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
127#ifdef COUNT_ALLOCS
128 one_strings++;
129#endif
130 Py_INCREF(op);
131 return (PyObject *)op;
132 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134 /* Inline PyObject_NewVar */
135 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
136 if (op == NULL)
137 return PyErr_NoMemory();
138 PyObject_INIT_VAR(op, &PyBytes_Type, size);
139 op->ob_shash = -1;
140 Py_MEMCPY(op->ob_sval, str, size+1);
141 /* share short strings */
142 if (size == 0) {
143 nullstring = op;
144 Py_INCREF(op);
145 } else if (size == 1) {
146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
150}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromFormatV(const char *format, va_list vargs)
154{
155 va_list count;
156 Py_ssize_t n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000160
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161#ifdef VA_LIST_IS_ARRAY
162 Py_MEMCPY(count, vargs, sizeof(va_list));
163#else
164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
167 count = vargs;
168#endif
169#endif
170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !ISALPHA(*f))
175 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000176
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000177 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
178 * they don't affect the amount of space we reserve.
179 */
180 if ((*f == 'l' || *f == 'z') &&
181 (f[1] == 'd' || f[1] == 'u'))
182 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000183
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'u': case 'i': case 'x':
192 (void) va_arg(count, int);
193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
207 * XXX I count 18 -- what's the extra for?
208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
217 what's in the argument list) */
218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
228 string = PyBytes_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000231
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000232 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000233
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 Py_ssize_t i;
238 int longflag = 0;
239 int size_tflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (ISDIGIT(*f))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (ISDIGIT(*f))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !ISALPHA(*f))
252 f++;
253 /* handle the long flag, but only for %ld and %lu.
254 others can be added when necessary. */
255 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
256 longflag = 1;
257 ++f;
258 }
259 /* handle the size_t flag. */
260 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
261 size_tflag = 1;
262 ++f;
263 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000264
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000265 switch (*f) {
266 case 'c':
267 *s++ = va_arg(vargs, int);
268 break;
269 case 'd':
270 if (longflag)
271 sprintf(s, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(s, "%d", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'u':
280 if (longflag)
281 sprintf(s, "%lu",
282 va_arg(vargs, unsigned long));
283 else if (size_tflag)
284 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
285 va_arg(vargs, size_t));
286 else
287 sprintf(s, "%u",
288 va_arg(vargs, unsigned int));
289 s += strlen(s);
290 break;
291 case 'i':
292 sprintf(s, "%i", va_arg(vargs, int));
293 s += strlen(s);
294 break;
295 case 'x':
296 sprintf(s, "%x", va_arg(vargs, int));
297 s += strlen(s);
298 break;
299 case 's':
300 p = va_arg(vargs, char*);
301 i = strlen(p);
302 if (n > 0 && i > n)
303 i = n;
304 Py_MEMCPY(s, p, i);
305 s += i;
306 break;
307 case 'p':
308 sprintf(s, "%p", va_arg(vargs, void*));
309 /* %p is ill-defined: ensure leading 0x. */
310 if (s[1] == 'X')
311 s[1] = 'x';
312 else if (s[1] != 'x') {
313 memmove(s+2, s, strlen(s)+1);
314 s[0] = '0';
315 s[1] = 'x';
316 }
317 s += strlen(s);
318 break;
319 case '%':
320 *s++ = '%';
321 break;
322 default:
323 strcpy(s, p);
324 s += strlen(s);
325 goto end;
326 }
327 } else
328 *s++ = *f;
329 }
330
331 end:
332 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
333 return string;
334}
335
336PyObject *
337PyBytes_FromFormat(const char *format, ...)
338{
339 PyObject* ret;
340 va_list vargs;
341
342#ifdef HAVE_STDARG_PROTOTYPES
343 va_start(vargs, format);
344#else
345 va_start(vargs);
346#endif
347 ret = PyBytes_FromFormatV(format, vargs);
348 va_end(vargs);
349 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000350}
351
352static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000356}
357
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358/* Unescape a backslash-escaped string. If unicode is non-zero,
359 the string is a u-literal. If recode_encoding is non-zero,
360 the string is UTF-8 encoded and should be re-encoded in the
361 specified encoding. */
362
363PyObject *PyBytes_DecodeEscape(const char *s,
364 Py_ssize_t len,
365 const char *errors,
366 Py_ssize_t unicode,
367 const char *recode_encoding)
368{
369 int c;
370 char *p, *buf;
371 const char *end;
372 PyObject *v;
373 Py_ssize_t newlen = recode_encoding ? 4*len:len;
374 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
375 if (v == NULL)
376 return NULL;
377 p = buf = PyBytes_AsString(v);
378 end = s + len;
379 while (s < end) {
380 if (*s != '\\') {
381 non_esc:
382 if (recode_encoding && (*s & 0x80)) {
383 PyObject *u, *w;
384 char *r;
385 const char* t;
386 Py_ssize_t rn;
387 t = s;
388 /* Decode non-ASCII bytes as UTF-8. */
389 while (t < end && (*t & 0x80)) t++;
390 u = PyUnicode_DecodeUTF8(s, t - s, errors);
391 if(!u) goto failed;
392
393 /* Recode them in target encoding. */
394 w = PyUnicode_AsEncodedString(
395 u, recode_encoding, errors);
396 Py_DECREF(u);
397 if (!w) goto failed;
398
399 /* Append bytes to output buffer. */
400 assert(PyBytes_Check(w));
401 r = PyBytes_AS_STRING(w);
402 rn = PyBytes_GET_SIZE(w);
403 Py_MEMCPY(p, r, rn);
404 p += rn;
405 Py_DECREF(w);
406 s = t;
407 } else {
408 *p++ = *s++;
409 }
410 continue;
411 }
412 s++;
413 if (s==end) {
414 PyErr_SetString(PyExc_ValueError,
415 "Trailing \\ in string");
416 goto failed;
417 }
418 switch (*s++) {
419 /* XXX This assumes ASCII! */
420 case '\n': break;
421 case '\\': *p++ = '\\'; break;
422 case '\'': *p++ = '\''; break;
423 case '\"': *p++ = '\"'; break;
424 case 'b': *p++ = '\b'; break;
425 case 'f': *p++ = '\014'; break; /* FF */
426 case 't': *p++ = '\t'; break;
427 case 'n': *p++ = '\n'; break;
428 case 'r': *p++ = '\r'; break;
429 case 'v': *p++ = '\013'; break; /* VT */
430 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
431 case '0': case '1': case '2': case '3':
432 case '4': case '5': case '6': case '7':
433 c = s[-1] - '0';
434 if (s < end && '0' <= *s && *s <= '7') {
435 c = (c<<3) + *s++ - '0';
436 if (s < end && '0' <= *s && *s <= '7')
437 c = (c<<3) + *s++ - '0';
438 }
439 *p++ = c;
440 break;
441 case 'x':
442 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
443 unsigned int x = 0;
444 c = Py_CHARMASK(*s);
445 s++;
446 if (ISDIGIT(c))
447 x = c - '0';
448 else if (ISLOWER(c))
449 x = 10 + c - 'a';
450 else
451 x = 10 + c - 'A';
452 x = x << 4;
453 c = Py_CHARMASK(*s);
454 s++;
455 if (ISDIGIT(c))
456 x += c - '0';
457 else if (ISLOWER(c))
458 x += 10 + c - 'a';
459 else
460 x += 10 + c - 'A';
461 *p++ = x;
462 break;
463 }
464 if (!errors || strcmp(errors, "strict") == 0) {
465 PyErr_SetString(PyExc_ValueError,
466 "invalid \\x escape");
467 goto failed;
468 }
469 if (strcmp(errors, "replace") == 0) {
470 *p++ = '?';
471 } else if (strcmp(errors, "ignore") == 0)
472 /* do nothing */;
473 else {
474 PyErr_Format(PyExc_ValueError,
475 "decoding error; unknown "
476 "error handling code: %.400s",
477 errors);
478 goto failed;
479 }
480 default:
481 *p++ = '\\';
482 s--;
483 goto non_esc; /* an arbitry number of unescaped
484 UTF-8 bytes may follow. */
485 }
486 }
487 if (p-buf < newlen)
488 _PyBytes_Resize(&v, p - buf);
489 return v;
490 failed:
491 Py_DECREF(v);
492 return NULL;
493}
494
495/* -------------------------------------------------------------------- */
496/* object api */
497
498Py_ssize_t
499PyBytes_Size(register PyObject *op)
500{
501 if (!PyBytes_Check(op)) {
502 PyErr_Format(PyExc_TypeError,
503 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
504 return -1;
505 }
506 return Py_SIZE(op);
507}
508
509char *
510PyBytes_AsString(register PyObject *op)
511{
512 if (!PyBytes_Check(op)) {
513 PyErr_Format(PyExc_TypeError,
514 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
515 return NULL;
516 }
517 return ((PyBytesObject *)op)->ob_sval;
518}
519
520int
521PyBytes_AsStringAndSize(register PyObject *obj,
522 register char **s,
523 register Py_ssize_t *len)
524{
525 if (s == NULL) {
526 PyErr_BadInternalCall();
527 return -1;
528 }
529
530 if (!PyBytes_Check(obj)) {
531 PyErr_Format(PyExc_TypeError,
532 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
533 return -1;
534 }
535
536 *s = PyBytes_AS_STRING(obj);
537 if (len != NULL)
538 *len = PyBytes_GET_SIZE(obj);
539 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
540 PyErr_SetString(PyExc_TypeError,
541 "expected bytes with no null");
542 return -1;
543 }
544 return 0;
545}
Neal Norwitz6968b052007-02-27 19:02:19 +0000546
547/* -------------------------------------------------------------------- */
548/* Methods */
549
/* Settings defined ahead of the stringlib headers included below.
   NOTE(review): presumably those headers read these macros to
   specialise themselves for bytes; they are not visible here -- confirm. */
#define STRINGLIB_CHAR char

#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
#define STRINGLIB_STR PyBytes_AS_STRING
/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */

#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_MUTABLE 0

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/ctype.h"
#include "stringlib/transmogrify.h"

/* Rename the grouping helper before including localeutil.h so that this
   file refers to it as _PyBytes_InsertThousandsGrouping. */
#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573PyObject *
574PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000575{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576 static const char *hexdigits = "0123456789abcdef";
577 register PyBytesObject* op = (PyBytesObject*) obj;
578 Py_ssize_t length = Py_SIZE(op);
579 size_t newsize = 3 + 4 * length;
580 PyObject *v;
581 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
582 PyErr_SetString(PyExc_OverflowError,
583 "bytes object is too large to make repr");
584 return NULL;
585 }
586 v = PyUnicode_FromUnicode(NULL, newsize);
587 if (v == NULL) {
588 return NULL;
589 }
590 else {
591 register Py_ssize_t i;
592 register Py_UNICODE c;
593 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
594 int quote;
595
596 /* Figure out which quote to use; single is preferred */
597 quote = '\'';
598 if (smartquotes) {
599 char *test, *start;
600 start = PyBytes_AS_STRING(op);
601 for (test = start; test < start+length; ++test) {
602 if (*test == '"') {
603 quote = '\''; /* back to single */
604 goto decided;
605 }
606 else if (*test == '\'')
607 quote = '"';
608 }
609 decided:
610 ;
611 }
612
613 *p++ = 'b', *p++ = quote;
614 for (i = 0; i < length; i++) {
615 /* There's at least enough room for a hex escape
616 and a closing quote. */
617 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
618 c = op->ob_sval[i];
619 if (c == quote || c == '\\')
620 *p++ = '\\', *p++ = c;
621 else if (c == '\t')
622 *p++ = '\\', *p++ = 't';
623 else if (c == '\n')
624 *p++ = '\\', *p++ = 'n';
625 else if (c == '\r')
626 *p++ = '\\', *p++ = 'r';
627 else if (c < ' ' || c >= 0x7f) {
628 *p++ = '\\';
629 *p++ = 'x';
630 *p++ = hexdigits[(c & 0xf0) >> 4];
631 *p++ = hexdigits[c & 0xf];
632 }
633 else
634 *p++ = c;
635 }
636 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
637 *p++ = quote;
638 *p = '\0';
639 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
640 Py_DECREF(v);
641 return NULL;
642 }
643 return v;
644 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000645}
646
Neal Norwitz6968b052007-02-27 19:02:19 +0000647static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000648string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000649{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000650 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656 if (Py_BytesWarningFlag) {
657 if (PyErr_WarnEx(PyExc_BytesWarning,
658 "str() on a bytes instance", 1))
659 return NULL;
660 }
661 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664static Py_ssize_t
665string_length(PyBytesObject *a)
666{
667 return Py_SIZE(a);
668}
Neal Norwitz6968b052007-02-27 19:02:19 +0000669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670/* This is also used by PyBytes_Concat() */
671static PyObject *
672string_concat(PyObject *a, PyObject *b)
673{
674 Py_ssize_t size;
675 Py_buffer va, vb;
676 PyObject *result = NULL;
677
678 va.len = -1;
679 vb.len = -1;
680 if (_getbuffer(a, &va) < 0 ||
681 _getbuffer(b, &vb) < 0) {
682 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
683 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
684 goto done;
685 }
686
687 /* Optimize end cases */
688 if (va.len == 0 && PyBytes_CheckExact(b)) {
689 result = b;
690 Py_INCREF(result);
691 goto done;
692 }
693 if (vb.len == 0 && PyBytes_CheckExact(a)) {
694 result = a;
695 Py_INCREF(result);
696 goto done;
697 }
698
699 size = va.len + vb.len;
700 if (size < 0) {
701 PyErr_NoMemory();
702 goto done;
703 }
704
705 result = PyBytes_FromStringAndSize(NULL, size);
706 if (result != NULL) {
707 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
708 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
709 }
710
711 done:
712 if (va.len != -1)
713 PyObject_ReleaseBuffer(a, &va);
714 if (vb.len != -1)
715 PyObject_ReleaseBuffer(b, &vb);
716 return result;
717}
Neal Norwitz6968b052007-02-27 19:02:19 +0000718
719static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000721{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000722 register Py_ssize_t i;
723 register Py_ssize_t j;
724 register Py_ssize_t size;
725 register PyBytesObject *op;
726 size_t nbytes;
727 if (n < 0)
728 n = 0;
729 /* watch out for overflows: the size can overflow int,
730 * and the # of bytes needed can overflow size_t
731 */
732 size = Py_SIZE(a) * n;
733 if (n && size / n != Py_SIZE(a)) {
734 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000735 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000736 return NULL;
737 }
738 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
739 Py_INCREF(a);
740 return (PyObject *)a;
741 }
742 nbytes = (size_t)size;
743 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
744 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000745 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000746 return NULL;
747 }
748 op = (PyBytesObject *)
749 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000773string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
777 Py_buffer varg;
778 int pos;
779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783 varg.buf, varg.len, 0);
784 PyObject_ReleaseBuffer(arg, &varg);
785 return pos >= 0;
786 }
787 if (ival < 0 || ival >= 256) {
788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
790 }
791
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000792 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000799 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000805static PyObject*
806string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000807{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808 int c;
809 Py_ssize_t len_a, len_b;
810 Py_ssize_t min_len;
811 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000812
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000813 /* Make sure both arguments are strings. */
814 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
815 if (Py_BytesWarningFlag && (op == Py_EQ) &&
816 (PyObject_IsInstance((PyObject*)a,
817 (PyObject*)&PyUnicode_Type) ||
818 PyObject_IsInstance((PyObject*)b,
819 (PyObject*)&PyUnicode_Type))) {
820 if (PyErr_WarnEx(PyExc_BytesWarning,
821 "Comparsion between bytes and string", 1))
822 return NULL;
823 }
824 result = Py_NotImplemented;
825 goto out;
826 }
827 if (a == b) {
828 switch (op) {
829 case Py_EQ:case Py_LE:case Py_GE:
830 result = Py_True;
831 goto out;
832 case Py_NE:case Py_LT:case Py_GT:
833 result = Py_False;
834 goto out;
835 }
836 }
837 if (op == Py_EQ) {
838 /* Supporting Py_NE here as well does not save
839 much time, since Py_NE is rarely used. */
840 if (Py_SIZE(a) == Py_SIZE(b)
841 && (a->ob_sval[0] == b->ob_sval[0]
842 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
843 result = Py_True;
844 } else {
845 result = Py_False;
846 }
847 goto out;
848 }
849 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
850 min_len = (len_a < len_b) ? len_a : len_b;
851 if (min_len > 0) {
852 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
853 if (c==0)
854 c = memcmp(a->ob_sval, b->ob_sval, min_len);
855 } else
856 c = 0;
857 if (c == 0)
858 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
859 switch (op) {
860 case Py_LT: c = c < 0; break;
861 case Py_LE: c = c <= 0; break;
862 case Py_EQ: assert(0); break; /* unreachable */
863 case Py_NE: c = c != 0; break;
864 case Py_GT: c = c > 0; break;
865 case Py_GE: c = c >= 0; break;
866 default:
867 result = Py_NotImplemented;
868 goto out;
869 }
870 result = c ? Py_True : Py_False;
871 out:
872 Py_INCREF(result);
873 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000876static long
877string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000878{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000879 register Py_ssize_t len;
880 register unsigned char *p;
881 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000882
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883 if (a->ob_shash != -1)
884 return a->ob_shash;
885 len = Py_SIZE(a);
886 p = (unsigned char *) a->ob_sval;
887 x = *p << 7;
888 while (--len >= 0)
889 x = (1000003*x) ^ *p++;
890 x ^= Py_SIZE(a);
891 if (x == -1)
892 x = -2;
893 a->ob_shash = x;
894 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000895}
896
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000897static PyObject*
898string_subscript(PyBytesObject* self, PyObject* item)
899{
900 if (PyIndex_Check(item)) {
901 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
902 if (i == -1 && PyErr_Occurred())
903 return NULL;
904 if (i < 0)
905 i += PyBytes_GET_SIZE(self);
906 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
907 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000908 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000909 return NULL;
910 }
911 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
912 }
913 else if (PySlice_Check(item)) {
914 Py_ssize_t start, stop, step, slicelength, cur, i;
915 char* source_buf;
916 char* result_buf;
917 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000918
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000919 if (PySlice_GetIndicesEx((PySliceObject*)item,
920 PyBytes_GET_SIZE(self),
921 &start, &stop, &step, &slicelength) < 0) {
922 return NULL;
923 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000924
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000925 if (slicelength <= 0) {
926 return PyBytes_FromStringAndSize("", 0);
927 }
928 else if (start == 0 && step == 1 &&
929 slicelength == PyBytes_GET_SIZE(self) &&
930 PyBytes_CheckExact(self)) {
931 Py_INCREF(self);
932 return (PyObject *)self;
933 }
934 else if (step == 1) {
935 return PyBytes_FromStringAndSize(
936 PyBytes_AS_STRING(self) + start,
937 slicelength);
938 }
939 else {
940 source_buf = PyBytes_AsString((PyObject*)self);
941 result_buf = (char *)PyMem_Malloc(slicelength);
942 if (result_buf == NULL)
943 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000944
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945 for (cur = start, i = 0; i < slicelength;
946 cur += step, i++) {
947 result_buf[i] = source_buf[cur];
948 }
949
950 result = PyBytes_FromStringAndSize(result_buf,
951 slicelength);
952 PyMem_Free(result_buf);
953 return result;
954 }
955 }
956 else {
957 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000958 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959 Py_TYPE(item)->tp_name);
960 return NULL;
961 }
962}
963
964static int
965string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
966{
967 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
968 0, flags);
969}
970
/* Sequence protocol slots for bytes.  The slots set to 0 (slicing and
   item/slice assignment) are not provided through this table. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    0,      /*sq_slice*/
    0,      /*sq_ass_item*/
    0,      /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
981
/* Mapping protocol slots for bytes: length and subscript (index or
   slice); the third slot, left 0, is not provided. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
987
/* Buffer protocol slots for bytes: a getbuffer function only; the
   second slot is left NULL. */
static PyBufferProcs string_as_buffer = {
    (getbufferproc)string_buffer_getbuffer,
    NULL,
};
992
993
/* Strip modes; also used as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1002
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
/* True if `length' bytes of `target' starting at `offset' equal
   `pattern'.  The first and last bytes are compared directly before
   memcmp() is called on the middle.  Don't call if length < 2.
   Arguments are parenthesized so that expression arguments expand
   correctly; each argument may be evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1009
1010
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   element than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The cap is tested before adding 1, so a very large
   maxsplit never reaches the addition.  The argument is parenthesized so
   that any expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00001023
/* Append the bytes data[left:right] to the result list of a split helper.

   Relies on names from the expanding function: `str` (scratch PyObject *),
   `list` (the result list), `count` (number of items stored so far) and an
   `onError` label.  While count is below MAX_PREALLOC the new object is
   stored straight into slot `count` with PyList_SET_ITEM; after that it is
   added with PyList_Append and the local reference is dropped.  Any failure
   jumps to onError. */
#define SPLIT_ADD(data, left, right) {                                  \
    str = PyBytes_FromStringAndSize((data) + (left),                    \
                                    (right) - (left));                  \
    if (!str)                                                           \
        goto onError;                                                   \
    if (count >= MAX_PREALLOC) {                                        \
        int split_add_failed_ = PyList_Append(list, str);               \
        Py_DECREF(str);                                                 \
        if (split_add_failed_)                                          \
            goto onError;                                               \
    }                                                                   \
    else {                                                              \
        PyList_SET_ITEM(list, count, str);                              \
    }                                                                   \
    count++; }

/* Always force the list to the expected size: set its size field to the
   number of items actually stored (`count` in the expanding function). */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
/* Cursor helpers for the whitespace split routines.  Each one moves the
   index variable `i` in place over s[]:
     SKIP_SPACE / SKIP_NONSPACE    advance i while i < len and s[i] is /
                                   is not matched by ISSPACE;
     RSKIP_SPACE / RSKIP_NONSPACE  step i backwards while i >= 0 and s[i]
                                   is / is not matched by ISSPACE. */
#define SKIP_SPACE(s, i, len)    { for (; i < len && ISSPACE(s[i]); i++) ; }
#define SKIP_NONSPACE(s, i, len) { for (; i < len && !ISSPACE(s[i]); i++) ; }
#define RSKIP_SPACE(s, i)        { for (; i >= 0 && ISSPACE(s[i]); i--) ; }
#define RSKIP_NONSPACE(s, i)     { for (; i >= 0 && !ISSPACE(s[i]); i--) ; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001050split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052 const char *s = PyBytes_AS_STRING(self);
1053 Py_ssize_t i, j, count=0;
1054 PyObject *str;
1055 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057 if (list == NULL)
1058 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062 while (maxsplit-- > 0) {
1063 SKIP_SPACE(s, i, len);
1064 if (i==len) break;
1065 j = i; i++;
1066 SKIP_NONSPACE(s, i, len);
1067 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1068 /* No whitespace in self, so just use it as list[0] */
1069 Py_INCREF(self);
1070 PyList_SET_ITEM(list, 0, (PyObject *)self);
1071 count++;
1072 break;
1073 }
1074 SPLIT_ADD(s, j, i);
1075 }
1076
1077 if (i < len) {
1078 /* Only occurs when maxsplit was reached */
1079 /* Skip any remaining whitespace and copy to end of string */
1080 SKIP_SPACE(s, i, len);
1081 if (i != len)
1082 SPLIT_ADD(s, i, len);
1083 }
1084 FIX_PREALLOC_SIZE(list);
1085 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001087 Py_DECREF(list);
1088 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Guido van Rossum8f950672007-09-10 16:53:45 +00001091Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001092split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001093{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094 const char *s = PyBytes_AS_STRING(self);
1095 register Py_ssize_t i, j, count=0;
1096 PyObject *str;
1097 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099 if (list == NULL)
1100 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102 i = j = 0;
1103 while ((j < len) && (maxcount-- > 0)) {
1104 for(; j<len; j++) {
1105 /* I found that using memchr makes no difference */
1106 if (s[j] == ch) {
1107 SPLIT_ADD(s, i, j);
1108 i = j = j + 1;
1109 break;
1110 }
1111 }
1112 }
1113 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1114 /* ch not in self, so just use self as list[0] */
1115 Py_INCREF(self);
1116 PyList_SET_ITEM(list, 0, (PyObject *)self);
1117 count++;
1118 }
1119 else if (i <= len) {
1120 SPLIT_ADD(s, i, len);
1121 }
1122 FIX_PREALLOC_SIZE(list);
1123 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001124
1125 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126 Py_DECREF(list);
1127 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001128}
1129
Neal Norwitz6968b052007-02-27 19:02:19 +00001130PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001132\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001133Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134If sep is not specified or is None, B is split on ASCII whitespace\n\
1135characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001136If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
1138static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001139string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001140{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1142 Py_ssize_t maxsplit = -1, count=0;
1143 const char *s = PyBytes_AS_STRING(self), *sub;
1144 Py_buffer vsub;
1145 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001146#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001148#endif
1149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1151 return NULL;
1152 if (maxsplit < 0)
1153 maxsplit = PY_SSIZE_T_MAX;
1154 if (subobj == Py_None)
1155 return split_whitespace(self, len, maxsplit);
1156 if (_getbuffer(subobj, &vsub) < 0)
1157 return NULL;
1158 sub = vsub.buf;
1159 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001160
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161 if (n == 0) {
1162 PyErr_SetString(PyExc_ValueError, "empty separator");
1163 PyObject_ReleaseBuffer(subobj, &vsub);
1164 return NULL;
1165 }
1166 else if (n == 1)
1167 return split_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001168
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169 list = PyList_New(PREALLOC_SIZE(maxsplit));
1170 if (list == NULL) {
1171 PyObject_ReleaseBuffer(subobj, &vsub);
1172 return NULL;
1173 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001174
1175#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001176 i = j = 0;
1177 while (maxsplit-- > 0) {
1178 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1179 if (pos < 0)
1180 break;
1181 j = i+pos;
1182 SPLIT_ADD(s, i, j);
1183 i = j + n;
1184 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001185#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186 i = j = 0;
1187 while ((j+n <= len) && (maxsplit-- > 0)) {
1188 for (; j+n <= len; j++) {
1189 if (Py_STRING_MATCH(s, j, sub, n)) {
1190 SPLIT_ADD(s, i, j);
1191 i = j = j + n;
1192 break;
1193 }
1194 }
1195 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001196#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001197 SPLIT_ADD(s, i, len);
1198 FIX_PREALLOC_SIZE(list);
1199 PyObject_ReleaseBuffer(subobj, &vsub);
1200 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001201
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202 onError:
1203 Py_DECREF(list);
1204 PyObject_ReleaseBuffer(subobj, &vsub);
1205 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001206}
1207
Neal Norwitz6968b052007-02-27 19:02:19 +00001208PyDoc_STRVAR(partition__doc__,
1209"B.partition(sep) -> (head, sep, tail)\n\
1210\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001211Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001212the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001214
1215static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001217{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 const char *sep;
1219 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001220
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 if (PyBytes_Check(sep_obj)) {
1222 sep = PyBytes_AS_STRING(sep_obj);
1223 sep_len = PyBytes_GET_SIZE(sep_obj);
1224 }
1225 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1226 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001227
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228 return stringlib_partition(
1229 (PyObject*) self,
1230 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1231 sep_obj, sep, sep_len
1232 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001233}
1234
1235PyDoc_STRVAR(rpartition__doc__,
1236"B.rpartition(sep) -> (tail, sep, head)\n\
1237\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001238Search for the separator sep in B, starting at the end of B,\n\
1239and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001240part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001242
1243static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001245{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246 const char *sep;
1247 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001248
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249 if (PyBytes_Check(sep_obj)) {
1250 sep = PyBytes_AS_STRING(sep_obj);
1251 sep_len = PyBytes_GET_SIZE(sep_obj);
1252 }
1253 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1254 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256 return stringlib_rpartition(
1257 (PyObject*) self,
1258 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1259 sep_obj, sep, sep_len
1260 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001261}
1262
1263Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001265{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266 const char *s = PyBytes_AS_STRING(self);
1267 Py_ssize_t i, j, count=0;
1268 PyObject *str;
1269 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271 if (list == NULL)
1272 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276 while (maxsplit-- > 0) {
1277 RSKIP_SPACE(s, i);
1278 if (i<0) break;
1279 j = i; i--;
1280 RSKIP_NONSPACE(s, i);
1281 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1282 /* No whitespace in self, so just use it as list[0] */
1283 Py_INCREF(self);
1284 PyList_SET_ITEM(list, 0, (PyObject *)self);
1285 count++;
1286 break;
1287 }
1288 SPLIT_ADD(s, i + 1, j + 1);
1289 }
1290 if (i >= 0) {
1291 /* Only occurs when maxsplit was reached. Skip any remaining
1292 whitespace and copy to beginning of string. */
1293 RSKIP_SPACE(s, i);
1294 if (i >= 0)
1295 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001296
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297 }
1298 FIX_PREALLOC_SIZE(list);
1299 if (PyList_Reverse(list) < 0)
1300 goto onError;
1301 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001302 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303 Py_DECREF(list);
1304 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305}
1306
Guido van Rossum8f950672007-09-10 16:53:45 +00001307Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001309{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310 const char *s = PyBytes_AS_STRING(self);
1311 register Py_ssize_t i, j, count=0;
1312 PyObject *str;
1313 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001314
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315 if (list == NULL)
1316 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001317
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 i = j = len - 1;
1319 while ((i >= 0) && (maxcount-- > 0)) {
1320 for (; i >= 0; i--) {
1321 if (s[i] == ch) {
1322 SPLIT_ADD(s, i + 1, j + 1);
1323 j = i = i - 1;
1324 break;
1325 }
1326 }
1327 }
1328 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1329 /* ch not in self, so just use self as list[0] */
1330 Py_INCREF(self);
1331 PyList_SET_ITEM(list, 0, (PyObject *)self);
1332 count++;
1333 }
1334 else if (j >= -1) {
1335 SPLIT_ADD(s, 0, j + 1);
1336 }
1337 FIX_PREALLOC_SIZE(list);
1338 if (PyList_Reverse(list) < 0)
1339 goto onError;
1340 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342 onError:
1343 Py_DECREF(list);
1344 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001345}
1346
Neal Norwitz6968b052007-02-27 19:02:19 +00001347PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001348"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001349\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001350Return a list of the sections in B, using sep as the delimiter,\n\
1351starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001352If sep is not given, B is split on ASCII whitespace characters\n\
1353(space, tab, return, newline, formfeed, vertical tab).\n\
1354If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001355
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356
Neal Norwitz6968b052007-02-27 19:02:19 +00001357static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001359{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1361 Py_ssize_t maxsplit = -1, count=0;
1362 const char *s, *sub;
1363 Py_buffer vsub;
1364 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1367 return NULL;
1368 if (maxsplit < 0)
1369 maxsplit = PY_SSIZE_T_MAX;
1370 if (subobj == Py_None)
1371 return rsplit_whitespace(self, len, maxsplit);
1372 if (_getbuffer(subobj, &vsub) < 0)
1373 return NULL;
1374 sub = vsub.buf;
1375 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001376
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377 if (n == 0) {
1378 PyErr_SetString(PyExc_ValueError, "empty separator");
1379 PyObject_ReleaseBuffer(subobj, &vsub);
1380 return NULL;
1381 }
1382 else if (n == 1)
1383 return rsplit_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001384
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385 list = PyList_New(PREALLOC_SIZE(maxsplit));
1386 if (list == NULL) {
1387 PyObject_ReleaseBuffer(subobj, &vsub);
1388 return NULL;
1389 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391 j = len;
1392 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001393
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 s = PyBytes_AS_STRING(self);
1395 while ( (i >= 0) && (maxsplit-- > 0) ) {
1396 for (; i>=0; i--) {
1397 if (Py_STRING_MATCH(s, i, sub, n)) {
1398 SPLIT_ADD(s, i + n, j);
1399 j = i;
1400 i -= n;
1401 break;
1402 }
1403 }
1404 }
1405 SPLIT_ADD(s, 0, j);
1406 FIX_PREALLOC_SIZE(list);
1407 if (PyList_Reverse(list) < 0)
1408 goto onError;
1409 PyObject_ReleaseBuffer(subobj, &vsub);
1410 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001411
1412onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413 Py_DECREF(list);
1414 PyObject_ReleaseBuffer(subobj, &vsub);
1415 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001416}
1417
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418#undef SPLIT_ADD
1419#undef MAX_PREALLOC
1420#undef PREALLOC_SIZE
1421
1422
1423PyDoc_STRVAR(join__doc__,
1424"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001425\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001426Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1428
Neal Norwitz6968b052007-02-27 19:02:19 +00001429static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001431{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432 char *sep = PyBytes_AS_STRING(self);
1433 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1434 PyObject *res = NULL;
1435 char *p;
1436 Py_ssize_t seqlen = 0;
1437 size_t sz = 0;
1438 Py_ssize_t i;
1439 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001440
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441 seq = PySequence_Fast(orig, "");
1442 if (seq == NULL) {
1443 return NULL;
1444 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446 seqlen = PySequence_Size(seq);
1447 if (seqlen == 0) {
1448 Py_DECREF(seq);
1449 return PyBytes_FromString("");
1450 }
1451 if (seqlen == 1) {
1452 item = PySequence_Fast_GET_ITEM(seq, 0);
1453 if (PyBytes_CheckExact(item)) {
1454 Py_INCREF(item);
1455 Py_DECREF(seq);
1456 return item;
1457 }
1458 }
1459
1460 /* There are at least two things to join, or else we have a subclass
1461 * of the builtin types in the sequence.
1462 * Do a pre-pass to figure out the total amount of space we'll
1463 * need (sz), and see whether all argument are bytes.
1464 */
1465 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1466 for (i = 0; i < seqlen; i++) {
1467 const size_t old_sz = sz;
1468 item = PySequence_Fast_GET_ITEM(seq, i);
1469 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1470 PyErr_Format(PyExc_TypeError,
1471 "sequence item %zd: expected bytes,"
1472 " %.80s found",
1473 i, Py_TYPE(item)->tp_name);
1474 Py_DECREF(seq);
1475 return NULL;
1476 }
1477 sz += Py_SIZE(item);
1478 if (i != 0)
1479 sz += seplen;
1480 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1481 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001482 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483 Py_DECREF(seq);
1484 return NULL;
1485 }
1486 }
1487
1488 /* Allocate result space. */
1489 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1490 if (res == NULL) {
1491 Py_DECREF(seq);
1492 return NULL;
1493 }
1494
1495 /* Catenate everything. */
1496 /* I'm not worried about a PyByteArray item growing because there's
1497 nowhere in this function where we release the GIL. */
1498 p = PyBytes_AS_STRING(res);
1499 for (i = 0; i < seqlen; ++i) {
1500 size_t n;
1501 char *q;
1502 if (i) {
1503 Py_MEMCPY(p, sep, seplen);
1504 p += seplen;
1505 }
1506 item = PySequence_Fast_GET_ITEM(seq, i);
1507 n = Py_SIZE(item);
1508 if (PyBytes_Check(item))
1509 q = PyBytes_AS_STRING(item);
1510 else
1511 q = PyByteArray_AS_STRING(item);
1512 Py_MEMCPY(p, q, n);
1513 p += n;
1514 }
1515
1516 Py_DECREF(seq);
1517 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520PyObject *
1521_PyBytes_Join(PyObject *sep, PyObject *x)
1522{
1523 assert(sep != NULL && PyBytes_Check(sep));
1524 assert(x != NULL);
1525 return string_join(sep, x);
1526}
1527
1528Py_LOCAL_INLINE(void)
1529string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1530{
1531 if (*end > len)
1532 *end = len;
1533 else if (*end < 0)
1534 *end += len;
1535 if (*end < 0)
1536 *end = 0;
1537 if (*start < 0)
1538 *start += len;
1539 if (*start < 0)
1540 *start = 0;
1541}
1542
1543Py_LOCAL_INLINE(Py_ssize_t)
1544string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1545{
1546 PyObject *subobj;
1547 const char *sub;
1548 Py_ssize_t sub_len;
1549 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1550 PyObject *obj_start=Py_None, *obj_end=Py_None;
1551
1552 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1553 &obj_start, &obj_end))
1554 return -2;
1555 /* To support None in "start" and "end" arguments, meaning
1556 the same as if they were not passed.
1557 */
1558 if (obj_start != Py_None)
1559 if (!_PyEval_SliceIndex(obj_start, &start))
1560 return -2;
1561 if (obj_end != Py_None)
1562 if (!_PyEval_SliceIndex(obj_end, &end))
1563 return -2;
1564
1565 if (PyBytes_Check(subobj)) {
1566 sub = PyBytes_AS_STRING(subobj);
1567 sub_len = PyBytes_GET_SIZE(subobj);
1568 }
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1570 /* XXX - the "expected a character buffer object" is pretty
1571 confusing for a non-expert. remap to something else ? */
1572 return -2;
1573
1574 if (dir > 0)
1575 return stringlib_find_slice(
1576 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1577 sub, sub_len, start, end);
1578 else
1579 return stringlib_rfind_slice(
1580 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1581 sub, sub_len, start, end);
1582}
1583
1584
1585PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001586"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001587\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588Return the lowest index in S where substring sub is found,\n\
1589such that sub is contained within s[start:end]. Optional\n\
1590arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001591\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001592Return -1 on failure.");
1593
Neal Norwitz6968b052007-02-27 19:02:19 +00001594static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001596{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597 Py_ssize_t result = string_find_internal(self, args, +1);
1598 if (result == -2)
1599 return NULL;
1600 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001601}
1602
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603
1604PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001605"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001606\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607Like B.find() but raise ValueError when the substring is not found.");
1608
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001609static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001611{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612 Py_ssize_t result = string_find_internal(self, args, +1);
1613 if (result == -2)
1614 return NULL;
1615 if (result == -1) {
1616 PyErr_SetString(PyExc_ValueError,
1617 "substring not found");
1618 return NULL;
1619 }
1620 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623
1624PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001625"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001626\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627Return the highest index in B where substring sub is found,\n\
1628such that sub is contained within s[start:end]. Optional\n\
1629arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001630\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001631Return -1 on failure.");
1632
Neal Norwitz6968b052007-02-27 19:02:19 +00001633static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001635{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636 Py_ssize_t result = string_find_internal(self, args, -1);
1637 if (result == -2)
1638 return NULL;
1639 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001640}
1641
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001644"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645\n\
1646Like B.rfind() but raise ValueError when the substring is not found.");
1647
1648static PyObject *
1649string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001650{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651 Py_ssize_t result = string_find_internal(self, args, -1);
1652 if (result == -2)
1653 return NULL;
1654 if (result == -1) {
1655 PyErr_SetString(PyExc_ValueError,
1656 "substring not found");
1657 return NULL;
1658 }
1659 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001660}
1661
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662
1663Py_LOCAL_INLINE(PyObject *)
1664do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001665{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666 Py_buffer vsep;
1667 char *s = PyBytes_AS_STRING(self);
1668 Py_ssize_t len = PyBytes_GET_SIZE(self);
1669 char *sep;
1670 Py_ssize_t seplen;
1671 Py_ssize_t i, j;
1672
1673 if (_getbuffer(sepobj, &vsep) < 0)
1674 return NULL;
1675 sep = vsep.buf;
1676 seplen = vsep.len;
1677
1678 i = 0;
1679 if (striptype != RIGHTSTRIP) {
1680 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1681 i++;
1682 }
1683 }
1684
1685 j = len;
1686 if (striptype != LEFTSTRIP) {
1687 do {
1688 j--;
1689 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1690 j++;
1691 }
1692
1693 PyObject_ReleaseBuffer(sepobj, &vsep);
1694
1695 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1696 Py_INCREF(self);
1697 return (PyObject*)self;
1698 }
1699 else
1700 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001701}
1702
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
1704Py_LOCAL_INLINE(PyObject *)
1705do_strip(PyBytesObject *self, int striptype)
1706{
1707 char *s = PyBytes_AS_STRING(self);
1708 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1709
1710 i = 0;
1711 if (striptype != RIGHTSTRIP) {
1712 while (i < len && ISSPACE(s[i])) {
1713 i++;
1714 }
1715 }
1716
1717 j = len;
1718 if (striptype != LEFTSTRIP) {
1719 do {
1720 j--;
1721 } while (j >= i && ISSPACE(s[j]));
1722 j++;
1723 }
1724
1725 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1726 Py_INCREF(self);
1727 return (PyObject*)self;
1728 }
1729 else
1730 return PyBytes_FromStringAndSize(s+i, j-i);
1731}
1732
1733
1734Py_LOCAL_INLINE(PyObject *)
1735do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1736{
1737 PyObject *sep = NULL;
1738
1739 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1740 return NULL;
1741
1742 if (sep != NULL && sep != Py_None) {
1743 return do_xstrip(self, striptype, sep);
1744 }
1745 return do_strip(self, striptype);
1746}
1747
1748
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001749PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001751\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001752Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001754static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001756{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757 if (PyTuple_GET_SIZE(args) == 0)
1758 return do_strip(self, BOTHSTRIP); /* Common case */
1759 else
1760 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001761}
1762
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001764PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001766\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001767Strip leading bytes contained in the argument.\n\
1768If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001771{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772 if (PyTuple_GET_SIZE(args) == 0)
1773 return do_strip(self, LEFTSTRIP); /* Common case */
1774 else
1775 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776}
1777
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001780"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001782Strip trailing bytes contained in the argument.\n\
1783If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001786{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787 if (PyTuple_GET_SIZE(args) == 0)
1788 return do_strip(self, RIGHTSTRIP); /* Common case */
1789 else
1790 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791}
Neal Norwitz6968b052007-02-27 19:02:19 +00001792
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
1794PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001795"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001796\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797Return the number of non-overlapping occurrences of substring sub in\n\
1798string S[start:end]. Optional arguments start and end are interpreted\n\
1799as in slice notation.");
1800
1801static PyObject *
1802string_count(PyBytesObject *self, PyObject *args)
1803{
1804 PyObject *sub_obj;
1805 const char *str = PyBytes_AS_STRING(self), *sub;
1806 Py_ssize_t sub_len;
1807 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1808
1809 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1810 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1811 return NULL;
1812
1813 if (PyBytes_Check(sub_obj)) {
1814 sub = PyBytes_AS_STRING(sub_obj);
1815 sub_len = PyBytes_GET_SIZE(sub_obj);
1816 }
1817 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1818 return NULL;
1819
1820 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1821
1822 return PyLong_FromSsize_t(
1823 stringlib_count(str + start, end - start, sub, sub_len)
1824 );
1825}
1826
1827
1828PyDoc_STRVAR(translate__doc__,
1829"B.translate(table[, deletechars]) -> bytes\n\
1830\n\
1831Return a copy of B, where all characters occurring in the\n\
1832optional argument deletechars are removed, and the remaining\n\
1833characters have been mapped through the given translation\n\
1834table, which must be a bytes object of length 256.");
1835
1836static PyObject *
1837string_translate(PyBytesObject *self, PyObject *args)
1838{
1839 register char *input, *output;
1840 const char *table;
1841 register Py_ssize_t i, c, changed = 0;
1842 PyObject *input_obj = (PyObject*)self;
1843 const char *output_start, *del_table=NULL;
1844 Py_ssize_t inlen, tablen, dellen = 0;
1845 PyObject *result;
1846 int trans_table[256];
1847 PyObject *tableobj, *delobj = NULL;
1848
1849 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1850 &tableobj, &delobj))
1851 return NULL;
1852
1853 if (PyBytes_Check(tableobj)) {
1854 table = PyBytes_AS_STRING(tableobj);
1855 tablen = PyBytes_GET_SIZE(tableobj);
1856 }
1857 else if (tableobj == Py_None) {
1858 table = NULL;
1859 tablen = 256;
1860 }
1861 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1862 return NULL;
1863
1864 if (tablen != 256) {
1865 PyErr_SetString(PyExc_ValueError,
1866 "translation table must be 256 characters long");
1867 return NULL;
1868 }
1869
1870 if (delobj != NULL) {
1871 if (PyBytes_Check(delobj)) {
1872 del_table = PyBytes_AS_STRING(delobj);
1873 dellen = PyBytes_GET_SIZE(delobj);
1874 }
1875 else if (PyUnicode_Check(delobj)) {
1876 PyErr_SetString(PyExc_TypeError,
1877 "deletions are implemented differently for unicode");
1878 return NULL;
1879 }
1880 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1881 return NULL;
1882 }
1883 else {
1884 del_table = NULL;
1885 dellen = 0;
1886 }
1887
1888 inlen = PyBytes_GET_SIZE(input_obj);
1889 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1890 if (result == NULL)
1891 return NULL;
1892 output_start = output = PyBytes_AsString(result);
1893 input = PyBytes_AS_STRING(input_obj);
1894
1895 if (dellen == 0 && table != NULL) {
1896 /* If no deletions are required, use faster code */
1897 for (i = inlen; --i >= 0; ) {
1898 c = Py_CHARMASK(*input++);
1899 if (Py_CHARMASK((*output++ = table[c])) != c)
1900 changed = 1;
1901 }
1902 if (changed || !PyBytes_CheckExact(input_obj))
1903 return result;
1904 Py_DECREF(result);
1905 Py_INCREF(input_obj);
1906 return input_obj;
1907 }
1908
1909 if (table == NULL) {
1910 for (i = 0; i < 256; i++)
1911 trans_table[i] = Py_CHARMASK(i);
1912 } else {
1913 for (i = 0; i < 256; i++)
1914 trans_table[i] = Py_CHARMASK(table[i]);
1915 }
1916
1917 for (i = 0; i < dellen; i++)
1918 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1919
1920 for (i = inlen; --i >= 0; ) {
1921 c = Py_CHARMASK(*input++);
1922 if (trans_table[c] != -1)
1923 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1924 continue;
1925 changed = 1;
1926 }
1927 if (!changed && PyBytes_CheckExact(input_obj)) {
1928 Py_DECREF(result);
1929 Py_INCREF(input_obj);
1930 return input_obj;
1931 }
1932 /* Fix the size of the resulting string */
1933 if (inlen > 0)
1934 _PyBytes_Resize(&result, output - output_start);
1935 return result;
1936}
1937
1938
/* Search direction selectors passed as the `direction` argument of
   findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c` within the first `target_len` bytes of
   `target`.  Evaluates to a char * pointing at the match, or NULL when the
   byte does not occur.  Every argument is parenthesised so that arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1946
1947/* String ops must return a string. */
1948/* If the object is subclass of string, create a copy */
1949Py_LOCAL(PyBytesObject *)
1950return_self(PyBytesObject *self)
1951{
1952 if (PyBytes_CheckExact(self)) {
1953 Py_INCREF(self);
1954 return self;
1955 }
1956 return (PyBytesObject *)PyBytes_FromStringAndSize(
1957 PyBytes_AS_STRING(self),
1958 PyBytes_GET_SIZE(self));
1959}
1960
1961Py_LOCAL_INLINE(Py_ssize_t)
1962countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1963{
1964 Py_ssize_t count=0;
1965 const char *start=target;
1966 const char *end=target+target_len;
1967
1968 while ( (start=findchar(start, end-start, c)) != NULL ) {
1969 count++;
1970 if (count >= maxcount)
1971 break;
1972 start += 1;
1973 }
1974 return count;
1975}
1976
1977Py_LOCAL(Py_ssize_t)
1978findstring(const char *target, Py_ssize_t target_len,
1979 const char *pattern, Py_ssize_t pattern_len,
1980 Py_ssize_t start,
1981 Py_ssize_t end,
1982 int direction)
1983{
1984 if (start < 0) {
1985 start += target_len;
1986 if (start < 0)
1987 start = 0;
1988 }
1989 if (end > target_len) {
1990 end = target_len;
1991 } else if (end < 0) {
1992 end += target_len;
1993 if (end < 0)
1994 end = 0;
1995 }
1996
1997 /* zero-length substrings always match at the first attempt */
1998 if (pattern_len == 0)
1999 return (direction > 0) ? start : end;
2000
2001 end -= pattern_len;
2002
2003 if (direction < 0) {
2004 for (; end >= start; end--)
2005 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2006 return end;
2007 } else {
2008 for (; start <= end; start++)
2009 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2010 return start;
2011 }
2012 return -1;
2013}
2014
2015Py_LOCAL_INLINE(Py_ssize_t)
2016countstring(const char *target, Py_ssize_t target_len,
2017 const char *pattern, Py_ssize_t pattern_len,
2018 Py_ssize_t start,
2019 Py_ssize_t end,
2020 int direction, Py_ssize_t maxcount)
2021{
2022 Py_ssize_t count=0;
2023
2024 if (start < 0) {
2025 start += target_len;
2026 if (start < 0)
2027 start = 0;
2028 }
2029 if (end > target_len) {
2030 end = target_len;
2031 } else if (end < 0) {
2032 end += target_len;
2033 if (end < 0)
2034 end = 0;
2035 }
2036
2037 /* zero-length substrings match everywhere */
2038 if (pattern_len == 0 || maxcount == 0) {
2039 if (target_len+1 < maxcount)
2040 return target_len+1;
2041 return maxcount;
2042 }
2043
2044 end -= pattern_len;
2045 if (direction < 0) {
2046 for (; (end >= start); end--)
2047 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2048 count++;
2049 if (--maxcount <= 0) break;
2050 end -= pattern_len-1;
2051 }
2052 } else {
2053 for (; (start <= end); start++)
2054 if (Py_STRING_MATCH(target, start,
2055 pattern, pattern_len)) {
2056 count++;
2057 if (--maxcount <= 0)
2058 break;
2059 start += pattern_len-1;
2060 }
2061 }
2062 return count;
2063}
2064
2065
2066/* Algorithms for different cases of string replacement */
2067
2068/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2069Py_LOCAL(PyBytesObject *)
2070replace_interleave(PyBytesObject *self,
2071 const char *to_s, Py_ssize_t to_len,
2072 Py_ssize_t maxcount)
2073{
2074 char *self_s, *result_s;
2075 Py_ssize_t self_len, result_len;
2076 Py_ssize_t count, i, product;
2077 PyBytesObject *result;
2078
2079 self_len = PyBytes_GET_SIZE(self);
2080
2081 /* 1 at the end plus 1 after every character */
2082 count = self_len+1;
2083 if (maxcount < count)
2084 count = maxcount;
2085
2086 /* Check for overflow */
2087 /* result_len = count * to_len + self_len; */
2088 product = count * to_len;
2089 if (product / to_len != count) {
2090 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002091 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092 return NULL;
2093 }
2094 result_len = product + self_len;
2095 if (result_len < 0) {
2096 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002097 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098 return NULL;
2099 }
2100
2101 if (! (result = (PyBytesObject *)
2102 PyBytes_FromStringAndSize(NULL, result_len)) )
2103 return NULL;
2104
2105 self_s = PyBytes_AS_STRING(self);
2106 result_s = PyBytes_AS_STRING(result);
2107
2108 /* TODO: special case single character, which doesn't need memcpy */
2109
2110 /* Lay the first one down (guaranteed this will occur) */
2111 Py_MEMCPY(result_s, to_s, to_len);
2112 result_s += to_len;
2113 count -= 1;
2114
2115 for (i=0; i<count; i++) {
2116 *result_s++ = *self_s++;
2117 Py_MEMCPY(result_s, to_s, to_len);
2118 result_s += to_len;
2119 }
2120
2121 /* Copy the rest of the original string */
2122 Py_MEMCPY(result_s, self_s, self_len-i);
2123
2124 return result;
2125}
2126
2127/* Special case for deleting a single character */
2128/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2129Py_LOCAL(PyBytesObject *)
2130replace_delete_single_character(PyBytesObject *self,
2131 char from_c, Py_ssize_t maxcount)
2132{
2133 char *self_s, *result_s;
2134 char *start, *next, *end;
2135 Py_ssize_t self_len, result_len;
2136 Py_ssize_t count;
2137 PyBytesObject *result;
2138
2139 self_len = PyBytes_GET_SIZE(self);
2140 self_s = PyBytes_AS_STRING(self);
2141
2142 count = countchar(self_s, self_len, from_c, maxcount);
2143 if (count == 0) {
2144 return return_self(self);
2145 }
2146
2147 result_len = self_len - count; /* from_len == 1 */
2148 assert(result_len>=0);
2149
2150 if ( (result = (PyBytesObject *)
2151 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2152 return NULL;
2153 result_s = PyBytes_AS_STRING(result);
2154
2155 start = self_s;
2156 end = self_s + self_len;
2157 while (count-- > 0) {
2158 next = findchar(start, end-start, from_c);
2159 if (next == NULL)
2160 break;
2161 Py_MEMCPY(result_s, start, next-start);
2162 result_s += (next-start);
2163 start = next+1;
2164 }
2165 Py_MEMCPY(result_s, start, end-start);
2166
2167 return result;
2168}
2169
2170/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2171
2172Py_LOCAL(PyBytesObject *)
2173replace_delete_substring(PyBytesObject *self,
2174 const char *from_s, Py_ssize_t from_len,
2175 Py_ssize_t maxcount) {
2176 char *self_s, *result_s;
2177 char *start, *next, *end;
2178 Py_ssize_t self_len, result_len;
2179 Py_ssize_t count, offset;
2180 PyBytesObject *result;
2181
2182 self_len = PyBytes_GET_SIZE(self);
2183 self_s = PyBytes_AS_STRING(self);
2184
2185 count = countstring(self_s, self_len,
2186 from_s, from_len,
2187 0, self_len, 1,
2188 maxcount);
2189
2190 if (count == 0) {
2191 /* no matches */
2192 return return_self(self);
2193 }
2194
2195 result_len = self_len - (count * from_len);
2196 assert (result_len>=0);
2197
2198 if ( (result = (PyBytesObject *)
2199 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2200 return NULL;
2201
2202 result_s = PyBytes_AS_STRING(result);
2203
2204 start = self_s;
2205 end = self_s + self_len;
2206 while (count-- > 0) {
2207 offset = findstring(start, end-start,
2208 from_s, from_len,
2209 0, end-start, FORWARD);
2210 if (offset == -1)
2211 break;
2212 next = start + offset;
2213
2214 Py_MEMCPY(result_s, start, next-start);
2215
2216 result_s += (next-start);
2217 start = next+from_len;
2218 }
2219 Py_MEMCPY(result_s, start, end-start);
2220 return result;
2221}
2222
2223/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2224Py_LOCAL(PyBytesObject *)
2225replace_single_character_in_place(PyBytesObject *self,
2226 char from_c, char to_c,
2227 Py_ssize_t maxcount)
2228{
2229 char *self_s, *result_s, *start, *end, *next;
2230 Py_ssize_t self_len;
2231 PyBytesObject *result;
2232
2233 /* The result string will be the same size */
2234 self_s = PyBytes_AS_STRING(self);
2235 self_len = PyBytes_GET_SIZE(self);
2236
2237 next = findchar(self_s, self_len, from_c);
2238
2239 if (next == NULL) {
2240 /* No matches; return the original string */
2241 return return_self(self);
2242 }
2243
2244 /* Need to make a new string */
2245 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2246 if (result == NULL)
2247 return NULL;
2248 result_s = PyBytes_AS_STRING(result);
2249 Py_MEMCPY(result_s, self_s, self_len);
2250
2251 /* change everything in-place, starting with this one */
2252 start = result_s + (next-self_s);
2253 *start = to_c;
2254 start++;
2255 end = result_s + self_len;
2256
2257 while (--maxcount > 0) {
2258 next = findchar(start, end-start, from_c);
2259 if (next == NULL)
2260 break;
2261 *next = to_c;
2262 start = next+1;
2263 }
2264
2265 return result;
2266}
2267
2268/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2269Py_LOCAL(PyBytesObject *)
2270replace_substring_in_place(PyBytesObject *self,
2271 const char *from_s, Py_ssize_t from_len,
2272 const char *to_s, Py_ssize_t to_len,
2273 Py_ssize_t maxcount)
2274{
2275 char *result_s, *start, *end;
2276 char *self_s;
2277 Py_ssize_t self_len, offset;
2278 PyBytesObject *result;
2279
2280 /* The result string will be the same size */
2281
2282 self_s = PyBytes_AS_STRING(self);
2283 self_len = PyBytes_GET_SIZE(self);
2284
2285 offset = findstring(self_s, self_len,
2286 from_s, from_len,
2287 0, self_len, FORWARD);
2288 if (offset == -1) {
2289 /* No matches; return the original string */
2290 return return_self(self);
2291 }
2292
2293 /* Need to make a new string */
2294 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2295 if (result == NULL)
2296 return NULL;
2297 result_s = PyBytes_AS_STRING(result);
2298 Py_MEMCPY(result_s, self_s, self_len);
2299
2300 /* change everything in-place, starting with this one */
2301 start = result_s + offset;
2302 Py_MEMCPY(start, to_s, from_len);
2303 start += from_len;
2304 end = result_s + self_len;
2305
2306 while ( --maxcount > 0) {
2307 offset = findstring(start, end-start,
2308 from_s, from_len,
2309 0, end-start, FORWARD);
2310 if (offset==-1)
2311 break;
2312 Py_MEMCPY(start+offset, to_s, from_len);
2313 start += offset+from_len;
2314 }
2315
2316 return result;
2317}
2318
2319/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2320Py_LOCAL(PyBytesObject *)
2321replace_single_character(PyBytesObject *self,
2322 char from_c,
2323 const char *to_s, Py_ssize_t to_len,
2324 Py_ssize_t maxcount)
2325{
2326 char *self_s, *result_s;
2327 char *start, *next, *end;
2328 Py_ssize_t self_len, result_len;
2329 Py_ssize_t count, product;
2330 PyBytesObject *result;
2331
2332 self_s = PyBytes_AS_STRING(self);
2333 self_len = PyBytes_GET_SIZE(self);
2334
2335 count = countchar(self_s, self_len, from_c, maxcount);
2336 if (count == 0) {
2337 /* no matches, return unchanged */
2338 return return_self(self);
2339 }
2340
2341 /* use the difference between current and new, hence the "-1" */
2342 /* result_len = self_len + count * (to_len-1) */
2343 product = count * (to_len-1);
2344 if (product / (to_len-1) != count) {
2345 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002346 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347 return NULL;
2348 }
2349 result_len = self_len + product;
2350 if (result_len < 0) {
2351 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002352 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353 return NULL;
2354 }
2355
2356 if ( (result = (PyBytesObject *)
2357 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2358 return NULL;
2359 result_s = PyBytes_AS_STRING(result);
2360
2361 start = self_s;
2362 end = self_s + self_len;
2363 while (count-- > 0) {
2364 next = findchar(start, end-start, from_c);
2365 if (next == NULL)
2366 break;
2367
2368 if (next == start) {
2369 /* replace with the 'to' */
2370 Py_MEMCPY(result_s, to_s, to_len);
2371 result_s += to_len;
2372 start += 1;
2373 } else {
2374 /* copy the unchanged old then the 'to' */
2375 Py_MEMCPY(result_s, start, next-start);
2376 result_s += (next-start);
2377 Py_MEMCPY(result_s, to_s, to_len);
2378 result_s += to_len;
2379 start = next+1;
2380 }
2381 }
2382 /* Copy the remainder of the remaining string */
2383 Py_MEMCPY(result_s, start, end-start);
2384
2385 return result;
2386}
2387
2388/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2389Py_LOCAL(PyBytesObject *)
2390replace_substring(PyBytesObject *self,
2391 const char *from_s, Py_ssize_t from_len,
2392 const char *to_s, Py_ssize_t to_len,
2393 Py_ssize_t maxcount) {
2394 char *self_s, *result_s;
2395 char *start, *next, *end;
2396 Py_ssize_t self_len, result_len;
2397 Py_ssize_t count, offset, product;
2398 PyBytesObject *result;
2399
2400 self_s = PyBytes_AS_STRING(self);
2401 self_len = PyBytes_GET_SIZE(self);
2402
2403 count = countstring(self_s, self_len,
2404 from_s, from_len,
2405 0, self_len, FORWARD, maxcount);
2406 if (count == 0) {
2407 /* no matches, return unchanged */
2408 return return_self(self);
2409 }
2410
2411 /* Check for overflow */
2412 /* result_len = self_len + count * (to_len-from_len) */
2413 product = count * (to_len-from_len);
2414 if (product / (to_len-from_len) != count) {
2415 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002416 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002417 return NULL;
2418 }
2419 result_len = self_len + product;
2420 if (result_len < 0) {
2421 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002422 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423 return NULL;
2424 }
2425
2426 if ( (result = (PyBytesObject *)
2427 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2428 return NULL;
2429 result_s = PyBytes_AS_STRING(result);
2430
2431 start = self_s;
2432 end = self_s + self_len;
2433 while (count-- > 0) {
2434 offset = findstring(start, end-start,
2435 from_s, from_len,
2436 0, end-start, FORWARD);
2437 if (offset == -1)
2438 break;
2439 next = start+offset;
2440 if (next == start) {
2441 /* replace with the 'to' */
2442 Py_MEMCPY(result_s, to_s, to_len);
2443 result_s += to_len;
2444 start += from_len;
2445 } else {
2446 /* copy the unchanged old then the 'to' */
2447 Py_MEMCPY(result_s, start, next-start);
2448 result_s += (next-start);
2449 Py_MEMCPY(result_s, to_s, to_len);
2450 result_s += to_len;
2451 start = next+from_len;
2452 }
2453 }
2454 /* Copy the remainder of the remaining string */
2455 Py_MEMCPY(result_s, start, end-start);
2456
2457 return result;
2458}
2459
2460
2461Py_LOCAL(PyBytesObject *)
2462replace(PyBytesObject *self,
2463 const char *from_s, Py_ssize_t from_len,
2464 const char *to_s, Py_ssize_t to_len,
2465 Py_ssize_t maxcount)
2466{
2467 if (maxcount < 0) {
2468 maxcount = PY_SSIZE_T_MAX;
2469 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2470 /* nothing to do; return the original string */
2471 return return_self(self);
2472 }
2473
2474 if (maxcount == 0 ||
2475 (from_len == 0 && to_len == 0)) {
2476 /* nothing to do; return the original string */
2477 return return_self(self);
2478 }
2479
2480 /* Handle zero-length special cases */
2481
2482 if (from_len == 0) {
2483 /* insert the 'to' string everywhere. */
2484 /* >>> "Python".replace("", ".") */
2485 /* '.P.y.t.h.o.n.' */
2486 return replace_interleave(self, to_s, to_len, maxcount);
2487 }
2488
2489 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2490 /* point for an empty self string to generate a non-empty string */
2491 /* Special case so the remaining code always gets a non-empty string */
2492 if (PyBytes_GET_SIZE(self) == 0) {
2493 return return_self(self);
2494 }
2495
2496 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002497 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498 if (from_len == 1) {
2499 return replace_delete_single_character(
2500 self, from_s[0], maxcount);
2501 } else {
2502 return replace_delete_substring(self, from_s,
2503 from_len, maxcount);
2504 }
2505 }
2506
2507 /* Handle special case where both strings have the same length */
2508
2509 if (from_len == to_len) {
2510 if (from_len == 1) {
2511 return replace_single_character_in_place(
2512 self,
2513 from_s[0],
2514 to_s[0],
2515 maxcount);
2516 } else {
2517 return replace_substring_in_place(
2518 self, from_s, from_len, to_s, to_len,
2519 maxcount);
2520 }
2521 }
2522
2523 /* Otherwise use the more generic algorithms */
2524 if (from_len == 1) {
2525 return replace_single_character(self, from_s[0],
2526 to_s, to_len, maxcount);
2527 } else {
2528 /* len('from')>=2, len('to')>=1 */
2529 return replace_substring(self, from_s, from_len, to_s, to_len,
2530 maxcount);
2531 }
2532}
2533
2534PyDoc_STRVAR(replace__doc__,
2535"B.replace(old, new[, count]) -> bytes\n\
2536\n\
2537Return a copy of B with all occurrences of subsection\n\
2538old replaced by new. If the optional argument count is\n\
2539given, only the first count occurrences are replaced.");
2540
2541static PyObject *
2542string_replace(PyBytesObject *self, PyObject *args)
2543{
2544 Py_ssize_t count = -1;
2545 PyObject *from, *to;
2546 const char *from_s, *to_s;
2547 Py_ssize_t from_len, to_len;
2548
2549 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2550 return NULL;
2551
2552 if (PyBytes_Check(from)) {
2553 from_s = PyBytes_AS_STRING(from);
2554 from_len = PyBytes_GET_SIZE(from);
2555 }
2556 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2557 return NULL;
2558
2559 if (PyBytes_Check(to)) {
2560 to_s = PyBytes_AS_STRING(to);
2561 to_len = PyBytes_GET_SIZE(to);
2562 }
2563 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2564 return NULL;
2565
2566 return (PyObject *)replace((PyBytesObject *) self,
2567 from_s, from_len,
2568 to_s, to_len, count);
2569}
2570
2571/** End DALKE **/
2572
2573/* Matches the end (direction >= 0) or start (direction < 0) of self
2574 * against substr, using the start and end arguments. Returns
2575 * -1 on error, 0 if not found and 1 if found.
2576 */
2577Py_LOCAL(int)
2578_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2579 Py_ssize_t end, int direction)
2580{
2581 Py_ssize_t len = PyBytes_GET_SIZE(self);
2582 Py_ssize_t slen;
2583 const char* sub;
2584 const char* str;
2585
2586 if (PyBytes_Check(substr)) {
2587 sub = PyBytes_AS_STRING(substr);
2588 slen = PyBytes_GET_SIZE(substr);
2589 }
2590 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2591 return -1;
2592 str = PyBytes_AS_STRING(self);
2593
2594 string_adjust_indices(&start, &end, len);
2595
2596 if (direction < 0) {
2597 /* startswith */
2598 if (start+slen > len)
2599 return 0;
2600 } else {
2601 /* endswith */
2602 if (end-start < slen || start > len)
2603 return 0;
2604
2605 if (end-slen > start)
2606 start = end - slen;
2607 }
2608 if (end-start >= slen)
2609 return ! memcmp(str+start, sub, slen);
2610 return 0;
2611}
2612
2613
2614PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002615"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616\n\
2617Return True if B starts with the specified prefix, False otherwise.\n\
2618With optional start, test B beginning at that position.\n\
2619With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002620prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
2622static PyObject *
2623string_startswith(PyBytesObject *self, PyObject *args)
2624{
2625 Py_ssize_t start = 0;
2626 Py_ssize_t end = PY_SSIZE_T_MAX;
2627 PyObject *subobj;
2628 int result;
2629
2630 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2631 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2632 return NULL;
2633 if (PyTuple_Check(subobj)) {
2634 Py_ssize_t i;
2635 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2636 result = _string_tailmatch(self,
2637 PyTuple_GET_ITEM(subobj, i),
2638 start, end, -1);
2639 if (result == -1)
2640 return NULL;
2641 else if (result) {
2642 Py_RETURN_TRUE;
2643 }
2644 }
2645 Py_RETURN_FALSE;
2646 }
2647 result = _string_tailmatch(self, subobj, start, end, -1);
2648 if (result == -1)
2649 return NULL;
2650 else
2651 return PyBool_FromLong(result);
2652}
2653
2654
2655PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002656"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657\n\
2658Return True if B ends with the specified suffix, False otherwise.\n\
2659With optional start, test B beginning at that position.\n\
2660With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002661suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
2663static PyObject *
2664string_endswith(PyBytesObject *self, PyObject *args)
2665{
2666 Py_ssize_t start = 0;
2667 Py_ssize_t end = PY_SSIZE_T_MAX;
2668 PyObject *subobj;
2669 int result;
2670
2671 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2672 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2673 return NULL;
2674 if (PyTuple_Check(subobj)) {
2675 Py_ssize_t i;
2676 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2677 result = _string_tailmatch(self,
2678 PyTuple_GET_ITEM(subobj, i),
2679 start, end, +1);
2680 if (result == -1)
2681 return NULL;
2682 else if (result) {
2683 Py_RETURN_TRUE;
2684 }
2685 }
2686 Py_RETURN_FALSE;
2687 }
2688 result = _string_tailmatch(self, subobj, start, end, +1);
2689 if (result == -1)
2690 return NULL;
2691 else
2692 return PyBool_FromLong(result);
2693}
2694
2695
2696PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002697"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002699Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002700to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002701handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2702a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002704able to handle UnicodeDecodeErrors.");
2705
2706static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002708{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709 const char *encoding = NULL;
2710 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002711
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2713 return NULL;
2714 if (encoding == NULL)
2715 encoding = PyUnicode_GetDefaultEncoding();
2716 return PyCodec_Decode(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002717}
2718
Guido van Rossum20188312006-05-05 15:15:40 +00002719
/* Docstring attached to the "fromhex" entry of the method table. */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002726
2727static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002728hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002729{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730 if (c >= 128)
2731 return -1;
2732 if (ISDIGIT(c))
2733 return c - '0';
2734 else {
2735 if (ISUPPER(c))
2736 c = TOLOWER(c);
2737 if (c >= 'a' && c <= 'f')
2738 return c - 'a' + 10;
2739 }
2740 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002741}
2742
2743static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002745{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746 PyObject *newstring, *hexobj;
2747 char *buf;
2748 Py_UNICODE *hex;
2749 Py_ssize_t hexlen, byteslen, i, j;
2750 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002751
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2753 return NULL;
2754 assert(PyUnicode_Check(hexobj));
2755 hexlen = PyUnicode_GET_SIZE(hexobj);
2756 hex = PyUnicode_AS_UNICODE(hexobj);
2757 byteslen = hexlen/2; /* This overestimates if there are spaces */
2758 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2759 if (!newstring)
2760 return NULL;
2761 buf = PyBytes_AS_STRING(newstring);
2762 for (i = j = 0; i < hexlen; i += 2) {
2763 /* skip over spaces in the input */
2764 while (hex[i] == ' ')
2765 i++;
2766 if (i >= hexlen)
2767 break;
2768 top = hex_digit_to_int(hex[i]);
2769 bot = hex_digit_to_int(hex[i+1]);
2770 if (top == -1 || bot == -1) {
2771 PyErr_Format(PyExc_ValueError,
2772 "non-hexadecimal number found in "
2773 "fromhex() arg at position %zd", i);
2774 goto error;
2775 }
2776 buf[j++] = (top << 4) + bot;
2777 }
2778 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2779 goto error;
2780 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002781
2782 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783 Py_XDECREF(newstring);
2784 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002785}
2786
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002787
2788static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002790{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002792}
2793
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002794
/* Method table.  Each entry lists the Python-level method name, the C
   function implementing it, the calling-convention flags and, where given,
   the docstring.  The list is terminated by a {NULL, NULL} sentinel. */
static PyMethodDef
string_methods[] = {
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    /* fromhex is the only entry registered as a class method. */
    {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL, NULL} /* sentinel */
};
2853
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854static PyObject *
2855str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2856
2857static PyObject *
2858string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2859{
2860 PyObject *x = NULL, *it;
2861 const char *encoding = NULL;
2862 const char *errors = NULL;
2863 PyObject *new = NULL;
2864 Py_ssize_t i, size;
2865 static char *kwlist[] = {"source", "encoding", "errors", 0};
2866
2867 if (type != &PyBytes_Type)
2868 return str_subtype_new(type, args, kwds);
2869 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2870 &encoding, &errors))
2871 return NULL;
2872 if (x == NULL) {
2873 if (encoding != NULL || errors != NULL) {
2874 PyErr_SetString(PyExc_TypeError,
2875 "encoding or errors without sequence "
2876 "argument");
2877 return NULL;
2878 }
2879 return PyBytes_FromString("");
2880 }
2881
2882 if (PyUnicode_Check(x)) {
2883 /* Encode via the codec registry */
2884 if (encoding == NULL) {
2885 PyErr_SetString(PyExc_TypeError,
2886 "string argument without an encoding");
2887 return NULL;
2888 }
2889 new = PyCodec_Encode(x, encoding, errors);
2890 if (new == NULL)
2891 return NULL;
2892 assert(PyBytes_Check(new));
2893 return new;
2894 }
2895
2896 /* If it's not unicode, there can't be encoding or errors */
2897 if (encoding != NULL || errors != NULL) {
2898 PyErr_SetString(PyExc_TypeError,
2899 "encoding or errors without a string argument");
2900 return NULL;
2901 }
2902
2903 /* Is it an int? */
2904 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2905 if (size == -1 && PyErr_Occurred()) {
2906 PyErr_Clear();
2907 }
2908 else {
2909 if (size < 0) {
2910 PyErr_SetString(PyExc_ValueError, "negative count");
2911 return NULL;
2912 }
2913 new = PyBytes_FromStringAndSize(NULL, size);
2914 if (new == NULL) {
2915 return NULL;
2916 }
2917 if (size > 0) {
2918 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2919 }
2920 return new;
2921 }
2922
2923 /* Use the modern buffer interface */
2924 if (PyObject_CheckBuffer(x)) {
2925 Py_buffer view;
2926 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2927 return NULL;
2928 new = PyBytes_FromStringAndSize(NULL, view.len);
2929 if (!new)
2930 goto fail;
2931 // XXX(brett.cannon): Better way to get to internal buffer?
2932 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2933 &view, view.len, 'C') < 0)
2934 goto fail;
2935 PyObject_ReleaseBuffer(x, &view);
2936 return new;
2937 fail:
2938 Py_XDECREF(new);
2939 PyObject_ReleaseBuffer(x, &view);
2940 return NULL;
2941 }
2942
2943 /* For iterator version, create a string object and resize as needed */
2944 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2945 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2946 input being a truly long iterator. */
2947 size = 64;
2948 new = PyBytes_FromStringAndSize(NULL, size);
2949 if (new == NULL)
2950 return NULL;
2951
2952 /* XXX Optimize this if the arguments is a list, tuple */
2953
2954 /* Get the iterator */
2955 it = PyObject_GetIter(x);
2956 if (it == NULL)
2957 goto error;
2958
2959 /* Run the iterator to exhaustion */
2960 for (i = 0; ; i++) {
2961 PyObject *item;
2962 Py_ssize_t value;
2963
2964 /* Get the next item */
2965 item = PyIter_Next(it);
2966 if (item == NULL) {
2967 if (PyErr_Occurred())
2968 goto error;
2969 break;
2970 }
2971
2972 /* Interpret it as an int (__index__) */
2973 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2974 Py_DECREF(item);
2975 if (value == -1 && PyErr_Occurred())
2976 goto error;
2977
2978 /* Range check */
2979 if (value < 0 || value >= 256) {
2980 PyErr_SetString(PyExc_ValueError,
2981 "bytes must be in range(0, 256)");
2982 goto error;
2983 }
2984
2985 /* Append the byte */
2986 if (i >= size) {
2987 size *= 2;
2988 if (_PyBytes_Resize(&new, size) < 0)
2989 goto error;
2990 }
2991 ((PyBytesObject *)new)->ob_sval[i] = value;
2992 }
2993 _PyBytes_Resize(&new, i);
2994
2995 /* Clean up and return success */
2996 Py_DECREF(it);
2997 return new;
2998
2999 error:
3000 /* Error handling when new != NULL */
3001 Py_XDECREF(it);
3002 Py_DECREF(new);
3003 return NULL;
3004}
3005
3006static PyObject *
3007str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3008{
3009 PyObject *tmp, *pnew;
3010 Py_ssize_t n;
3011
3012 assert(PyType_IsSubtype(type, &PyBytes_Type));
3013 tmp = string_new(&PyBytes_Type, args, kwds);
3014 if (tmp == NULL)
3015 return NULL;
3016 assert(PyBytes_CheckExact(tmp));
3017 n = PyBytes_GET_SIZE(tmp);
3018 pnew = type->tp_alloc(type, n);
3019 if (pnew != NULL) {
3020 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3021 PyBytes_AS_STRING(tmp), n+1);
3022 ((PyBytesObject *)pnew)->ob_shash =
3023 ((PyBytesObject *)tmp)->ob_shash;
3024 }
3025 Py_DECREF(tmp);
3026 return pnew;
3027}
3028
3029PyDoc_STRVAR(string_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003030"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003031bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003032bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3033bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003034\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003036 - an iterable yielding integers in range(256)\n\
3037 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003038 - a bytes or a buffer object\n\
3039 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003040
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003041static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003042
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043PyTypeObject PyBytes_Type = {
3044 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3045 "bytes",
3046 sizeof(PyBytesObject),
3047 sizeof(char),
3048 string_dealloc, /* tp_dealloc */
3049 0, /* tp_print */
3050 0, /* tp_getattr */
3051 0, /* tp_setattr */
3052 0, /* tp_compare */
3053 (reprfunc)string_repr, /* tp_repr */
3054 0, /* tp_as_number */
3055 &string_as_sequence, /* tp_as_sequence */
3056 &string_as_mapping, /* tp_as_mapping */
3057 (hashfunc)string_hash, /* tp_hash */
3058 0, /* tp_call */
3059 string_str, /* tp_str */
3060 PyObject_GenericGetAttr, /* tp_getattro */
3061 0, /* tp_setattro */
3062 &string_as_buffer, /* tp_as_buffer */
3063 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3064 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3065 string_doc, /* tp_doc */
3066 0, /* tp_traverse */
3067 0, /* tp_clear */
3068 (richcmpfunc)string_richcompare, /* tp_richcompare */
3069 0, /* tp_weaklistoffset */
3070 str_iter, /* tp_iter */
3071 0, /* tp_iternext */
3072 string_methods, /* tp_methods */
3073 0, /* tp_members */
3074 0, /* tp_getset */
3075 &PyBaseObject_Type, /* tp_base */
3076 0, /* tp_dict */
3077 0, /* tp_descr_get */
3078 0, /* tp_descr_set */
3079 0, /* tp_dictoffset */
3080 0, /* tp_init */
3081 0, /* tp_alloc */
3082 string_new, /* tp_new */
3083 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003084};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003085
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003086void
3087PyBytes_Concat(register PyObject **pv, register PyObject *w)
3088{
3089 register PyObject *v;
3090 assert(pv != NULL);
3091 if (*pv == NULL)
3092 return;
3093 if (w == NULL) {
3094 Py_DECREF(*pv);
3095 *pv = NULL;
3096 return;
3097 }
3098 v = string_concat(*pv, w);
3099 Py_DECREF(*pv);
3100 *pv = v;
3101}
3102
3103void
3104PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3105{
3106 PyBytes_Concat(pv, w);
3107 Py_XDECREF(w);
3108}
3109
3110
3111/* The following function breaks the notion that strings are immutable:
3112 it changes the size of a string. We get away with this only if there
3113 is only one module referencing the object. You can also think of it
3114 as creating a new string object and destroying the old one, only
3115 more efficiently. In any case, don't use this if the string may
3116 already be known to some other part of the code...
3117 Note that if there's not enough memory to resize the string, the original
3118 string object at *pv is deallocated, *pv is set to NULL, an "out of
3119 memory" exception is set, and -1 is returned. Else (on success) 0 is
3120 returned, and the value in *pv may or may not be the same as on input.
3121 As always, an extra byte is allocated for a trailing \0 byte (newsize
3122 does *not* include that), and a trailing \0 byte is stored.
3123*/
3124
3125int
3126_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3127{
3128 register PyObject *v;
3129 register PyBytesObject *sv;
3130 v = *pv;
3131 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3132 *pv = 0;
3133 Py_DECREF(v);
3134 PyErr_BadInternalCall();
3135 return -1;
3136 }
3137 /* XXX UNREF/NEWREF interface should be more symmetrical */
3138 _Py_DEC_REFTOTAL;
3139 _Py_ForgetReference(v);
3140 *pv = (PyObject *)
3141 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
3142 if (*pv == NULL) {
3143 PyObject_Del(v);
3144 PyErr_NoMemory();
3145 return -1;
3146 }
3147 _Py_NewReference(*pv);
3148 sv = (PyBytesObject *) *pv;
3149 Py_SIZE(sv) = newsize;
3150 sv->ob_sval[newsize] = '\0';
3151 sv->ob_shash = -1; /* invalidate cached hash value */
3152 return 0;
3153}
3154
3155/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3156 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3157 * Python's regular ints.
3158 * Return value: a new PyString*, or NULL if error.
3159 * . *pbuf is set to point into it,
3160 * *plen set to the # of chars following that.
3161 * Caller must decref it when done using pbuf.
3162 * The string starting at *pbuf is of the form
3163 * "-"? ("0x" | "0X")? digit+
3164 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3165 * set in flags. The case of hex digits will be correct,
3166 * There will be at least prec digits, zero-filled on the left if
3167 * necessary to get that many.
3168 * val object to be converted
3169 * flags bitmask of format flags; only F_ALT is looked at
3170 * prec minimum number of digits; 0-fill on left if needed
3171 * type a character in [duoxX]; u acts the same as d
3172 *
3173 * CAUTION: o, x and X conversions on regular ints can never
3174 * produce a '-' sign, but can for Python's unbounded ints.
3175 */
3176PyObject*
3177_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3178 char **pbuf, int *plen)
3179{
3180 PyObject *result = NULL;
3181 char *buf;
3182 Py_ssize_t i;
3183 int sign; /* 1 if '-', else 0 */
3184 int len; /* number of characters */
3185 Py_ssize_t llen;
3186 int numdigits; /* len == numnondigits + numdigits */
3187 int numnondigits = 0;
3188
3189 /* Avoid exceeding SSIZE_T_MAX */
3190 if (prec > PY_SSIZE_T_MAX-3) {
3191 PyErr_SetString(PyExc_OverflowError,
3192 "precision too large");
3193 return NULL;
3194 }
3195
3196 switch (type) {
3197 case 'd':
3198 case 'u':
3199 /* Special-case boolean: we want 0/1 */
3200 if (PyBool_Check(val))
3201 result = PyNumber_ToBase(val, 10);
3202 else
3203 result = Py_TYPE(val)->tp_str(val);
3204 break;
3205 case 'o':
3206 numnondigits = 2;
3207 result = PyNumber_ToBase(val, 8);
3208 break;
3209 case 'x':
3210 case 'X':
3211 numnondigits = 2;
3212 result = PyNumber_ToBase(val, 16);
3213 break;
3214 default:
3215 assert(!"'type' not in [duoxX]");
3216 }
3217 if (!result)
3218 return NULL;
3219
3220 buf = PyUnicode_AsString(result);
3221 if (!buf) {
3222 Py_DECREF(result);
3223 return NULL;
3224 }
3225
3226 /* To modify the string in-place, there can only be one reference. */
3227 if (Py_REFCNT(result) != 1) {
3228 PyErr_BadInternalCall();
3229 return NULL;
3230 }
3231 llen = PyUnicode_GetSize(result);
3232 if (llen > INT_MAX) {
3233 PyErr_SetString(PyExc_ValueError,
3234 "string too large in _PyBytes_FormatLong");
3235 return NULL;
3236 }
3237 len = (int)llen;
3238 if (buf[len-1] == 'L') {
3239 --len;
3240 buf[len] = '\0';
3241 }
3242 sign = buf[0] == '-';
3243 numnondigits += sign;
3244 numdigits = len - numnondigits;
3245 assert(numdigits > 0);
3246
3247 /* Get rid of base marker unless F_ALT */
3248 if (((flags & F_ALT) == 0 &&
3249 (type == 'o' || type == 'x' || type == 'X'))) {
3250 assert(buf[sign] == '0');
3251 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3252 buf[sign+1] == 'o');
3253 numnondigits -= 2;
3254 buf += 2;
3255 len -= 2;
3256 if (sign)
3257 buf[0] = '-';
3258 assert(len == numnondigits + numdigits);
3259 assert(numdigits > 0);
3260 }
3261
3262 /* Fill with leading zeroes to meet minimum width. */
3263 if (prec > numdigits) {
3264 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3265 numnondigits + prec);
3266 char *b1;
3267 if (!r1) {
3268 Py_DECREF(result);
3269 return NULL;
3270 }
3271 b1 = PyBytes_AS_STRING(r1);
3272 for (i = 0; i < numnondigits; ++i)
3273 *b1++ = *buf++;
3274 for (i = 0; i < prec - numdigits; i++)
3275 *b1++ = '0';
3276 for (i = 0; i < numdigits; i++)
3277 *b1++ = *buf++;
3278 *b1 = '\0';
3279 Py_DECREF(result);
3280 result = r1;
3281 buf = PyBytes_AS_STRING(result);
3282 len = numnondigits + prec;
3283 }
3284
3285 /* Fix up case for hex conversions. */
3286 if (type == 'X') {
3287 /* Need to convert all lower case letters to upper case.
3288 and need to convert 0x to 0X (and -0x to -0X). */
3289 for (i = 0; i < len; i++)
3290 if (buf[i] >= 'a' && buf[i] <= 'x')
3291 buf[i] -= 'a'-'A';
3292 }
3293 *pbuf = buf;
3294 *plen = len;
3295 return result;
3296}
3297
3298void
3299PyBytes_Fini(void)
3300{
3301 int i;
3302 for (i = 0; i < UCHAR_MAX + 1; i++) {
3303 Py_XDECREF(characters[i]);
3304 characters[i] = NULL;
3305 }
3306 Py_XDECREF(nullstring);
3307 nullstring = NULL;
3308}
3309
Benjamin Peterson4116f362008-05-27 00:36:20 +00003310/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003311
3312typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003313 PyObject_HEAD
3314 Py_ssize_t it_index;
3315 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3316} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003317
3318static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003319striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003320{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003321 _PyObject_GC_UNTRACK(it);
3322 Py_XDECREF(it->it_seq);
3323 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003324}
3325
3326static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003327striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003328{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003329 Py_VISIT(it->it_seq);
3330 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003331}
3332
3333static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003334striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003335{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003336 PyBytesObject *seq;
3337 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003338
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339 assert(it != NULL);
3340 seq = it->it_seq;
3341 if (seq == NULL)
3342 return NULL;
3343 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003345 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3346 item = PyLong_FromLong(
3347 (unsigned char)seq->ob_sval[it->it_index]);
3348 if (item != NULL)
3349 ++it->it_index;
3350 return item;
3351 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003352
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353 Py_DECREF(seq);
3354 it->it_seq = NULL;
3355 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003356}
3357
3358static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003359striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003360{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003361 Py_ssize_t len = 0;
3362 if (it->it_seq)
3363 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3364 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003365}
3366
3367PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003368 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003369
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003370static PyMethodDef striter_methods[] = {
3371 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3372 length_hint_doc},
3373 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003374};
3375
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003376PyTypeObject PyBytesIter_Type = {
3377 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3378 "bytes_iterator", /* tp_name */
3379 sizeof(striterobject), /* tp_basicsize */
3380 0, /* tp_itemsize */
3381 /* methods */
3382 (destructor)striter_dealloc, /* tp_dealloc */
3383 0, /* tp_print */
3384 0, /* tp_getattr */
3385 0, /* tp_setattr */
3386 0, /* tp_compare */
3387 0, /* tp_repr */
3388 0, /* tp_as_number */
3389 0, /* tp_as_sequence */
3390 0, /* tp_as_mapping */
3391 0, /* tp_hash */
3392 0, /* tp_call */
3393 0, /* tp_str */
3394 PyObject_GenericGetAttr, /* tp_getattro */
3395 0, /* tp_setattro */
3396 0, /* tp_as_buffer */
3397 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3398 0, /* tp_doc */
3399 (traverseproc)striter_traverse, /* tp_traverse */
3400 0, /* tp_clear */
3401 0, /* tp_richcompare */
3402 0, /* tp_weaklistoffset */
3403 PyObject_SelfIter, /* tp_iter */
3404 (iternextfunc)striter_next, /* tp_iternext */
3405 striter_methods, /* tp_methods */
3406 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003407};
3408
3409static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003410str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003411{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003412 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003413
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003414 if (!PyBytes_Check(seq)) {
3415 PyErr_BadInternalCall();
3416 return NULL;
3417 }
3418 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3419 if (it == NULL)
3420 return NULL;
3421 it->it_index = 0;
3422 Py_INCREF(seq);
3423 it->it_seq = (PyBytesObject *)seq;
3424 _PyObject_GC_TRACK(it);
3425 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003426}