blob: b9ba73f75832d56a99570f13a347845ec6e3a357 [file] [log] [blame]
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001/* String object implementation */
2
3/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00006#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00007
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000011
Neal Norwitz2bad9702007-08-27 06:19:22 +000012static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000013_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014{
Christian Heimes90aa7642007-12-19 02:45:37 +000015 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000016
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000018 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000021 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000022 return -1;
23 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000024
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000028}
29
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000033
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034static PyBytesObject *characters[UCHAR_MAX + 1];
35static PyBytesObject *nullstring;
36
/*
   For PyBytes_FromStringAndSize(), the parameter `size' denotes the number
   of characters to allocate, not counting any null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length (as reported by strlen()) becomes the size of the
   new object.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000063PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000065{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066 register PyBytesObject *op;
67 if (size < 0) {
68 PyErr_SetString(PyExc_SystemError,
69 "Negative size passed to PyBytes_FromStringAndSize");
70 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072 if (size == 0 && (op = nullstring) != NULL) {
73#ifdef COUNT_ALLOCS
74 null_strings++;
75#endif
76 Py_INCREF(op);
77 return (PyObject *)op;
78 }
79 if (size == 1 && str != NULL &&
80 (op = characters[*str & UCHAR_MAX]) != NULL)
81 {
82#ifdef COUNT_ALLOCS
83 one_strings++;
84#endif
85 Py_INCREF(op);
86 return (PyObject *)op;
87 }
88
89 /* Inline PyObject_NewVar */
90 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
91 if (op == NULL)
92 return PyErr_NoMemory();
93 PyObject_INIT_VAR(op, &PyBytes_Type, size);
94 op->ob_shash = -1;
95 if (str != NULL)
96 Py_MEMCPY(op->ob_sval, str, size);
97 op->ob_sval[size] = '\0';
98 /* share short strings */
99 if (size == 0) {
100 nullstring = op;
101 Py_INCREF(op);
102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
105 }
106 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000107}
108
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000109PyObject *
110PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000111{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000112 register size_t size;
113 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000115 assert(str != NULL);
116 size = strlen(str);
117 if (size > PY_SSIZE_T_MAX) {
118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
122 if (size == 0 && (op = nullstring) != NULL) {
123#ifdef COUNT_ALLOCS
124 null_strings++;
125#endif
126 Py_INCREF(op);
127 return (PyObject *)op;
128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000136
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137 /* Inline PyObject_NewVar */
138 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
139 if (op == NULL)
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op, &PyBytes_Type, size);
142 op->ob_shash = -1;
143 Py_MEMCPY(op->ob_sval, str, size+1);
144 /* share short strings */
145 if (size == 0) {
146 nullstring = op;
147 Py_INCREF(op);
148 } else if (size == 1) {
149 characters[*str & UCHAR_MAX] = op;
150 Py_INCREF(op);
151 }
152 return (PyObject *) op;
153}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000154
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155PyObject *
156PyBytes_FromFormatV(const char *format, va_list vargs)
157{
158 va_list count;
159 Py_ssize_t n = 0;
160 const char* f;
161 char *s;
162 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000163
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000164#ifdef VA_LIST_IS_ARRAY
165 Py_MEMCPY(count, vargs, sizeof(va_list));
166#else
167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
170 count = vargs;
171#endif
172#endif
173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !ISALPHA(*f))
178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (ISDIGIT(*f))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (ISDIGIT(*f))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !ISALPHA(*f))
255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
333
334 end:
335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
342 PyObject* ret;
343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
371{
372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
395
396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
401
402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
445 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
449 if (ISDIGIT(c))
450 x = c - '0';
451 else if (ISLOWER(c))
452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
458 if (ISDIGIT(c))
459 x += c - '0';
460 else if (ISLOWER(c))
461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
486 goto non_esc; /* an arbitry number of unescaped
487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
493 failed:
494 Py_DECREF(v);
495 return NULL;
496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
525 register char **s,
526 register Py_ssize_t *len)
527{
528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
532
533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
538
539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
553#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000554
Neal Norwitz6968b052007-02-27 19:02:19 +0000555#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556#define STRINGLIB_LEN PyBytes_GET_SIZE
557#define STRINGLIB_NEW PyBytes_FromStringAndSize
558#define STRINGLIB_STR PyBytes_AS_STRING
559/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
560
561#define STRINGLIB_EMPTY nullstring
562#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
563#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000564
565#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566
Neal Norwitz6968b052007-02-27 19:02:19 +0000567#include "stringlib/count.h"
568#include "stringlib/find.h"
569#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000570#include "stringlib/ctype.h"
571#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
574#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576PyObject *
577PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000578{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579 static const char *hexdigits = "0123456789abcdef";
580 register PyBytesObject* op = (PyBytesObject*) obj;
581 Py_ssize_t length = Py_SIZE(op);
582 size_t newsize = 3 + 4 * length;
583 PyObject *v;
584 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
585 PyErr_SetString(PyExc_OverflowError,
586 "bytes object is too large to make repr");
587 return NULL;
588 }
589 v = PyUnicode_FromUnicode(NULL, newsize);
590 if (v == NULL) {
591 return NULL;
592 }
593 else {
594 register Py_ssize_t i;
595 register Py_UNICODE c;
596 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
597 int quote;
598
599 /* Figure out which quote to use; single is preferred */
600 quote = '\'';
601 if (smartquotes) {
602 char *test, *start;
603 start = PyBytes_AS_STRING(op);
604 for (test = start; test < start+length; ++test) {
605 if (*test == '"') {
606 quote = '\''; /* back to single */
607 goto decided;
608 }
609 else if (*test == '\'')
610 quote = '"';
611 }
612 decided:
613 ;
614 }
615
616 *p++ = 'b', *p++ = quote;
617 for (i = 0; i < length; i++) {
618 /* There's at least enough room for a hex escape
619 and a closing quote. */
620 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
621 c = op->ob_sval[i];
622 if (c == quote || c == '\\')
623 *p++ = '\\', *p++ = c;
624 else if (c == '\t')
625 *p++ = '\\', *p++ = 't';
626 else if (c == '\n')
627 *p++ = '\\', *p++ = 'n';
628 else if (c == '\r')
629 *p++ = '\\', *p++ = 'r';
630 else if (c < ' ' || c >= 0x7f) {
631 *p++ = '\\';
632 *p++ = 'x';
633 *p++ = hexdigits[(c & 0xf0) >> 4];
634 *p++ = hexdigits[c & 0xf];
635 }
636 else
637 *p++ = c;
638 }
639 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
640 *p++ = quote;
641 *p = '\0';
642 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
643 Py_DECREF(v);
644 return NULL;
645 }
646 return v;
647 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000648}
649
Neal Norwitz6968b052007-02-27 19:02:19 +0000650static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000651string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000652{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Neal Norwitz6968b052007-02-27 19:02:19 +0000656static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000657string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000658{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659 if (Py_BytesWarningFlag) {
660 if (PyErr_WarnEx(PyExc_BytesWarning,
661 "str() on a bytes instance", 1))
662 return NULL;
663 }
664 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000665}
666
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667static Py_ssize_t
668string_length(PyBytesObject *a)
669{
670 return Py_SIZE(a);
671}
Neal Norwitz6968b052007-02-27 19:02:19 +0000672
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673/* This is also used by PyBytes_Concat() */
674static PyObject *
675string_concat(PyObject *a, PyObject *b)
676{
677 Py_ssize_t size;
678 Py_buffer va, vb;
679 PyObject *result = NULL;
680
681 va.len = -1;
682 vb.len = -1;
683 if (_getbuffer(a, &va) < 0 ||
684 _getbuffer(b, &vb) < 0) {
685 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
686 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
687 goto done;
688 }
689
690 /* Optimize end cases */
691 if (va.len == 0 && PyBytes_CheckExact(b)) {
692 result = b;
693 Py_INCREF(result);
694 goto done;
695 }
696 if (vb.len == 0 && PyBytes_CheckExact(a)) {
697 result = a;
698 Py_INCREF(result);
699 goto done;
700 }
701
702 size = va.len + vb.len;
703 if (size < 0) {
704 PyErr_NoMemory();
705 goto done;
706 }
707
708 result = PyBytes_FromStringAndSize(NULL, size);
709 if (result != NULL) {
710 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
711 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
712 }
713
714 done:
715 if (va.len != -1)
716 PyObject_ReleaseBuffer(a, &va);
717 if (vb.len != -1)
718 PyObject_ReleaseBuffer(b, &vb);
719 return result;
720}
Neal Norwitz6968b052007-02-27 19:02:19 +0000721
722static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000723string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000724{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000725 register Py_ssize_t i;
726 register Py_ssize_t j;
727 register Py_ssize_t size;
728 register PyBytesObject *op;
729 size_t nbytes;
730 if (n < 0)
731 n = 0;
732 /* watch out for overflows: the size can overflow int,
733 * and the # of bytes needed can overflow size_t
734 */
735 size = Py_SIZE(a) * n;
736 if (n && size / n != Py_SIZE(a)) {
737 PyErr_SetString(PyExc_OverflowError,
738 "repeated string is too long");
739 return NULL;
740 }
741 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
742 Py_INCREF(a);
743 return (PyObject *)a;
744 }
745 nbytes = (size_t)size;
746 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
747 PyErr_SetString(PyExc_OverflowError,
748 "repeated string is too long");
749 return NULL;
750 }
751 op = (PyBytesObject *)
752 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
753 if (op == NULL)
754 return PyErr_NoMemory();
755 PyObject_INIT_VAR(op, &PyBytes_Type, size);
756 op->ob_shash = -1;
757 op->ob_sval[size] = '\0';
758 if (Py_SIZE(a) == 1 && n > 0) {
759 memset(op->ob_sval, a->ob_sval[0] , n);
760 return (PyObject *) op;
761 }
762 i = 0;
763 if (i < size) {
764 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
765 i = Py_SIZE(a);
766 }
767 while (i < size) {
768 j = (i <= size-i) ? i : size-i;
769 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
770 i += j;
771 }
772 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000773}
774
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000776string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777{
778 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
779 if (ival == -1 && PyErr_Occurred()) {
780 Py_buffer varg;
781 int pos;
782 PyErr_Clear();
783 if (_getbuffer(arg, &varg) < 0)
784 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000785 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 varg.buf, varg.len, 0);
787 PyObject_ReleaseBuffer(arg, &varg);
788 return pos >= 0;
789 }
790 if (ival < 0 || ival >= 256) {
791 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
792 return -1;
793 }
794
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000795 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796}
797
Neal Norwitz6968b052007-02-27 19:02:19 +0000798static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000799string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000800{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000801 if (i < 0 || i >= Py_SIZE(a)) {
802 PyErr_SetString(PyExc_IndexError, "string index out of range");
803 return NULL;
804 }
805 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000806}
807
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808static PyObject*
809string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000810{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000811 int c;
812 Py_ssize_t len_a, len_b;
813 Py_ssize_t min_len;
814 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000815
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000816 /* Make sure both arguments are strings. */
817 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
818 if (Py_BytesWarningFlag && (op == Py_EQ) &&
819 (PyObject_IsInstance((PyObject*)a,
820 (PyObject*)&PyUnicode_Type) ||
821 PyObject_IsInstance((PyObject*)b,
822 (PyObject*)&PyUnicode_Type))) {
823 if (PyErr_WarnEx(PyExc_BytesWarning,
824 "Comparsion between bytes and string", 1))
825 return NULL;
826 }
827 result = Py_NotImplemented;
828 goto out;
829 }
830 if (a == b) {
831 switch (op) {
832 case Py_EQ:case Py_LE:case Py_GE:
833 result = Py_True;
834 goto out;
835 case Py_NE:case Py_LT:case Py_GT:
836 result = Py_False;
837 goto out;
838 }
839 }
840 if (op == Py_EQ) {
841 /* Supporting Py_NE here as well does not save
842 much time, since Py_NE is rarely used. */
843 if (Py_SIZE(a) == Py_SIZE(b)
844 && (a->ob_sval[0] == b->ob_sval[0]
845 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
846 result = Py_True;
847 } else {
848 result = Py_False;
849 }
850 goto out;
851 }
852 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
853 min_len = (len_a < len_b) ? len_a : len_b;
854 if (min_len > 0) {
855 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
856 if (c==0)
857 c = memcmp(a->ob_sval, b->ob_sval, min_len);
858 } else
859 c = 0;
860 if (c == 0)
861 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
862 switch (op) {
863 case Py_LT: c = c < 0; break;
864 case Py_LE: c = c <= 0; break;
865 case Py_EQ: assert(0); break; /* unreachable */
866 case Py_NE: c = c != 0; break;
867 case Py_GT: c = c > 0; break;
868 case Py_GE: c = c >= 0; break;
869 default:
870 result = Py_NotImplemented;
871 goto out;
872 }
873 result = c ? Py_True : Py_False;
874 out:
875 Py_INCREF(result);
876 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877}
878
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000879static long
880string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000881{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000882 register Py_ssize_t len;
883 register unsigned char *p;
884 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000885
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000886 if (a->ob_shash != -1)
887 return a->ob_shash;
888 len = Py_SIZE(a);
889 p = (unsigned char *) a->ob_sval;
890 x = *p << 7;
891 while (--len >= 0)
892 x = (1000003*x) ^ *p++;
893 x ^= Py_SIZE(a);
894 if (x == -1)
895 x = -2;
896 a->ob_shash = x;
897 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898}
899
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000900static PyObject*
901string_subscript(PyBytesObject* self, PyObject* item)
902{
903 if (PyIndex_Check(item)) {
904 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
905 if (i == -1 && PyErr_Occurred())
906 return NULL;
907 if (i < 0)
908 i += PyBytes_GET_SIZE(self);
909 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
910 PyErr_SetString(PyExc_IndexError,
911 "string index out of range");
912 return NULL;
913 }
914 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
915 }
916 else if (PySlice_Check(item)) {
917 Py_ssize_t start, stop, step, slicelength, cur, i;
918 char* source_buf;
919 char* result_buf;
920 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000921
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000922 if (PySlice_GetIndicesEx((PySliceObject*)item,
923 PyBytes_GET_SIZE(self),
924 &start, &stop, &step, &slicelength) < 0) {
925 return NULL;
926 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000927
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000928 if (slicelength <= 0) {
929 return PyBytes_FromStringAndSize("", 0);
930 }
931 else if (start == 0 && step == 1 &&
932 slicelength == PyBytes_GET_SIZE(self) &&
933 PyBytes_CheckExact(self)) {
934 Py_INCREF(self);
935 return (PyObject *)self;
936 }
937 else if (step == 1) {
938 return PyBytes_FromStringAndSize(
939 PyBytes_AS_STRING(self) + start,
940 slicelength);
941 }
942 else {
943 source_buf = PyBytes_AsString((PyObject*)self);
944 result_buf = (char *)PyMem_Malloc(slicelength);
945 if (result_buf == NULL)
946 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000947
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948 for (cur = start, i = 0; i < slicelength;
949 cur += step, i++) {
950 result_buf[i] = source_buf[cur];
951 }
952
953 result = PyBytes_FromStringAndSize(result_buf,
954 slicelength);
955 PyMem_Free(result_buf);
956 return result;
957 }
958 }
959 else {
960 PyErr_Format(PyExc_TypeError,
961 "string indices must be integers, not %.200s",
962 Py_TYPE(item)->tp_name);
963 return NULL;
964 }
965}
966
967static int
968string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
969{
970 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
971 0, flags);
972}
973
974static PySequenceMethods string_as_sequence = {
975 (lenfunc)string_length, /*sq_length*/
976 (binaryfunc)string_concat, /*sq_concat*/
977 (ssizeargfunc)string_repeat, /*sq_repeat*/
978 (ssizeargfunc)string_item, /*sq_item*/
979 0, /*sq_slice*/
980 0, /*sq_ass_item*/
981 0, /*sq_ass_slice*/
982 (objobjproc)string_contains /*sq_contains*/
983};
984
985static PyMappingMethods string_as_mapping = {
986 (lenfunc)string_length,
987 (binaryfunc)string_subscript,
988 0,
989};
990
991static PyBufferProcs string_as_buffer = {
992 (getbufferproc)string_buffer_getbuffer,
993 NULL,
994};
995
996
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip kind `i`: the format string minus its leading
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)


/* True if `pattern` (of `length` bytes) occurs in `target` at `offset`.
   The first and last bytes are compared before memcmp on the middle.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[offset] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The argument is parenthesised so that an
   expression argument is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] as a new bytes object to the result list.
   Relies on variables of the expanding function: `str` (scratch),
   `list` (result list), `count` (items stored so far) and an `onError`
   label.  The first MAX_PREALLOC items are stored directly into the list;
   later ones go through PyList_Append(). */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R variants, retreat) index `i` over whitespace /
   non-whitespace bytes of `s`.  `i` must be a modifiable variable. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) &&  ISSPACE((s)[i])) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !ISSPACE((s)[i])) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 &&  ISSPACE((s)[i])) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !ISSPACE((s)[i])) i--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001051
1052Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001053split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001054{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001055 const char *s = PyBytes_AS_STRING(self);
1056 Py_ssize_t i, j, count=0;
1057 PyObject *str;
1058 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060 if (list == NULL)
1061 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001063 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001064
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001065 while (maxsplit-- > 0) {
1066 SKIP_SPACE(s, i, len);
1067 if (i==len) break;
1068 j = i; i++;
1069 SKIP_NONSPACE(s, i, len);
1070 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1071 /* No whitespace in self, so just use it as list[0] */
1072 Py_INCREF(self);
1073 PyList_SET_ITEM(list, 0, (PyObject *)self);
1074 count++;
1075 break;
1076 }
1077 SPLIT_ADD(s, j, i);
1078 }
1079
1080 if (i < len) {
1081 /* Only occurs when maxsplit was reached */
1082 /* Skip any remaining whitespace and copy to end of string */
1083 SKIP_SPACE(s, i, len);
1084 if (i != len)
1085 SPLIT_ADD(s, i, len);
1086 }
1087 FIX_PREALLOC_SIZE(list);
1088 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001090 Py_DECREF(list);
1091 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001092}
1093
Guido van Rossum8f950672007-09-10 16:53:45 +00001094Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001095split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001096{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001097 const char *s = PyBytes_AS_STRING(self);
1098 register Py_ssize_t i, j, count=0;
1099 PyObject *str;
1100 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102 if (list == NULL)
1103 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001104
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105 i = j = 0;
1106 while ((j < len) && (maxcount-- > 0)) {
1107 for(; j<len; j++) {
1108 /* I found that using memchr makes no difference */
1109 if (s[j] == ch) {
1110 SPLIT_ADD(s, i, j);
1111 i = j = j + 1;
1112 break;
1113 }
1114 }
1115 }
1116 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1117 /* ch not in self, so just use self as list[0] */
1118 Py_INCREF(self);
1119 PyList_SET_ITEM(list, 0, (PyObject *)self);
1120 count++;
1121 }
1122 else if (i <= len) {
1123 SPLIT_ADD(s, i, len);
1124 }
1125 FIX_PREALLOC_SIZE(list);
1126 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001127
1128 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129 Py_DECREF(list);
1130 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001131}
1132
Neal Norwitz6968b052007-02-27 19:02:19 +00001133PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001135\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001136Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001137If sep is not specified or is None, B is split on ASCII whitespace\n\
1138characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001139If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001140
1141static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001142string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001143{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001144 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1145 Py_ssize_t maxsplit = -1, count=0;
1146 const char *s = PyBytes_AS_STRING(self), *sub;
1147 Py_buffer vsub;
1148 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001149#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001151#endif
1152
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001153 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1154 return NULL;
1155 if (maxsplit < 0)
1156 maxsplit = PY_SSIZE_T_MAX;
1157 if (subobj == Py_None)
1158 return split_whitespace(self, len, maxsplit);
1159 if (_getbuffer(subobj, &vsub) < 0)
1160 return NULL;
1161 sub = vsub.buf;
1162 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001163
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164 if (n == 0) {
1165 PyErr_SetString(PyExc_ValueError, "empty separator");
1166 PyObject_ReleaseBuffer(subobj, &vsub);
1167 return NULL;
1168 }
1169 else if (n == 1)
1170 return split_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001171
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172 list = PyList_New(PREALLOC_SIZE(maxsplit));
1173 if (list == NULL) {
1174 PyObject_ReleaseBuffer(subobj, &vsub);
1175 return NULL;
1176 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001177
1178#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001179 i = j = 0;
1180 while (maxsplit-- > 0) {
1181 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1182 if (pos < 0)
1183 break;
1184 j = i+pos;
1185 SPLIT_ADD(s, i, j);
1186 i = j + n;
1187 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001188#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001189 i = j = 0;
1190 while ((j+n <= len) && (maxsplit-- > 0)) {
1191 for (; j+n <= len; j++) {
1192 if (Py_STRING_MATCH(s, j, sub, n)) {
1193 SPLIT_ADD(s, i, j);
1194 i = j = j + n;
1195 break;
1196 }
1197 }
1198 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001199#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200 SPLIT_ADD(s, i, len);
1201 FIX_PREALLOC_SIZE(list);
1202 PyObject_ReleaseBuffer(subobj, &vsub);
1203 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205 onError:
1206 Py_DECREF(list);
1207 PyObject_ReleaseBuffer(subobj, &vsub);
1208 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001209}
1210
Neal Norwitz6968b052007-02-27 19:02:19 +00001211PyDoc_STRVAR(partition__doc__,
1212"B.partition(sep) -> (head, sep, tail)\n\
1213\n\
1214Searches for the separator sep in B, and returns the part before it,\n\
1215the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001217
1218static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001220{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 const char *sep;
1222 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001223
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224 if (PyBytes_Check(sep_obj)) {
1225 sep = PyBytes_AS_STRING(sep_obj);
1226 sep_len = PyBytes_GET_SIZE(sep_obj);
1227 }
1228 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1229 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001230
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231 return stringlib_partition(
1232 (PyObject*) self,
1233 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1234 sep_obj, sep, sep_len
1235 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001236}
1237
1238PyDoc_STRVAR(rpartition__doc__,
1239"B.rpartition(sep) -> (tail, sep, head)\n\
1240\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001241Searches for the separator sep in B, starting at the end of B,\n\
1242and returns the part before it, the separator itself, and the\n\
1243part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249 const char *sep;
1250 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001251
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252 if (PyBytes_Check(sep_obj)) {
1253 sep = PyBytes_AS_STRING(sep_obj);
1254 sep_len = PyBytes_GET_SIZE(sep_obj);
1255 }
1256 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1257 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259 return stringlib_rpartition(
1260 (PyObject*) self,
1261 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1262 sep_obj, sep, sep_len
1263 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001264}
1265
1266Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001268{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269 const char *s = PyBytes_AS_STRING(self);
1270 Py_ssize_t i, j, count=0;
1271 PyObject *str;
1272 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274 if (list == NULL)
1275 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001276
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279 while (maxsplit-- > 0) {
1280 RSKIP_SPACE(s, i);
1281 if (i<0) break;
1282 j = i; i--;
1283 RSKIP_NONSPACE(s, i);
1284 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1285 /* No whitespace in self, so just use it as list[0] */
1286 Py_INCREF(self);
1287 PyList_SET_ITEM(list, 0, (PyObject *)self);
1288 count++;
1289 break;
1290 }
1291 SPLIT_ADD(s, i + 1, j + 1);
1292 }
1293 if (i >= 0) {
1294 /* Only occurs when maxsplit was reached. Skip any remaining
1295 whitespace and copy to beginning of string. */
1296 RSKIP_SPACE(s, i);
1297 if (i >= 0)
1298 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300 }
1301 FIX_PREALLOC_SIZE(list);
1302 if (PyList_Reverse(list) < 0)
1303 goto onError;
1304 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306 Py_DECREF(list);
1307 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001308}
1309
Guido van Rossum8f950672007-09-10 16:53:45 +00001310Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001312{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313 const char *s = PyBytes_AS_STRING(self);
1314 register Py_ssize_t i, j, count=0;
1315 PyObject *str;
1316 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001317
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 if (list == NULL)
1319 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001320
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001321 i = j = len - 1;
1322 while ((i >= 0) && (maxcount-- > 0)) {
1323 for (; i >= 0; i--) {
1324 if (s[i] == ch) {
1325 SPLIT_ADD(s, i + 1, j + 1);
1326 j = i = i - 1;
1327 break;
1328 }
1329 }
1330 }
1331 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1332 /* ch not in self, so just use self as list[0] */
1333 Py_INCREF(self);
1334 PyList_SET_ITEM(list, 0, (PyObject *)self);
1335 count++;
1336 }
1337 else if (j >= -1) {
1338 SPLIT_ADD(s, 0, j + 1);
1339 }
1340 FIX_PREALLOC_SIZE(list);
1341 if (PyList_Reverse(list) < 0)
1342 goto onError;
1343 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345 onError:
1346 Py_DECREF(list);
1347 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001348}
1349
Neal Norwitz6968b052007-02-27 19:02:19 +00001350PyDoc_STRVAR(rsplit__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001351"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001352\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001353Return a list of the sections in B, using sep as the delimiter,\n\
1354starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001355If sep is not given, B is split on ASCII whitespace characters\n\
1356(space, tab, return, newline, formfeed, vertical tab).\n\
1357If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Neal Norwitz6968b052007-02-27 19:02:19 +00001360static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001362{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1364 Py_ssize_t maxsplit = -1, count=0;
1365 const char *s, *sub;
1366 Py_buffer vsub;
1367 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001368
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1370 return NULL;
1371 if (maxsplit < 0)
1372 maxsplit = PY_SSIZE_T_MAX;
1373 if (subobj == Py_None)
1374 return rsplit_whitespace(self, len, maxsplit);
1375 if (_getbuffer(subobj, &vsub) < 0)
1376 return NULL;
1377 sub = vsub.buf;
1378 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380 if (n == 0) {
1381 PyErr_SetString(PyExc_ValueError, "empty separator");
1382 PyObject_ReleaseBuffer(subobj, &vsub);
1383 return NULL;
1384 }
1385 else if (n == 1)
1386 return rsplit_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388 list = PyList_New(PREALLOC_SIZE(maxsplit));
1389 if (list == NULL) {
1390 PyObject_ReleaseBuffer(subobj, &vsub);
1391 return NULL;
1392 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001393
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 j = len;
1395 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001396
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397 s = PyBytes_AS_STRING(self);
1398 while ( (i >= 0) && (maxsplit-- > 0) ) {
1399 for (; i>=0; i--) {
1400 if (Py_STRING_MATCH(s, i, sub, n)) {
1401 SPLIT_ADD(s, i + n, j);
1402 j = i;
1403 i -= n;
1404 break;
1405 }
1406 }
1407 }
1408 SPLIT_ADD(s, 0, j);
1409 FIX_PREALLOC_SIZE(list);
1410 if (PyList_Reverse(list) < 0)
1411 goto onError;
1412 PyObject_ReleaseBuffer(subobj, &vsub);
1413 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001414
1415onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416 Py_DECREF(list);
1417 PyObject_ReleaseBuffer(subobj, &vsub);
1418 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001419}
1420
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421#undef SPLIT_ADD
1422#undef MAX_PREALLOC
1423#undef PREALLOC_SIZE
1424
1425
1426PyDoc_STRVAR(join__doc__,
1427"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001428\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429Concatenates any number of bytes objects, with B in between each pair.\n\
1430Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1431
Neal Norwitz6968b052007-02-27 19:02:19 +00001432static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001434{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435 char *sep = PyBytes_AS_STRING(self);
1436 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1437 PyObject *res = NULL;
1438 char *p;
1439 Py_ssize_t seqlen = 0;
1440 size_t sz = 0;
1441 Py_ssize_t i;
1442 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001443
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444 seq = PySequence_Fast(orig, "");
1445 if (seq == NULL) {
1446 return NULL;
1447 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001448
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449 seqlen = PySequence_Size(seq);
1450 if (seqlen == 0) {
1451 Py_DECREF(seq);
1452 return PyBytes_FromString("");
1453 }
1454 if (seqlen == 1) {
1455 item = PySequence_Fast_GET_ITEM(seq, 0);
1456 if (PyBytes_CheckExact(item)) {
1457 Py_INCREF(item);
1458 Py_DECREF(seq);
1459 return item;
1460 }
1461 }
1462
1463 /* There are at least two things to join, or else we have a subclass
1464 * of the builtin types in the sequence.
1465 * Do a pre-pass to figure out the total amount of space we'll
1466 * need (sz), and see whether all argument are bytes.
1467 */
1468 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1469 for (i = 0; i < seqlen; i++) {
1470 const size_t old_sz = sz;
1471 item = PySequence_Fast_GET_ITEM(seq, i);
1472 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1473 PyErr_Format(PyExc_TypeError,
1474 "sequence item %zd: expected bytes,"
1475 " %.80s found",
1476 i, Py_TYPE(item)->tp_name);
1477 Py_DECREF(seq);
1478 return NULL;
1479 }
1480 sz += Py_SIZE(item);
1481 if (i != 0)
1482 sz += seplen;
1483 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1484 PyErr_SetString(PyExc_OverflowError,
1485 "join() result is too long for a Python string");
1486 Py_DECREF(seq);
1487 return NULL;
1488 }
1489 }
1490
1491 /* Allocate result space. */
1492 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1493 if (res == NULL) {
1494 Py_DECREF(seq);
1495 return NULL;
1496 }
1497
1498 /* Catenate everything. */
1499 /* I'm not worried about a PyByteArray item growing because there's
1500 nowhere in this function where we release the GIL. */
1501 p = PyBytes_AS_STRING(res);
1502 for (i = 0; i < seqlen; ++i) {
1503 size_t n;
1504 char *q;
1505 if (i) {
1506 Py_MEMCPY(p, sep, seplen);
1507 p += seplen;
1508 }
1509 item = PySequence_Fast_GET_ITEM(seq, i);
1510 n = Py_SIZE(item);
1511 if (PyBytes_Check(item))
1512 q = PyBytes_AS_STRING(item);
1513 else
1514 q = PyByteArray_AS_STRING(item);
1515 Py_MEMCPY(p, q, n);
1516 p += n;
1517 }
1518
1519 Py_DECREF(seq);
1520 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001521}
1522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523PyObject *
1524_PyBytes_Join(PyObject *sep, PyObject *x)
1525{
1526 assert(sep != NULL && PyBytes_Check(sep));
1527 assert(x != NULL);
1528 return string_join(sep, x);
1529}
1530
1531Py_LOCAL_INLINE(void)
1532string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1533{
1534 if (*end > len)
1535 *end = len;
1536 else if (*end < 0)
1537 *end += len;
1538 if (*end < 0)
1539 *end = 0;
1540 if (*start < 0)
1541 *start += len;
1542 if (*start < 0)
1543 *start = 0;
1544}
1545
1546Py_LOCAL_INLINE(Py_ssize_t)
1547string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1548{
1549 PyObject *subobj;
1550 const char *sub;
1551 Py_ssize_t sub_len;
1552 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1553 PyObject *obj_start=Py_None, *obj_end=Py_None;
1554
1555 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1556 &obj_start, &obj_end))
1557 return -2;
1558 /* To support None in "start" and "end" arguments, meaning
1559 the same as if they were not passed.
1560 */
1561 if (obj_start != Py_None)
1562 if (!_PyEval_SliceIndex(obj_start, &start))
1563 return -2;
1564 if (obj_end != Py_None)
1565 if (!_PyEval_SliceIndex(obj_end, &end))
1566 return -2;
1567
1568 if (PyBytes_Check(subobj)) {
1569 sub = PyBytes_AS_STRING(subobj);
1570 sub_len = PyBytes_GET_SIZE(subobj);
1571 }
1572 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1573 /* XXX - the "expected a character buffer object" is pretty
1574 confusing for a non-expert. remap to something else ? */
1575 return -2;
1576
1577 if (dir > 0)
1578 return stringlib_find_slice(
1579 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1580 sub, sub_len, start, end);
1581 else
1582 return stringlib_rfind_slice(
1583 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1584 sub, sub_len, start, end);
1585}
1586
1587
1588PyDoc_STRVAR(find__doc__,
1589"B.find(sub [,start [,end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001590\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001591Return the lowest index in S where substring sub is found,\n\
1592such that sub is contained within s[start:end]. Optional\n\
1593arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001594\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595Return -1 on failure.");
1596
Neal Norwitz6968b052007-02-27 19:02:19 +00001597static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001599{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600 Py_ssize_t result = string_find_internal(self, args, +1);
1601 if (result == -2)
1602 return NULL;
1603 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001604}
1605
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606
1607PyDoc_STRVAR(index__doc__,
1608"B.index(sub [,start [,end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001609\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610Like B.find() but raise ValueError when the substring is not found.");
1611
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001612static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001614{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615 Py_ssize_t result = string_find_internal(self, args, +1);
1616 if (result == -2)
1617 return NULL;
1618 if (result == -1) {
1619 PyErr_SetString(PyExc_ValueError,
1620 "substring not found");
1621 return NULL;
1622 }
1623 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001624}
1625
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626
1627PyDoc_STRVAR(rfind__doc__,
1628"B.rfind(sub [,start [,end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001629\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630Return the highest index in B where substring sub is found,\n\
1631such that sub is contained within s[start:end]. Optional\n\
1632arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001633\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634Return -1 on failure.");
1635
Neal Norwitz6968b052007-02-27 19:02:19 +00001636static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001638{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639 Py_ssize_t result = string_find_internal(self, args, -1);
1640 if (result == -2)
1641 return NULL;
1642 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001643}
1644
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001645
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646PyDoc_STRVAR(rindex__doc__,
1647"B.rindex(sub [,start [,end]]) -> int\n\
1648\n\
1649Like B.rfind() but raise ValueError when the substring is not found.");
1650
1651static PyObject *
1652string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001653{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654 Py_ssize_t result = string_find_internal(self, args, -1);
1655 if (result == -2)
1656 return NULL;
1657 if (result == -1) {
1658 PyErr_SetString(PyExc_ValueError,
1659 "substring not found");
1660 return NULL;
1661 }
1662 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001663}
1664
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
1666Py_LOCAL_INLINE(PyObject *)
1667do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001668{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669 Py_buffer vsep;
1670 char *s = PyBytes_AS_STRING(self);
1671 Py_ssize_t len = PyBytes_GET_SIZE(self);
1672 char *sep;
1673 Py_ssize_t seplen;
1674 Py_ssize_t i, j;
1675
1676 if (_getbuffer(sepobj, &vsep) < 0)
1677 return NULL;
1678 sep = vsep.buf;
1679 seplen = vsep.len;
1680
1681 i = 0;
1682 if (striptype != RIGHTSTRIP) {
1683 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1684 i++;
1685 }
1686 }
1687
1688 j = len;
1689 if (striptype != LEFTSTRIP) {
1690 do {
1691 j--;
1692 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1693 j++;
1694 }
1695
1696 PyObject_ReleaseBuffer(sepobj, &vsep);
1697
1698 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1699 Py_INCREF(self);
1700 return (PyObject*)self;
1701 }
1702 else
1703 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001704}
1705
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
1707Py_LOCAL_INLINE(PyObject *)
1708do_strip(PyBytesObject *self, int striptype)
1709{
1710 char *s = PyBytes_AS_STRING(self);
1711 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1712
1713 i = 0;
1714 if (striptype != RIGHTSTRIP) {
1715 while (i < len && ISSPACE(s[i])) {
1716 i++;
1717 }
1718 }
1719
1720 j = len;
1721 if (striptype != LEFTSTRIP) {
1722 do {
1723 j--;
1724 } while (j >= i && ISSPACE(s[j]));
1725 j++;
1726 }
1727
1728 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1729 Py_INCREF(self);
1730 return (PyObject*)self;
1731 }
1732 else
1733 return PyBytes_FromStringAndSize(s+i, j-i);
1734}
1735
1736
1737Py_LOCAL_INLINE(PyObject *)
1738do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1739{
1740 PyObject *sep = NULL;
1741
1742 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1743 return NULL;
1744
1745 if (sep != NULL && sep != Py_None) {
1746 return do_xstrip(self, striptype, sep);
1747 }
1748 return do_strip(self, striptype);
1749}
1750
1751
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001752PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001754\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001755Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001757static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001759{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760 if (PyTuple_GET_SIZE(args) == 0)
1761 return do_strip(self, BOTHSTRIP); /* Common case */
1762 else
1763 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001764}
1765
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001767PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001770Strip leading bytes contained in the argument.\n\
1771If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775 if (PyTuple_GET_SIZE(args) == 0)
1776 return do_strip(self, LEFTSTRIP); /* Common case */
1777 else
1778 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779}
1780
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001782PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001785Strip trailing bytes contained in the argument.\n\
1786If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790 if (PyTuple_GET_SIZE(args) == 0)
1791 return do_strip(self, RIGHTSTRIP); /* Common case */
1792 else
1793 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001794}
Neal Norwitz6968b052007-02-27 19:02:19 +00001795
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
1797PyDoc_STRVAR(count__doc__,
1798"B.count(sub [,start [,end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001799\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800Return the number of non-overlapping occurrences of substring sub in\n\
1801string S[start:end]. Optional arguments start and end are interpreted\n\
1802as in slice notation.");
1803
1804static PyObject *
1805string_count(PyBytesObject *self, PyObject *args)
1806{
1807 PyObject *sub_obj;
1808 const char *str = PyBytes_AS_STRING(self), *sub;
1809 Py_ssize_t sub_len;
1810 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1811
1812 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1813 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1814 return NULL;
1815
1816 if (PyBytes_Check(sub_obj)) {
1817 sub = PyBytes_AS_STRING(sub_obj);
1818 sub_len = PyBytes_GET_SIZE(sub_obj);
1819 }
1820 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1821 return NULL;
1822
1823 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1824
1825 return PyLong_FromSsize_t(
1826 stringlib_count(str + start, end - start, sub, sub_len)
1827 );
1828}
1829
1830
1831PyDoc_STRVAR(translate__doc__,
1832"B.translate(table[, deletechars]) -> bytes\n\
1833\n\
1834Return a copy of B, where all characters occurring in the\n\
1835optional argument deletechars are removed, and the remaining\n\
1836characters have been mapped through the given translation\n\
1837table, which must be a bytes object of length 256.");
1838
1839static PyObject *
1840string_translate(PyBytesObject *self, PyObject *args)
1841{
1842 register char *input, *output;
1843 const char *table;
1844 register Py_ssize_t i, c, changed = 0;
1845 PyObject *input_obj = (PyObject*)self;
1846 const char *output_start, *del_table=NULL;
1847 Py_ssize_t inlen, tablen, dellen = 0;
1848 PyObject *result;
1849 int trans_table[256];
1850 PyObject *tableobj, *delobj = NULL;
1851
1852 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1853 &tableobj, &delobj))
1854 return NULL;
1855
1856 if (PyBytes_Check(tableobj)) {
1857 table = PyBytes_AS_STRING(tableobj);
1858 tablen = PyBytes_GET_SIZE(tableobj);
1859 }
1860 else if (tableobj == Py_None) {
1861 table = NULL;
1862 tablen = 256;
1863 }
1864 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1865 return NULL;
1866
1867 if (tablen != 256) {
1868 PyErr_SetString(PyExc_ValueError,
1869 "translation table must be 256 characters long");
1870 return NULL;
1871 }
1872
1873 if (delobj != NULL) {
1874 if (PyBytes_Check(delobj)) {
1875 del_table = PyBytes_AS_STRING(delobj);
1876 dellen = PyBytes_GET_SIZE(delobj);
1877 }
1878 else if (PyUnicode_Check(delobj)) {
1879 PyErr_SetString(PyExc_TypeError,
1880 "deletions are implemented differently for unicode");
1881 return NULL;
1882 }
1883 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1884 return NULL;
1885 }
1886 else {
1887 del_table = NULL;
1888 dellen = 0;
1889 }
1890
1891 inlen = PyBytes_GET_SIZE(input_obj);
1892 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1893 if (result == NULL)
1894 return NULL;
1895 output_start = output = PyBytes_AsString(result);
1896 input = PyBytes_AS_STRING(input_obj);
1897
1898 if (dellen == 0 && table != NULL) {
1899 /* If no deletions are required, use faster code */
1900 for (i = inlen; --i >= 0; ) {
1901 c = Py_CHARMASK(*input++);
1902 if (Py_CHARMASK((*output++ = table[c])) != c)
1903 changed = 1;
1904 }
1905 if (changed || !PyBytes_CheckExact(input_obj))
1906 return result;
1907 Py_DECREF(result);
1908 Py_INCREF(input_obj);
1909 return input_obj;
1910 }
1911
1912 if (table == NULL) {
1913 for (i = 0; i < 256; i++)
1914 trans_table[i] = Py_CHARMASK(i);
1915 } else {
1916 for (i = 0; i < 256; i++)
1917 trans_table[i] = Py_CHARMASK(table[i]);
1918 }
1919
1920 for (i = 0; i < dellen; i++)
1921 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1922
1923 for (i = inlen; --i >= 0; ) {
1924 c = Py_CHARMASK(*input++);
1925 if (trans_table[c] != -1)
1926 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1927 continue;
1928 changed = 1;
1929 }
1930 if (!changed && PyBytes_CheckExact(input_obj)) {
1931 Py_DECREF(result);
1932 Py_INCREF(input_obj);
1933 return input_obj;
1934 }
1935 /* Fix the size of the resulting string */
1936 if (inlen > 0)
1937 _PyBytes_Resize(&result, output - output_start);
1938 return result;
1939}
1940
1941
/* Search directions understood by findstring() and countstring() */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within target[0:target_len];
   evaluates to a char * to the match, or NULL when there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1950/* String ops must return a string. */
1951/* If the object is subclass of string, create a copy */
1952Py_LOCAL(PyBytesObject *)
1953return_self(PyBytesObject *self)
1954{
1955 if (PyBytes_CheckExact(self)) {
1956 Py_INCREF(self);
1957 return self;
1958 }
1959 return (PyBytesObject *)PyBytes_FromStringAndSize(
1960 PyBytes_AS_STRING(self),
1961 PyBytes_GET_SIZE(self));
1962}
1963
1964Py_LOCAL_INLINE(Py_ssize_t)
1965countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1966{
1967 Py_ssize_t count=0;
1968 const char *start=target;
1969 const char *end=target+target_len;
1970
1971 while ( (start=findchar(start, end-start, c)) != NULL ) {
1972 count++;
1973 if (count >= maxcount)
1974 break;
1975 start += 1;
1976 }
1977 return count;
1978}
1979
1980Py_LOCAL(Py_ssize_t)
1981findstring(const char *target, Py_ssize_t target_len,
1982 const char *pattern, Py_ssize_t pattern_len,
1983 Py_ssize_t start,
1984 Py_ssize_t end,
1985 int direction)
1986{
1987 if (start < 0) {
1988 start += target_len;
1989 if (start < 0)
1990 start = 0;
1991 }
1992 if (end > target_len) {
1993 end = target_len;
1994 } else if (end < 0) {
1995 end += target_len;
1996 if (end < 0)
1997 end = 0;
1998 }
1999
2000 /* zero-length substrings always match at the first attempt */
2001 if (pattern_len == 0)
2002 return (direction > 0) ? start : end;
2003
2004 end -= pattern_len;
2005
2006 if (direction < 0) {
2007 for (; end >= start; end--)
2008 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2009 return end;
2010 } else {
2011 for (; start <= end; start++)
2012 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2013 return start;
2014 }
2015 return -1;
2016}
2017
2018Py_LOCAL_INLINE(Py_ssize_t)
2019countstring(const char *target, Py_ssize_t target_len,
2020 const char *pattern, Py_ssize_t pattern_len,
2021 Py_ssize_t start,
2022 Py_ssize_t end,
2023 int direction, Py_ssize_t maxcount)
2024{
2025 Py_ssize_t count=0;
2026
2027 if (start < 0) {
2028 start += target_len;
2029 if (start < 0)
2030 start = 0;
2031 }
2032 if (end > target_len) {
2033 end = target_len;
2034 } else if (end < 0) {
2035 end += target_len;
2036 if (end < 0)
2037 end = 0;
2038 }
2039
2040 /* zero-length substrings match everywhere */
2041 if (pattern_len == 0 || maxcount == 0) {
2042 if (target_len+1 < maxcount)
2043 return target_len+1;
2044 return maxcount;
2045 }
2046
2047 end -= pattern_len;
2048 if (direction < 0) {
2049 for (; (end >= start); end--)
2050 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2051 count++;
2052 if (--maxcount <= 0) break;
2053 end -= pattern_len-1;
2054 }
2055 } else {
2056 for (; (start <= end); start++)
2057 if (Py_STRING_MATCH(target, start,
2058 pattern, pattern_len)) {
2059 count++;
2060 if (--maxcount <= 0)
2061 break;
2062 start += pattern_len-1;
2063 }
2064 }
2065 return count;
2066}
2067
2068
2069/* Algorithms for different cases of string replacement */
2070
2071/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2072Py_LOCAL(PyBytesObject *)
2073replace_interleave(PyBytesObject *self,
2074 const char *to_s, Py_ssize_t to_len,
2075 Py_ssize_t maxcount)
2076{
2077 char *self_s, *result_s;
2078 Py_ssize_t self_len, result_len;
2079 Py_ssize_t count, i, product;
2080 PyBytesObject *result;
2081
2082 self_len = PyBytes_GET_SIZE(self);
2083
2084 /* 1 at the end plus 1 after every character */
2085 count = self_len+1;
2086 if (maxcount < count)
2087 count = maxcount;
2088
2089 /* Check for overflow */
2090 /* result_len = count * to_len + self_len; */
2091 product = count * to_len;
2092 if (product / to_len != count) {
2093 PyErr_SetString(PyExc_OverflowError,
2094 "replace string is too long");
2095 return NULL;
2096 }
2097 result_len = product + self_len;
2098 if (result_len < 0) {
2099 PyErr_SetString(PyExc_OverflowError,
2100 "replace string is too long");
2101 return NULL;
2102 }
2103
2104 if (! (result = (PyBytesObject *)
2105 PyBytes_FromStringAndSize(NULL, result_len)) )
2106 return NULL;
2107
2108 self_s = PyBytes_AS_STRING(self);
2109 result_s = PyBytes_AS_STRING(result);
2110
2111 /* TODO: special case single character, which doesn't need memcpy */
2112
2113 /* Lay the first one down (guaranteed this will occur) */
2114 Py_MEMCPY(result_s, to_s, to_len);
2115 result_s += to_len;
2116 count -= 1;
2117
2118 for (i=0; i<count; i++) {
2119 *result_s++ = *self_s++;
2120 Py_MEMCPY(result_s, to_s, to_len);
2121 result_s += to_len;
2122 }
2123
2124 /* Copy the rest of the original string */
2125 Py_MEMCPY(result_s, self_s, self_len-i);
2126
2127 return result;
2128}
2129
2130/* Special case for deleting a single character */
2131/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2132Py_LOCAL(PyBytesObject *)
2133replace_delete_single_character(PyBytesObject *self,
2134 char from_c, Py_ssize_t maxcount)
2135{
2136 char *self_s, *result_s;
2137 char *start, *next, *end;
2138 Py_ssize_t self_len, result_len;
2139 Py_ssize_t count;
2140 PyBytesObject *result;
2141
2142 self_len = PyBytes_GET_SIZE(self);
2143 self_s = PyBytes_AS_STRING(self);
2144
2145 count = countchar(self_s, self_len, from_c, maxcount);
2146 if (count == 0) {
2147 return return_self(self);
2148 }
2149
2150 result_len = self_len - count; /* from_len == 1 */
2151 assert(result_len>=0);
2152
2153 if ( (result = (PyBytesObject *)
2154 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2155 return NULL;
2156 result_s = PyBytes_AS_STRING(result);
2157
2158 start = self_s;
2159 end = self_s + self_len;
2160 while (count-- > 0) {
2161 next = findchar(start, end-start, from_c);
2162 if (next == NULL)
2163 break;
2164 Py_MEMCPY(result_s, start, next-start);
2165 result_s += (next-start);
2166 start = next+1;
2167 }
2168 Py_MEMCPY(result_s, start, end-start);
2169
2170 return result;
2171}
2172
2173/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2174
2175Py_LOCAL(PyBytesObject *)
2176replace_delete_substring(PyBytesObject *self,
2177 const char *from_s, Py_ssize_t from_len,
2178 Py_ssize_t maxcount) {
2179 char *self_s, *result_s;
2180 char *start, *next, *end;
2181 Py_ssize_t self_len, result_len;
2182 Py_ssize_t count, offset;
2183 PyBytesObject *result;
2184
2185 self_len = PyBytes_GET_SIZE(self);
2186 self_s = PyBytes_AS_STRING(self);
2187
2188 count = countstring(self_s, self_len,
2189 from_s, from_len,
2190 0, self_len, 1,
2191 maxcount);
2192
2193 if (count == 0) {
2194 /* no matches */
2195 return return_self(self);
2196 }
2197
2198 result_len = self_len - (count * from_len);
2199 assert (result_len>=0);
2200
2201 if ( (result = (PyBytesObject *)
2202 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2203 return NULL;
2204
2205 result_s = PyBytes_AS_STRING(result);
2206
2207 start = self_s;
2208 end = self_s + self_len;
2209 while (count-- > 0) {
2210 offset = findstring(start, end-start,
2211 from_s, from_len,
2212 0, end-start, FORWARD);
2213 if (offset == -1)
2214 break;
2215 next = start + offset;
2216
2217 Py_MEMCPY(result_s, start, next-start);
2218
2219 result_s += (next-start);
2220 start = next+from_len;
2221 }
2222 Py_MEMCPY(result_s, start, end-start);
2223 return result;
2224}
2225
2226/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2227Py_LOCAL(PyBytesObject *)
2228replace_single_character_in_place(PyBytesObject *self,
2229 char from_c, char to_c,
2230 Py_ssize_t maxcount)
2231{
2232 char *self_s, *result_s, *start, *end, *next;
2233 Py_ssize_t self_len;
2234 PyBytesObject *result;
2235
2236 /* The result string will be the same size */
2237 self_s = PyBytes_AS_STRING(self);
2238 self_len = PyBytes_GET_SIZE(self);
2239
2240 next = findchar(self_s, self_len, from_c);
2241
2242 if (next == NULL) {
2243 /* No matches; return the original string */
2244 return return_self(self);
2245 }
2246
2247 /* Need to make a new string */
2248 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2249 if (result == NULL)
2250 return NULL;
2251 result_s = PyBytes_AS_STRING(result);
2252 Py_MEMCPY(result_s, self_s, self_len);
2253
2254 /* change everything in-place, starting with this one */
2255 start = result_s + (next-self_s);
2256 *start = to_c;
2257 start++;
2258 end = result_s + self_len;
2259
2260 while (--maxcount > 0) {
2261 next = findchar(start, end-start, from_c);
2262 if (next == NULL)
2263 break;
2264 *next = to_c;
2265 start = next+1;
2266 }
2267
2268 return result;
2269}
2270
2271/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2272Py_LOCAL(PyBytesObject *)
2273replace_substring_in_place(PyBytesObject *self,
2274 const char *from_s, Py_ssize_t from_len,
2275 const char *to_s, Py_ssize_t to_len,
2276 Py_ssize_t maxcount)
2277{
2278 char *result_s, *start, *end;
2279 char *self_s;
2280 Py_ssize_t self_len, offset;
2281 PyBytesObject *result;
2282
2283 /* The result string will be the same size */
2284
2285 self_s = PyBytes_AS_STRING(self);
2286 self_len = PyBytes_GET_SIZE(self);
2287
2288 offset = findstring(self_s, self_len,
2289 from_s, from_len,
2290 0, self_len, FORWARD);
2291 if (offset == -1) {
2292 /* No matches; return the original string */
2293 return return_self(self);
2294 }
2295
2296 /* Need to make a new string */
2297 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2298 if (result == NULL)
2299 return NULL;
2300 result_s = PyBytes_AS_STRING(result);
2301 Py_MEMCPY(result_s, self_s, self_len);
2302
2303 /* change everything in-place, starting with this one */
2304 start = result_s + offset;
2305 Py_MEMCPY(start, to_s, from_len);
2306 start += from_len;
2307 end = result_s + self_len;
2308
2309 while ( --maxcount > 0) {
2310 offset = findstring(start, end-start,
2311 from_s, from_len,
2312 0, end-start, FORWARD);
2313 if (offset==-1)
2314 break;
2315 Py_MEMCPY(start+offset, to_s, from_len);
2316 start += offset+from_len;
2317 }
2318
2319 return result;
2320}
2321
2322/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2323Py_LOCAL(PyBytesObject *)
2324replace_single_character(PyBytesObject *self,
2325 char from_c,
2326 const char *to_s, Py_ssize_t to_len,
2327 Py_ssize_t maxcount)
2328{
2329 char *self_s, *result_s;
2330 char *start, *next, *end;
2331 Py_ssize_t self_len, result_len;
2332 Py_ssize_t count, product;
2333 PyBytesObject *result;
2334
2335 self_s = PyBytes_AS_STRING(self);
2336 self_len = PyBytes_GET_SIZE(self);
2337
2338 count = countchar(self_s, self_len, from_c, maxcount);
2339 if (count == 0) {
2340 /* no matches, return unchanged */
2341 return return_self(self);
2342 }
2343
2344 /* use the difference between current and new, hence the "-1" */
2345 /* result_len = self_len + count * (to_len-1) */
2346 product = count * (to_len-1);
2347 if (product / (to_len-1) != count) {
2348 PyErr_SetString(PyExc_OverflowError,
2349 "replace string is too long");
2350 return NULL;
2351 }
2352 result_len = self_len + product;
2353 if (result_len < 0) {
2354 PyErr_SetString(PyExc_OverflowError,
2355 "replace string is too long");
2356 return NULL;
2357 }
2358
2359 if ( (result = (PyBytesObject *)
2360 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2361 return NULL;
2362 result_s = PyBytes_AS_STRING(result);
2363
2364 start = self_s;
2365 end = self_s + self_len;
2366 while (count-- > 0) {
2367 next = findchar(start, end-start, from_c);
2368 if (next == NULL)
2369 break;
2370
2371 if (next == start) {
2372 /* replace with the 'to' */
2373 Py_MEMCPY(result_s, to_s, to_len);
2374 result_s += to_len;
2375 start += 1;
2376 } else {
2377 /* copy the unchanged old then the 'to' */
2378 Py_MEMCPY(result_s, start, next-start);
2379 result_s += (next-start);
2380 Py_MEMCPY(result_s, to_s, to_len);
2381 result_s += to_len;
2382 start = next+1;
2383 }
2384 }
2385 /* Copy the remainder of the remaining string */
2386 Py_MEMCPY(result_s, start, end-start);
2387
2388 return result;
2389}
2390
2391/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2392Py_LOCAL(PyBytesObject *)
2393replace_substring(PyBytesObject *self,
2394 const char *from_s, Py_ssize_t from_len,
2395 const char *to_s, Py_ssize_t to_len,
2396 Py_ssize_t maxcount) {
2397 char *self_s, *result_s;
2398 char *start, *next, *end;
2399 Py_ssize_t self_len, result_len;
2400 Py_ssize_t count, offset, product;
2401 PyBytesObject *result;
2402
2403 self_s = PyBytes_AS_STRING(self);
2404 self_len = PyBytes_GET_SIZE(self);
2405
2406 count = countstring(self_s, self_len,
2407 from_s, from_len,
2408 0, self_len, FORWARD, maxcount);
2409 if (count == 0) {
2410 /* no matches, return unchanged */
2411 return return_self(self);
2412 }
2413
2414 /* Check for overflow */
2415 /* result_len = self_len + count * (to_len-from_len) */
2416 product = count * (to_len-from_len);
2417 if (product / (to_len-from_len) != count) {
2418 PyErr_SetString(PyExc_OverflowError,
2419 "replace string is too long");
2420 return NULL;
2421 }
2422 result_len = self_len + product;
2423 if (result_len < 0) {
2424 PyErr_SetString(PyExc_OverflowError,
2425 "replace string is too long");
2426 return NULL;
2427 }
2428
2429 if ( (result = (PyBytesObject *)
2430 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2431 return NULL;
2432 result_s = PyBytes_AS_STRING(result);
2433
2434 start = self_s;
2435 end = self_s + self_len;
2436 while (count-- > 0) {
2437 offset = findstring(start, end-start,
2438 from_s, from_len,
2439 0, end-start, FORWARD);
2440 if (offset == -1)
2441 break;
2442 next = start+offset;
2443 if (next == start) {
2444 /* replace with the 'to' */
2445 Py_MEMCPY(result_s, to_s, to_len);
2446 result_s += to_len;
2447 start += from_len;
2448 } else {
2449 /* copy the unchanged old then the 'to' */
2450 Py_MEMCPY(result_s, start, next-start);
2451 result_s += (next-start);
2452 Py_MEMCPY(result_s, to_s, to_len);
2453 result_s += to_len;
2454 start = next+from_len;
2455 }
2456 }
2457 /* Copy the remainder of the remaining string */
2458 Py_MEMCPY(result_s, start, end-start);
2459
2460 return result;
2461}
2462
2463
2464Py_LOCAL(PyBytesObject *)
2465replace(PyBytesObject *self,
2466 const char *from_s, Py_ssize_t from_len,
2467 const char *to_s, Py_ssize_t to_len,
2468 Py_ssize_t maxcount)
2469{
2470 if (maxcount < 0) {
2471 maxcount = PY_SSIZE_T_MAX;
2472 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2473 /* nothing to do; return the original string */
2474 return return_self(self);
2475 }
2476
2477 if (maxcount == 0 ||
2478 (from_len == 0 && to_len == 0)) {
2479 /* nothing to do; return the original string */
2480 return return_self(self);
2481 }
2482
2483 /* Handle zero-length special cases */
2484
2485 if (from_len == 0) {
2486 /* insert the 'to' string everywhere. */
2487 /* >>> "Python".replace("", ".") */
2488 /* '.P.y.t.h.o.n.' */
2489 return replace_interleave(self, to_s, to_len, maxcount);
2490 }
2491
2492 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2493 /* point for an empty self string to generate a non-empty string */
2494 /* Special case so the remaining code always gets a non-empty string */
2495 if (PyBytes_GET_SIZE(self) == 0) {
2496 return return_self(self);
2497 }
2498
2499 if (to_len == 0) {
2500 /* delete all occurances of 'from' string */
2501 if (from_len == 1) {
2502 return replace_delete_single_character(
2503 self, from_s[0], maxcount);
2504 } else {
2505 return replace_delete_substring(self, from_s,
2506 from_len, maxcount);
2507 }
2508 }
2509
2510 /* Handle special case where both strings have the same length */
2511
2512 if (from_len == to_len) {
2513 if (from_len == 1) {
2514 return replace_single_character_in_place(
2515 self,
2516 from_s[0],
2517 to_s[0],
2518 maxcount);
2519 } else {
2520 return replace_substring_in_place(
2521 self, from_s, from_len, to_s, to_len,
2522 maxcount);
2523 }
2524 }
2525
2526 /* Otherwise use the more generic algorithms */
2527 if (from_len == 1) {
2528 return replace_single_character(self, from_s[0],
2529 to_s, to_len, maxcount);
2530 } else {
2531 /* len('from')>=2, len('to')>=1 */
2532 return replace_substring(self, from_s, from_len, to_s, to_len,
2533 maxcount);
2534 }
2535}
2536
2537PyDoc_STRVAR(replace__doc__,
2538"B.replace(old, new[, count]) -> bytes\n\
2539\n\
2540Return a copy of B with all occurrences of subsection\n\
2541old replaced by new. If the optional argument count is\n\
2542given, only the first count occurrences are replaced.");
2543
2544static PyObject *
2545string_replace(PyBytesObject *self, PyObject *args)
2546{
2547 Py_ssize_t count = -1;
2548 PyObject *from, *to;
2549 const char *from_s, *to_s;
2550 Py_ssize_t from_len, to_len;
2551
2552 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2553 return NULL;
2554
2555 if (PyBytes_Check(from)) {
2556 from_s = PyBytes_AS_STRING(from);
2557 from_len = PyBytes_GET_SIZE(from);
2558 }
2559 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2560 return NULL;
2561
2562 if (PyBytes_Check(to)) {
2563 to_s = PyBytes_AS_STRING(to);
2564 to_len = PyBytes_GET_SIZE(to);
2565 }
2566 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2567 return NULL;
2568
2569 return (PyObject *)replace((PyBytesObject *) self,
2570 from_s, from_len,
2571 to_s, to_len, count);
2572}
2573
2574/** End DALKE **/
2575
2576/* Matches the end (direction >= 0) or start (direction < 0) of self
2577 * against substr, using the start and end arguments. Returns
2578 * -1 on error, 0 if not found and 1 if found.
2579 */
2580Py_LOCAL(int)
2581_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2582 Py_ssize_t end, int direction)
2583{
2584 Py_ssize_t len = PyBytes_GET_SIZE(self);
2585 Py_ssize_t slen;
2586 const char* sub;
2587 const char* str;
2588
2589 if (PyBytes_Check(substr)) {
2590 sub = PyBytes_AS_STRING(substr);
2591 slen = PyBytes_GET_SIZE(substr);
2592 }
2593 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2594 return -1;
2595 str = PyBytes_AS_STRING(self);
2596
2597 string_adjust_indices(&start, &end, len);
2598
2599 if (direction < 0) {
2600 /* startswith */
2601 if (start+slen > len)
2602 return 0;
2603 } else {
2604 /* endswith */
2605 if (end-start < slen || start > len)
2606 return 0;
2607
2608 if (end-slen > start)
2609 start = end - slen;
2610 }
2611 if (end-start >= slen)
2612 return ! memcmp(str+start, sub, slen);
2613 return 0;
2614}
2615
2616
2617PyDoc_STRVAR(startswith__doc__,
2618"B.startswith(prefix [,start [,end]]) -> bool\n\
2619\n\
2620Return True if B starts with the specified prefix, False otherwise.\n\
2621With optional start, test B beginning at that position.\n\
2622With optional end, stop comparing B at that position.\n\
2623prefix can also be a tuple of strings to try.");
2624
2625static PyObject *
2626string_startswith(PyBytesObject *self, PyObject *args)
2627{
2628 Py_ssize_t start = 0;
2629 Py_ssize_t end = PY_SSIZE_T_MAX;
2630 PyObject *subobj;
2631 int result;
2632
2633 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2634 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2635 return NULL;
2636 if (PyTuple_Check(subobj)) {
2637 Py_ssize_t i;
2638 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2639 result = _string_tailmatch(self,
2640 PyTuple_GET_ITEM(subobj, i),
2641 start, end, -1);
2642 if (result == -1)
2643 return NULL;
2644 else if (result) {
2645 Py_RETURN_TRUE;
2646 }
2647 }
2648 Py_RETURN_FALSE;
2649 }
2650 result = _string_tailmatch(self, subobj, start, end, -1);
2651 if (result == -1)
2652 return NULL;
2653 else
2654 return PyBool_FromLong(result);
2655}
2656
2657
2658PyDoc_STRVAR(endswith__doc__,
2659"B.endswith(suffix [,start [,end]]) -> bool\n\
2660\n\
2661Return True if B ends with the specified suffix, False otherwise.\n\
2662With optional start, test B beginning at that position.\n\
2663With optional end, stop comparing B at that position.\n\
2664suffix can also be a tuple of strings to try.");
2665
2666static PyObject *
2667string_endswith(PyBytesObject *self, PyObject *args)
2668{
2669 Py_ssize_t start = 0;
2670 Py_ssize_t end = PY_SSIZE_T_MAX;
2671 PyObject *subobj;
2672 int result;
2673
2674 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2675 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2676 return NULL;
2677 if (PyTuple_Check(subobj)) {
2678 Py_ssize_t i;
2679 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2680 result = _string_tailmatch(self,
2681 PyTuple_GET_ITEM(subobj, i),
2682 start, end, +1);
2683 if (result == -1)
2684 return NULL;
2685 else if (result) {
2686 Py_RETURN_TRUE;
2687 }
2688 }
2689 Py_RETURN_FALSE;
2690 }
2691 result = _string_tailmatch(self, subobj, start, end, +1);
2692 if (result == -1)
2693 return NULL;
2694 else
2695 return PyBool_FromLong(result);
2696}
2697
2698
2699PyDoc_STRVAR(decode__doc__,
2700"B.decode([encoding[, errors]]) -> object\n\
2701\n\
2702Decodes S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002703to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002704handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2705a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002707able to handle UnicodeDecodeErrors.");
2708
2709static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002711{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712 const char *encoding = NULL;
2713 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002714
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2716 return NULL;
2717 if (encoding == NULL)
2718 encoding = PyUnicode_GetDefaultEncoding();
2719 return PyCodec_Decode(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002720}
2721
Guido van Rossum20188312006-05-05 15:15:40 +00002722
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002723PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002725\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002726Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002729
2730static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002731hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002732{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733 if (c >= 128)
2734 return -1;
2735 if (ISDIGIT(c))
2736 return c - '0';
2737 else {
2738 if (ISUPPER(c))
2739 c = TOLOWER(c);
2740 if (c >= 'a' && c <= 'f')
2741 return c - 'a' + 10;
2742 }
2743 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002744}
2745
2746static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002748{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749 PyObject *newstring, *hexobj;
2750 char *buf;
2751 Py_UNICODE *hex;
2752 Py_ssize_t hexlen, byteslen, i, j;
2753 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002754
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2756 return NULL;
2757 assert(PyUnicode_Check(hexobj));
2758 hexlen = PyUnicode_GET_SIZE(hexobj);
2759 hex = PyUnicode_AS_UNICODE(hexobj);
2760 byteslen = hexlen/2; /* This overestimates if there are spaces */
2761 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2762 if (!newstring)
2763 return NULL;
2764 buf = PyBytes_AS_STRING(newstring);
2765 for (i = j = 0; i < hexlen; i += 2) {
2766 /* skip over spaces in the input */
2767 while (hex[i] == ' ')
2768 i++;
2769 if (i >= hexlen)
2770 break;
2771 top = hex_digit_to_int(hex[i]);
2772 bot = hex_digit_to_int(hex[i+1]);
2773 if (top == -1 || bot == -1) {
2774 PyErr_Format(PyExc_ValueError,
2775 "non-hexadecimal number found in "
2776 "fromhex() arg at position %zd", i);
2777 goto error;
2778 }
2779 buf[j++] = (top << 4) + bot;
2780 }
2781 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2782 goto error;
2783 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002784
2785 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002786 Py_XDECREF(newstring);
2787 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002788}
2789
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002790
2791static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002793{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002794 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002795}
2796
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002797
2798static PyMethodDef
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799string_methods[] = {
2800 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
2801 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2802 _Py_capitalize__doc__},
2803 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2804 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2805 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2806 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2807 endswith__doc__},
2808 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2809 expandtabs__doc__},
2810 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2811 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2812 fromhex_doc},
2813 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2814 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2815 _Py_isalnum__doc__},
2816 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2817 _Py_isalpha__doc__},
2818 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2819 _Py_isdigit__doc__},
2820 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2821 _Py_islower__doc__},
2822 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2823 _Py_isspace__doc__},
2824 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2825 _Py_istitle__doc__},
2826 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2827 _Py_isupper__doc__},
2828 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2829 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2830 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2831 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2832 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
2833 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2834 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2835 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2836 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2837 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2838 rpartition__doc__},
2839 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2840 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2841 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2842 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2843 splitlines__doc__},
2844 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2845 startswith__doc__},
2846 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2847 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2848 _Py_swapcase__doc__},
2849 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2850 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2851 translate__doc__},
2852 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2853 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2854 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002855};
2856
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002857static PyObject *
2858str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2859
2860static PyObject *
2861string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2862{
2863 PyObject *x = NULL, *it;
2864 const char *encoding = NULL;
2865 const char *errors = NULL;
2866 PyObject *new = NULL;
2867 Py_ssize_t i, size;
2868 static char *kwlist[] = {"source", "encoding", "errors", 0};
2869
2870 if (type != &PyBytes_Type)
2871 return str_subtype_new(type, args, kwds);
2872 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2873 &encoding, &errors))
2874 return NULL;
2875 if (x == NULL) {
2876 if (encoding != NULL || errors != NULL) {
2877 PyErr_SetString(PyExc_TypeError,
2878 "encoding or errors without sequence "
2879 "argument");
2880 return NULL;
2881 }
2882 return PyBytes_FromString("");
2883 }
2884
2885 if (PyUnicode_Check(x)) {
2886 /* Encode via the codec registry */
2887 if (encoding == NULL) {
2888 PyErr_SetString(PyExc_TypeError,
2889 "string argument without an encoding");
2890 return NULL;
2891 }
2892 new = PyCodec_Encode(x, encoding, errors);
2893 if (new == NULL)
2894 return NULL;
2895 assert(PyBytes_Check(new));
2896 return new;
2897 }
2898
2899 /* If it's not unicode, there can't be encoding or errors */
2900 if (encoding != NULL || errors != NULL) {
2901 PyErr_SetString(PyExc_TypeError,
2902 "encoding or errors without a string argument");
2903 return NULL;
2904 }
2905
2906 /* Is it an int? */
2907 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2908 if (size == -1 && PyErr_Occurred()) {
2909 PyErr_Clear();
2910 }
2911 else {
2912 if (size < 0) {
2913 PyErr_SetString(PyExc_ValueError, "negative count");
2914 return NULL;
2915 }
2916 new = PyBytes_FromStringAndSize(NULL, size);
2917 if (new == NULL) {
2918 return NULL;
2919 }
2920 if (size > 0) {
2921 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2922 }
2923 return new;
2924 }
2925
2926 /* Use the modern buffer interface */
2927 if (PyObject_CheckBuffer(x)) {
2928 Py_buffer view;
2929 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2930 return NULL;
2931 new = PyBytes_FromStringAndSize(NULL, view.len);
2932 if (!new)
2933 goto fail;
2934 // XXX(brett.cannon): Better way to get to internal buffer?
2935 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2936 &view, view.len, 'C') < 0)
2937 goto fail;
2938 PyObject_ReleaseBuffer(x, &view);
2939 return new;
2940 fail:
2941 Py_XDECREF(new);
2942 PyObject_ReleaseBuffer(x, &view);
2943 return NULL;
2944 }
2945
2946 /* For iterator version, create a string object and resize as needed */
2947 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2948 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2949 input being a truly long iterator. */
2950 size = 64;
2951 new = PyBytes_FromStringAndSize(NULL, size);
2952 if (new == NULL)
2953 return NULL;
2954
2955 /* XXX Optimize this if the arguments is a list, tuple */
2956
2957 /* Get the iterator */
2958 it = PyObject_GetIter(x);
2959 if (it == NULL)
2960 goto error;
2961
2962 /* Run the iterator to exhaustion */
2963 for (i = 0; ; i++) {
2964 PyObject *item;
2965 Py_ssize_t value;
2966
2967 /* Get the next item */
2968 item = PyIter_Next(it);
2969 if (item == NULL) {
2970 if (PyErr_Occurred())
2971 goto error;
2972 break;
2973 }
2974
2975 /* Interpret it as an int (__index__) */
2976 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2977 Py_DECREF(item);
2978 if (value == -1 && PyErr_Occurred())
2979 goto error;
2980
2981 /* Range check */
2982 if (value < 0 || value >= 256) {
2983 PyErr_SetString(PyExc_ValueError,
2984 "bytes must be in range(0, 256)");
2985 goto error;
2986 }
2987
2988 /* Append the byte */
2989 if (i >= size) {
2990 size *= 2;
2991 if (_PyBytes_Resize(&new, size) < 0)
2992 goto error;
2993 }
2994 ((PyBytesObject *)new)->ob_sval[i] = value;
2995 }
2996 _PyBytes_Resize(&new, i);
2997
2998 /* Clean up and return success */
2999 Py_DECREF(it);
3000 return new;
3001
3002 error:
3003 /* Error handling when new != NULL */
3004 Py_XDECREF(it);
3005 Py_DECREF(new);
3006 return NULL;
3007}
3008
3009static PyObject *
3010str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3011{
3012 PyObject *tmp, *pnew;
3013 Py_ssize_t n;
3014
3015 assert(PyType_IsSubtype(type, &PyBytes_Type));
3016 tmp = string_new(&PyBytes_Type, args, kwds);
3017 if (tmp == NULL)
3018 return NULL;
3019 assert(PyBytes_CheckExact(tmp));
3020 n = PyBytes_GET_SIZE(tmp);
3021 pnew = type->tp_alloc(type, n);
3022 if (pnew != NULL) {
3023 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3024 PyBytes_AS_STRING(tmp), n+1);
3025 ((PyBytesObject *)pnew)->ob_shash =
3026 ((PyBytesObject *)tmp)->ob_shash;
3027 }
3028 Py_DECREF(tmp);
3029 return pnew;
3030}
3031
/* Docstring of the bytes type; referenced as tp_doc by PyBytes_Type. */
3032PyDoc_STRVAR(string_doc,
3033"bytes(iterable_of_ints) -> bytes.\n\
3034bytes(string, encoding[, errors]) -> bytes\n\
3035bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3036bytes(memory_view) -> bytes.\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003037\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003038Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003039 - an iterable yielding integers in range(256)\n\
3040 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003041 - a bytes or a buffer object\n\
3042 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003043
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003045
/* Type object for bytes.
   Instances are variable-sized with an item size of sizeof(char).  The type
   may be subclassed (Py_TPFLAGS_BASETYPE), is constructed through
   string_new, iterated through str_iter and released with PyObject_Del.
   Slots are listed positionally; each line names the slot it fills. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046PyTypeObject PyBytes_Type = {
3047 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3048 "bytes", /* tp_name */
3049 sizeof(PyBytesObject), /* tp_basicsize */
3050 sizeof(char), /* tp_itemsize */
3051 string_dealloc, /* tp_dealloc */
3052 0, /* tp_print */
3053 0, /* tp_getattr */
3054 0, /* tp_setattr */
3055 0, /* tp_compare */
3056 (reprfunc)string_repr, /* tp_repr */
3057 0, /* tp_as_number */
3058 &string_as_sequence, /* tp_as_sequence */
3059 &string_as_mapping, /* tp_as_mapping */
3060 (hashfunc)string_hash, /* tp_hash */
3061 0, /* tp_call */
3062 string_str, /* tp_str */
3063 PyObject_GenericGetAttr, /* tp_getattro */
3064 0, /* tp_setattro */
3065 &string_as_buffer, /* tp_as_buffer */
3066 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3067 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3068 string_doc, /* tp_doc */
3069 0, /* tp_traverse */
3070 0, /* tp_clear */
3071 (richcmpfunc)string_richcompare, /* tp_richcompare */
3072 0, /* tp_weaklistoffset */
3073 str_iter, /* tp_iter */
3074 0, /* tp_iternext */
3075 string_methods, /* tp_methods */
3076 0, /* tp_members */
3077 0, /* tp_getset */
3078 &PyBaseObject_Type, /* tp_base */
3079 0, /* tp_dict */
3080 0, /* tp_descr_get */
3081 0, /* tp_descr_set */
3082 0, /* tp_dictoffset */
3083 0, /* tp_init */
3084 0, /* tp_alloc */
3085 string_new, /* tp_new */
3086 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003087};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089void
3090PyBytes_Concat(register PyObject **pv, register PyObject *w)
3091{
3092 register PyObject *v;
3093 assert(pv != NULL);
3094 if (*pv == NULL)
3095 return;
3096 if (w == NULL) {
3097 Py_DECREF(*pv);
3098 *pv = NULL;
3099 return;
3100 }
3101 v = string_concat(*pv, w);
3102 Py_DECREF(*pv);
3103 *pv = v;
3104}
3105
3106void
3107PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3108{
3109 PyBytes_Concat(pv, w);
3110 Py_XDECREF(w);
3111}
3112
3113
3114/* The following function breaks the notion that strings are immutable:
3115 it changes the size of a string. We get away with this only if there
3116 is only one module referencing the object. You can also think of it
3117 as creating a new string object and destroying the old one, only
3118 more efficiently. In any case, don't use this if the string may
3119 already be known to some other part of the code...
3120 Note that if there's not enough memory to resize the string, the original
3121 string object at *pv is deallocated, *pv is set to NULL, an "out of
3122 memory" exception is set, and -1 is returned. Else (on success) 0 is
3123 returned, and the value in *pv may or may not be the same as on input.
3124 As always, an extra byte is allocated for a trailing \0 byte (newsize
3125 does *not* include that), and a trailing \0 byte is stored.
3126*/
3127
3128int
3129_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3130{
3131 register PyObject *v;
3132 register PyBytesObject *sv;
3133 v = *pv;
3134 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3135 *pv = 0;
3136 Py_DECREF(v);
3137 PyErr_BadInternalCall();
3138 return -1;
3139 }
3140 /* XXX UNREF/NEWREF interface should be more symmetrical */
3141 _Py_DEC_REFTOTAL;
3142 _Py_ForgetReference(v);
3143 *pv = (PyObject *)
3144 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
3145 if (*pv == NULL) {
3146 PyObject_Del(v);
3147 PyErr_NoMemory();
3148 return -1;
3149 }
3150 _Py_NewReference(*pv);
3151 sv = (PyBytesObject *) *pv;
3152 Py_SIZE(sv) = newsize;
3153 sv->ob_sval[newsize] = '\0';
3154 sv->ob_shash = -1; /* invalidate cached hash value */
3155 return 0;
3156}
3157
3158/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3159 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3160 * Python's regular ints.
3161 * Return value: a new PyString*, or NULL if error.
3162 * . *pbuf is set to point into it,
3163 * *plen set to the # of chars following that.
3164 * Caller must decref it when done using pbuf.
3165 * The string starting at *pbuf is of the form
3166 * "-"? ("0x" | "0X")? digit+
3167 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3168 * set in flags. The case of hex digits will be correct,
3169 * There will be at least prec digits, zero-filled on the left if
3170 * necessary to get that many.
3171 * val object to be converted
3172 * flags bitmask of format flags; only F_ALT is looked at
3173 * prec minimum number of digits; 0-fill on left if needed
3174 * type a character in [duoxX]; u acts the same as d
3175 *
3176 * CAUTION: o, x and X conversions on regular ints can never
3177 * produce a '-' sign, but can for Python's unbounded ints.
3178 */
3179PyObject*
3180_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3181 char **pbuf, int *plen)
3182{
3183 PyObject *result = NULL;
3184 char *buf;
3185 Py_ssize_t i;
3186 int sign; /* 1 if '-', else 0 */
3187 int len; /* number of characters */
3188 Py_ssize_t llen;
3189 int numdigits; /* len == numnondigits + numdigits */
3190 int numnondigits = 0;
3191
3192 /* Avoid exceeding SSIZE_T_MAX */
3193 if (prec > PY_SSIZE_T_MAX-3) {
3194 PyErr_SetString(PyExc_OverflowError,
3195 "precision too large");
3196 return NULL;
3197 }
3198
3199 switch (type) {
3200 case 'd':
3201 case 'u':
3202 /* Special-case boolean: we want 0/1 */
3203 if (PyBool_Check(val))
3204 result = PyNumber_ToBase(val, 10);
3205 else
3206 result = Py_TYPE(val)->tp_str(val);
3207 break;
3208 case 'o':
3209 numnondigits = 2;
3210 result = PyNumber_ToBase(val, 8);
3211 break;
3212 case 'x':
3213 case 'X':
3214 numnondigits = 2;
3215 result = PyNumber_ToBase(val, 16);
3216 break;
3217 default:
3218 assert(!"'type' not in [duoxX]");
3219 }
3220 if (!result)
3221 return NULL;
3222
3223 buf = PyUnicode_AsString(result);
3224 if (!buf) {
3225 Py_DECREF(result);
3226 return NULL;
3227 }
3228
3229 /* To modify the string in-place, there can only be one reference. */
3230 if (Py_REFCNT(result) != 1) {
3231 PyErr_BadInternalCall();
3232 return NULL;
3233 }
3234 llen = PyUnicode_GetSize(result);
3235 if (llen > INT_MAX) {
3236 PyErr_SetString(PyExc_ValueError,
3237 "string too large in _PyBytes_FormatLong");
3238 return NULL;
3239 }
3240 len = (int)llen;
3241 if (buf[len-1] == 'L') {
3242 --len;
3243 buf[len] = '\0';
3244 }
3245 sign = buf[0] == '-';
3246 numnondigits += sign;
3247 numdigits = len - numnondigits;
3248 assert(numdigits > 0);
3249
3250 /* Get rid of base marker unless F_ALT */
3251 if (((flags & F_ALT) == 0 &&
3252 (type == 'o' || type == 'x' || type == 'X'))) {
3253 assert(buf[sign] == '0');
3254 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3255 buf[sign+1] == 'o');
3256 numnondigits -= 2;
3257 buf += 2;
3258 len -= 2;
3259 if (sign)
3260 buf[0] = '-';
3261 assert(len == numnondigits + numdigits);
3262 assert(numdigits > 0);
3263 }
3264
3265 /* Fill with leading zeroes to meet minimum width. */
3266 if (prec > numdigits) {
3267 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3268 numnondigits + prec);
3269 char *b1;
3270 if (!r1) {
3271 Py_DECREF(result);
3272 return NULL;
3273 }
3274 b1 = PyBytes_AS_STRING(r1);
3275 for (i = 0; i < numnondigits; ++i)
3276 *b1++ = *buf++;
3277 for (i = 0; i < prec - numdigits; i++)
3278 *b1++ = '0';
3279 for (i = 0; i < numdigits; i++)
3280 *b1++ = *buf++;
3281 *b1 = '\0';
3282 Py_DECREF(result);
3283 result = r1;
3284 buf = PyBytes_AS_STRING(result);
3285 len = numnondigits + prec;
3286 }
3287
3288 /* Fix up case for hex conversions. */
3289 if (type == 'X') {
3290 /* Need to convert all lower case letters to upper case.
3291 and need to convert 0x to 0X (and -0x to -0X). */
3292 for (i = 0; i < len; i++)
3293 if (buf[i] >= 'a' && buf[i] <= 'x')
3294 buf[i] -= 'a'-'A';
3295 }
3296 *pbuf = buf;
3297 *plen = len;
3298 return result;
3299}
3300
3301void
3302PyBytes_Fini(void)
3303{
3304 int i;
3305 for (i = 0; i < UCHAR_MAX + 1; i++) {
3306 Py_XDECREF(characters[i]);
3307 characters[i] = NULL;
3308 }
3309 Py_XDECREF(nullstring);
3310 nullstring = NULL;
3311}
3312
3313/*********************** Str Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003314
/* State of an iterator over a bytes object (see striter_next). */
3315typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003316 PyObject_HEAD
3317 Py_ssize_t it_index; /* index of the next byte to hand out */
3318 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3319} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003320
3321static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003322striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003323{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003324 _PyObject_GC_UNTRACK(it);
3325 Py_XDECREF(it->it_seq);
3326 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003327}
3328
/* GC traversal of the bytes iterator: the only object it references is the
   bytes object being iterated (it_seq). */
3329static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003330striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003331{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003332 Py_VISIT(it->it_seq);
3333 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003334}
3335
3336static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003337striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003338{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339 PyBytesObject *seq;
3340 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003342 assert(it != NULL);
3343 seq = it->it_seq;
3344 if (seq == NULL)
3345 return NULL;
3346 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003347
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003348 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3349 item = PyLong_FromLong(
3350 (unsigned char)seq->ob_sval[it->it_index]);
3351 if (item != NULL)
3352 ++it->it_index;
3353 return item;
3354 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003355
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003356 Py_DECREF(seq);
3357 it->it_seq = NULL;
3358 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003359}
3360
3361static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003362striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003363{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364 Py_ssize_t len = 0;
3365 if (it->it_seq)
3366 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3367 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003368}
3369
/* Docstring and method table of the bytes iterator.  The only method is
   __length_hint__, implemented by striter_len. */
3370PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003371 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003373static PyMethodDef striter_methods[] = {
3374 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3375 length_hint_doc},
3376 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003377};
3378
/* Type object of the iterator returned by str_iter.
   Instances are fixed-size striterobject structs that take part in garbage
   collection (Py_TPFLAGS_HAVE_GC with striter_traverse).  Iterating the
   iterator yields itself (PyObject_SelfIter); items come from striter_next.
   Slots after the last one listed are left to default initialization. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003379PyTypeObject PyBytesIter_Type = {
3380 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3381 "bytes_iterator", /* tp_name */
3382 sizeof(striterobject), /* tp_basicsize */
3383 0, /* tp_itemsize */
3384 /* methods */
3385 (destructor)striter_dealloc, /* tp_dealloc */
3386 0, /* tp_print */
3387 0, /* tp_getattr */
3388 0, /* tp_setattr */
3389 0, /* tp_compare */
3390 0, /* tp_repr */
3391 0, /* tp_as_number */
3392 0, /* tp_as_sequence */
3393 0, /* tp_as_mapping */
3394 0, /* tp_hash */
3395 0, /* tp_call */
3396 0, /* tp_str */
3397 PyObject_GenericGetAttr, /* tp_getattro */
3398 0, /* tp_setattro */
3399 0, /* tp_as_buffer */
3400 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3401 0, /* tp_doc */
3402 (traverseproc)striter_traverse, /* tp_traverse */
3403 0, /* tp_clear */
3404 0, /* tp_richcompare */
3405 0, /* tp_weaklistoffset */
3406 PyObject_SelfIter, /* tp_iter */
3407 (iternextfunc)striter_next, /* tp_iternext */
3408 striter_methods, /* tp_methods */
3409 0, /* tp_members */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003410};
3411
3412static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003413str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003414{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003416
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003417 if (!PyBytes_Check(seq)) {
3418 PyErr_BadInternalCall();
3419 return NULL;
3420 }
3421 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3422 if (it == NULL)
3423 return NULL;
3424 it->it_index = 0;
3425 Py_INCREF(seq);
3426 it->it_seq = (PyBytesObject *)seq;
3427 _PyObject_GC_TRACK(it);
3428 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003429}