blob: 52479ca41d749196c12ca93fa54c8518c0565e44 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz2bad9702007-08-27 06:19:22 +00009static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000010_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000011{
Christian Heimes90aa7642007-12-19 02:45:37 +000012 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000013
Gregory P. Smith60d241f2007-10-16 06:31:30 +000014 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000015 {
16 PyErr_Format(PyExc_TypeError,
17 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000018 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000019 return -1;
20 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000021
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000022 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
23 return -1;
24 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000025}
26
Christian Heimes2c9c7a52008-05-26 13:42:13 +000027#ifdef COUNT_ALLOCS
28int null_strings, one_strings;
29#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000030
Christian Heimes2c9c7a52008-05-26 13:42:13 +000031static PyBytesObject *characters[UCHAR_MAX + 1];
32static PyBytesObject *nullstring;
33
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   bytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000060PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063 register PyBytesObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyBytes_FromStringAndSize");
67 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
86 /* Inline PyObject_NewVar */
87 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
88 if (op == NULL)
89 return PyErr_NoMemory();
90 PyObject_INIT_VAR(op, &PyBytes_Type, size);
91 op->ob_shash = -1;
92 if (str != NULL)
93 Py_MEMCPY(op->ob_sval, str, size);
94 op->ob_sval[size] = '\0';
95 /* share short strings */
96 if (size == 0) {
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 characters[*str & UCHAR_MAX] = op;
101 Py_INCREF(op);
102 }
103 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000104}
105
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000106PyObject *
107PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000109 register size_t size;
110 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000111
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000112 assert(str != NULL);
113 size = strlen(str);
114 if (size > PY_SSIZE_T_MAX) {
115 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000116 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117 return NULL;
118 }
119 if (size == 0 && (op = nullstring) != NULL) {
120#ifdef COUNT_ALLOCS
121 null_strings++;
122#endif
123 Py_INCREF(op);
124 return (PyObject *)op;
125 }
126 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
127#ifdef COUNT_ALLOCS
128 one_strings++;
129#endif
130 Py_INCREF(op);
131 return (PyObject *)op;
132 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134 /* Inline PyObject_NewVar */
135 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
136 if (op == NULL)
137 return PyErr_NoMemory();
138 PyObject_INIT_VAR(op, &PyBytes_Type, size);
139 op->ob_shash = -1;
140 Py_MEMCPY(op->ob_sval, str, size+1);
141 /* share short strings */
142 if (size == 0) {
143 nullstring = op;
144 Py_INCREF(op);
145 } else if (size == 1) {
146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
150}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromFormatV(const char *format, va_list vargs)
154{
155 va_list count;
156 Py_ssize_t n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000160
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161#ifdef VA_LIST_IS_ARRAY
162 Py_MEMCPY(count, vargs, sizeof(va_list));
163#else
164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
167 count = vargs;
168#endif
169#endif
170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !ISALPHA(*f))
175 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000176
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000177 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
178 * they don't affect the amount of space we reserve.
179 */
180 if ((*f == 'l' || *f == 'z') &&
181 (f[1] == 'd' || f[1] == 'u'))
182 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000183
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'u': case 'i': case 'x':
192 (void) va_arg(count, int);
193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
207 * XXX I count 18 -- what's the extra for?
208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
217 what's in the argument list) */
218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
228 string = PyBytes_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000231
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000232 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000233
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 Py_ssize_t i;
238 int longflag = 0;
239 int size_tflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (ISDIGIT(*f))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (ISDIGIT(*f))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !ISALPHA(*f))
252 f++;
253 /* handle the long flag, but only for %ld and %lu.
254 others can be added when necessary. */
255 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
256 longflag = 1;
257 ++f;
258 }
259 /* handle the size_t flag. */
260 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
261 size_tflag = 1;
262 ++f;
263 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000264
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000265 switch (*f) {
266 case 'c':
267 *s++ = va_arg(vargs, int);
268 break;
269 case 'd':
270 if (longflag)
271 sprintf(s, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(s, "%d", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'u':
280 if (longflag)
281 sprintf(s, "%lu",
282 va_arg(vargs, unsigned long));
283 else if (size_tflag)
284 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
285 va_arg(vargs, size_t));
286 else
287 sprintf(s, "%u",
288 va_arg(vargs, unsigned int));
289 s += strlen(s);
290 break;
291 case 'i':
292 sprintf(s, "%i", va_arg(vargs, int));
293 s += strlen(s);
294 break;
295 case 'x':
296 sprintf(s, "%x", va_arg(vargs, int));
297 s += strlen(s);
298 break;
299 case 's':
300 p = va_arg(vargs, char*);
301 i = strlen(p);
302 if (n > 0 && i > n)
303 i = n;
304 Py_MEMCPY(s, p, i);
305 s += i;
306 break;
307 case 'p':
308 sprintf(s, "%p", va_arg(vargs, void*));
309 /* %p is ill-defined: ensure leading 0x. */
310 if (s[1] == 'X')
311 s[1] = 'x';
312 else if (s[1] != 'x') {
313 memmove(s+2, s, strlen(s)+1);
314 s[0] = '0';
315 s[1] = 'x';
316 }
317 s += strlen(s);
318 break;
319 case '%':
320 *s++ = '%';
321 break;
322 default:
323 strcpy(s, p);
324 s += strlen(s);
325 goto end;
326 }
327 } else
328 *s++ = *f;
329 }
330
331 end:
332 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
333 return string;
334}
335
336PyObject *
337PyBytes_FromFormat(const char *format, ...)
338{
339 PyObject* ret;
340 va_list vargs;
341
342#ifdef HAVE_STDARG_PROTOTYPES
343 va_start(vargs, format);
344#else
345 va_start(vargs);
346#endif
347 ret = PyBytes_FromFormatV(format, vargs);
348 va_end(vargs);
349 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000350}
351
352static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000356}
357
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358/* Unescape a backslash-escaped string. If unicode is non-zero,
359 the string is a u-literal. If recode_encoding is non-zero,
360 the string is UTF-8 encoded and should be re-encoded in the
361 specified encoding. */
362
363PyObject *PyBytes_DecodeEscape(const char *s,
364 Py_ssize_t len,
365 const char *errors,
366 Py_ssize_t unicode,
367 const char *recode_encoding)
368{
369 int c;
370 char *p, *buf;
371 const char *end;
372 PyObject *v;
373 Py_ssize_t newlen = recode_encoding ? 4*len:len;
374 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
375 if (v == NULL)
376 return NULL;
377 p = buf = PyBytes_AsString(v);
378 end = s + len;
379 while (s < end) {
380 if (*s != '\\') {
381 non_esc:
382 if (recode_encoding && (*s & 0x80)) {
383 PyObject *u, *w;
384 char *r;
385 const char* t;
386 Py_ssize_t rn;
387 t = s;
388 /* Decode non-ASCII bytes as UTF-8. */
389 while (t < end && (*t & 0x80)) t++;
390 u = PyUnicode_DecodeUTF8(s, t - s, errors);
391 if(!u) goto failed;
392
393 /* Recode them in target encoding. */
394 w = PyUnicode_AsEncodedString(
395 u, recode_encoding, errors);
396 Py_DECREF(u);
397 if (!w) goto failed;
398
399 /* Append bytes to output buffer. */
400 assert(PyBytes_Check(w));
401 r = PyBytes_AS_STRING(w);
402 rn = PyBytes_GET_SIZE(w);
403 Py_MEMCPY(p, r, rn);
404 p += rn;
405 Py_DECREF(w);
406 s = t;
407 } else {
408 *p++ = *s++;
409 }
410 continue;
411 }
412 s++;
413 if (s==end) {
414 PyErr_SetString(PyExc_ValueError,
415 "Trailing \\ in string");
416 goto failed;
417 }
418 switch (*s++) {
419 /* XXX This assumes ASCII! */
420 case '\n': break;
421 case '\\': *p++ = '\\'; break;
422 case '\'': *p++ = '\''; break;
423 case '\"': *p++ = '\"'; break;
424 case 'b': *p++ = '\b'; break;
425 case 'f': *p++ = '\014'; break; /* FF */
426 case 't': *p++ = '\t'; break;
427 case 'n': *p++ = '\n'; break;
428 case 'r': *p++ = '\r'; break;
429 case 'v': *p++ = '\013'; break; /* VT */
430 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
431 case '0': case '1': case '2': case '3':
432 case '4': case '5': case '6': case '7':
433 c = s[-1] - '0';
434 if (s < end && '0' <= *s && *s <= '7') {
435 c = (c<<3) + *s++ - '0';
436 if (s < end && '0' <= *s && *s <= '7')
437 c = (c<<3) + *s++ - '0';
438 }
439 *p++ = c;
440 break;
441 case 'x':
442 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
443 unsigned int x = 0;
444 c = Py_CHARMASK(*s);
445 s++;
446 if (ISDIGIT(c))
447 x = c - '0';
448 else if (ISLOWER(c))
449 x = 10 + c - 'a';
450 else
451 x = 10 + c - 'A';
452 x = x << 4;
453 c = Py_CHARMASK(*s);
454 s++;
455 if (ISDIGIT(c))
456 x += c - '0';
457 else if (ISLOWER(c))
458 x += 10 + c - 'a';
459 else
460 x += 10 + c - 'A';
461 *p++ = x;
462 break;
463 }
464 if (!errors || strcmp(errors, "strict") == 0) {
465 PyErr_SetString(PyExc_ValueError,
466 "invalid \\x escape");
467 goto failed;
468 }
469 if (strcmp(errors, "replace") == 0) {
470 *p++ = '?';
471 } else if (strcmp(errors, "ignore") == 0)
472 /* do nothing */;
473 else {
474 PyErr_Format(PyExc_ValueError,
475 "decoding error; unknown "
476 "error handling code: %.400s",
477 errors);
478 goto failed;
479 }
480 default:
481 *p++ = '\\';
482 s--;
483 goto non_esc; /* an arbitry number of unescaped
484 UTF-8 bytes may follow. */
485 }
486 }
487 if (p-buf < newlen)
488 _PyBytes_Resize(&v, p - buf);
489 return v;
490 failed:
491 Py_DECREF(v);
492 return NULL;
493}
494
495/* -------------------------------------------------------------------- */
496/* object api */
497
498Py_ssize_t
499PyBytes_Size(register PyObject *op)
500{
501 if (!PyBytes_Check(op)) {
502 PyErr_Format(PyExc_TypeError,
503 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
504 return -1;
505 }
506 return Py_SIZE(op);
507}
508
509char *
510PyBytes_AsString(register PyObject *op)
511{
512 if (!PyBytes_Check(op)) {
513 PyErr_Format(PyExc_TypeError,
514 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
515 return NULL;
516 }
517 return ((PyBytesObject *)op)->ob_sval;
518}
519
520int
521PyBytes_AsStringAndSize(register PyObject *obj,
522 register char **s,
523 register Py_ssize_t *len)
524{
525 if (s == NULL) {
526 PyErr_BadInternalCall();
527 return -1;
528 }
529
530 if (!PyBytes_Check(obj)) {
531 PyErr_Format(PyExc_TypeError,
532 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
533 return -1;
534 }
535
536 *s = PyBytes_AS_STRING(obj);
537 if (len != NULL)
538 *len = PyBytes_GET_SIZE(obj);
539 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
540 PyErr_SetString(PyExc_TypeError,
541 "expected bytes with no null");
542 return -1;
543 }
544 return 0;
545}
Neal Norwitz6968b052007-02-27 19:02:19 +0000546
547/* -------------------------------------------------------------------- */
548/* Methods */
549
550#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000551
Neal Norwitz6968b052007-02-27 19:02:19 +0000552#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000553#define STRINGLIB_LEN PyBytes_GET_SIZE
554#define STRINGLIB_NEW PyBytes_FromStringAndSize
555#define STRINGLIB_STR PyBytes_AS_STRING
556/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
557
558#define STRINGLIB_EMPTY nullstring
559#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
560#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000563
Neal Norwitz6968b052007-02-27 19:02:19 +0000564#include "stringlib/count.h"
565#include "stringlib/find.h"
566#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000567#include "stringlib/ctype.h"
568#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000569
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000570#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
571#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573PyObject *
574PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000575{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576 static const char *hexdigits = "0123456789abcdef";
577 register PyBytesObject* op = (PyBytesObject*) obj;
578 Py_ssize_t length = Py_SIZE(op);
579 size_t newsize = 3 + 4 * length;
580 PyObject *v;
581 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
582 PyErr_SetString(PyExc_OverflowError,
583 "bytes object is too large to make repr");
584 return NULL;
585 }
586 v = PyUnicode_FromUnicode(NULL, newsize);
587 if (v == NULL) {
588 return NULL;
589 }
590 else {
591 register Py_ssize_t i;
592 register Py_UNICODE c;
593 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
594 int quote;
595
596 /* Figure out which quote to use; single is preferred */
597 quote = '\'';
598 if (smartquotes) {
599 char *test, *start;
600 start = PyBytes_AS_STRING(op);
601 for (test = start; test < start+length; ++test) {
602 if (*test == '"') {
603 quote = '\''; /* back to single */
604 goto decided;
605 }
606 else if (*test == '\'')
607 quote = '"';
608 }
609 decided:
610 ;
611 }
612
613 *p++ = 'b', *p++ = quote;
614 for (i = 0; i < length; i++) {
615 /* There's at least enough room for a hex escape
616 and a closing quote. */
617 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
618 c = op->ob_sval[i];
619 if (c == quote || c == '\\')
620 *p++ = '\\', *p++ = c;
621 else if (c == '\t')
622 *p++ = '\\', *p++ = 't';
623 else if (c == '\n')
624 *p++ = '\\', *p++ = 'n';
625 else if (c == '\r')
626 *p++ = '\\', *p++ = 'r';
627 else if (c < ' ' || c >= 0x7f) {
628 *p++ = '\\';
629 *p++ = 'x';
630 *p++ = hexdigits[(c & 0xf0) >> 4];
631 *p++ = hexdigits[c & 0xf];
632 }
633 else
634 *p++ = c;
635 }
636 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
637 *p++ = quote;
638 *p = '\0';
639 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
640 Py_DECREF(v);
641 return NULL;
642 }
643 return v;
644 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000645}
646
Neal Norwitz6968b052007-02-27 19:02:19 +0000647static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000648string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000649{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000650 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656 if (Py_BytesWarningFlag) {
657 if (PyErr_WarnEx(PyExc_BytesWarning,
658 "str() on a bytes instance", 1))
659 return NULL;
660 }
661 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664static Py_ssize_t
665string_length(PyBytesObject *a)
666{
667 return Py_SIZE(a);
668}
Neal Norwitz6968b052007-02-27 19:02:19 +0000669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670/* This is also used by PyBytes_Concat() */
671static PyObject *
672string_concat(PyObject *a, PyObject *b)
673{
674 Py_ssize_t size;
675 Py_buffer va, vb;
676 PyObject *result = NULL;
677
678 va.len = -1;
679 vb.len = -1;
680 if (_getbuffer(a, &va) < 0 ||
681 _getbuffer(b, &vb) < 0) {
682 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
683 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
684 goto done;
685 }
686
687 /* Optimize end cases */
688 if (va.len == 0 && PyBytes_CheckExact(b)) {
689 result = b;
690 Py_INCREF(result);
691 goto done;
692 }
693 if (vb.len == 0 && PyBytes_CheckExact(a)) {
694 result = a;
695 Py_INCREF(result);
696 goto done;
697 }
698
699 size = va.len + vb.len;
700 if (size < 0) {
701 PyErr_NoMemory();
702 goto done;
703 }
704
705 result = PyBytes_FromStringAndSize(NULL, size);
706 if (result != NULL) {
707 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
708 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
709 }
710
711 done:
712 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000713 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000714 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000715 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000716 return result;
717}
Neal Norwitz6968b052007-02-27 19:02:19 +0000718
719static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000721{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000722 register Py_ssize_t i;
723 register Py_ssize_t j;
724 register Py_ssize_t size;
725 register PyBytesObject *op;
726 size_t nbytes;
727 if (n < 0)
728 n = 0;
729 /* watch out for overflows: the size can overflow int,
730 * and the # of bytes needed can overflow size_t
731 */
732 size = Py_SIZE(a) * n;
733 if (n && size / n != Py_SIZE(a)) {
734 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000735 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000736 return NULL;
737 }
738 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
739 Py_INCREF(a);
740 return (PyObject *)a;
741 }
742 nbytes = (size_t)size;
743 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
744 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000745 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000746 return NULL;
747 }
748 op = (PyBytesObject *)
749 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000773string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
777 Py_buffer varg;
778 int pos;
779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000784 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 return pos >= 0;
786 }
787 if (ival < 0 || ival >= 256) {
788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
790 }
791
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000792 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000799 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000805static PyObject*
806string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000807{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808 int c;
809 Py_ssize_t len_a, len_b;
810 Py_ssize_t min_len;
811 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000812
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000813 /* Make sure both arguments are strings. */
814 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
815 if (Py_BytesWarningFlag && (op == Py_EQ) &&
816 (PyObject_IsInstance((PyObject*)a,
817 (PyObject*)&PyUnicode_Type) ||
818 PyObject_IsInstance((PyObject*)b,
819 (PyObject*)&PyUnicode_Type))) {
820 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000821 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000822 return NULL;
823 }
824 result = Py_NotImplemented;
825 goto out;
826 }
827 if (a == b) {
828 switch (op) {
829 case Py_EQ:case Py_LE:case Py_GE:
830 result = Py_True;
831 goto out;
832 case Py_NE:case Py_LT:case Py_GT:
833 result = Py_False;
834 goto out;
835 }
836 }
837 if (op == Py_EQ) {
838 /* Supporting Py_NE here as well does not save
839 much time, since Py_NE is rarely used. */
840 if (Py_SIZE(a) == Py_SIZE(b)
841 && (a->ob_sval[0] == b->ob_sval[0]
842 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
843 result = Py_True;
844 } else {
845 result = Py_False;
846 }
847 goto out;
848 }
849 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
850 min_len = (len_a < len_b) ? len_a : len_b;
851 if (min_len > 0) {
852 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
853 if (c==0)
854 c = memcmp(a->ob_sval, b->ob_sval, min_len);
855 } else
856 c = 0;
857 if (c == 0)
858 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
859 switch (op) {
860 case Py_LT: c = c < 0; break;
861 case Py_LE: c = c <= 0; break;
862 case Py_EQ: assert(0); break; /* unreachable */
863 case Py_NE: c = c != 0; break;
864 case Py_GT: c = c > 0; break;
865 case Py_GE: c = c >= 0; break;
866 default:
867 result = Py_NotImplemented;
868 goto out;
869 }
870 result = c ? Py_True : Py_False;
871 out:
872 Py_INCREF(result);
873 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000876static long
877string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000878{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000879 register Py_ssize_t len;
880 register unsigned char *p;
881 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000882
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883 if (a->ob_shash != -1)
884 return a->ob_shash;
885 len = Py_SIZE(a);
886 p = (unsigned char *) a->ob_sval;
887 x = *p << 7;
888 while (--len >= 0)
889 x = (1000003*x) ^ *p++;
890 x ^= Py_SIZE(a);
891 if (x == -1)
892 x = -2;
893 a->ob_shash = x;
894 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000895}
896
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000897static PyObject*
898string_subscript(PyBytesObject* self, PyObject* item)
899{
900 if (PyIndex_Check(item)) {
901 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
902 if (i == -1 && PyErr_Occurred())
903 return NULL;
904 if (i < 0)
905 i += PyBytes_GET_SIZE(self);
906 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
907 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000908 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000909 return NULL;
910 }
911 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
912 }
913 else if (PySlice_Check(item)) {
914 Py_ssize_t start, stop, step, slicelength, cur, i;
915 char* source_buf;
916 char* result_buf;
917 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000918
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000919 if (PySlice_GetIndicesEx((PySliceObject*)item,
920 PyBytes_GET_SIZE(self),
921 &start, &stop, &step, &slicelength) < 0) {
922 return NULL;
923 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000924
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000925 if (slicelength <= 0) {
926 return PyBytes_FromStringAndSize("", 0);
927 }
928 else if (start == 0 && step == 1 &&
929 slicelength == PyBytes_GET_SIZE(self) &&
930 PyBytes_CheckExact(self)) {
931 Py_INCREF(self);
932 return (PyObject *)self;
933 }
934 else if (step == 1) {
935 return PyBytes_FromStringAndSize(
936 PyBytes_AS_STRING(self) + start,
937 slicelength);
938 }
939 else {
940 source_buf = PyBytes_AsString((PyObject*)self);
941 result_buf = (char *)PyMem_Malloc(slicelength);
942 if (result_buf == NULL)
943 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000944
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945 for (cur = start, i = 0; i < slicelength;
946 cur += step, i++) {
947 result_buf[i] = source_buf[cur];
948 }
949
950 result = PyBytes_FromStringAndSize(result_buf,
951 slicelength);
952 PyMem_Free(result_buf);
953 return result;
954 }
955 }
956 else {
957 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000958 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959 Py_TYPE(item)->tp_name);
960 return NULL;
961 }
962}
963
964static int
965string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
966{
Martin v. Löwis423be952008-08-13 15:53:07 +0000967 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000968 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969}
970
971static PySequenceMethods string_as_sequence = {
972 (lenfunc)string_length, /*sq_length*/
973 (binaryfunc)string_concat, /*sq_concat*/
974 (ssizeargfunc)string_repeat, /*sq_repeat*/
975 (ssizeargfunc)string_item, /*sq_item*/
976 0, /*sq_slice*/
977 0, /*sq_ass_item*/
978 0, /*sq_ass_slice*/
979 (objobjproc)string_contains /*sq_contains*/
980};
981
982static PyMappingMethods string_as_mapping = {
983 (lenfunc)string_length,
984 (binaryfunc)string_subscript,
985 0,
986};
987
988static PyBufferProcs string_as_buffer = {
989 (getbufferproc)string_buffer_getbuffer,
990 NULL,
991};
992
993
/* Strip-direction selectors; each value is also an index into
   stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* The format string with its first three characters ("|O:") skipped,
   i.e. just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1002
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
/* Test whether the `length` bytes of `target` starting at index `offset`
   equal the first `length` bytes of `pattern`.  The first and last bytes
   are compared directly; memcmp() then checks the bytes after the first.

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized in the expansion so that expressions
   such as `buf + 1` or `a + b` can be passed safely.  Arguments are
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1009
1010
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a given split limit:
   maxsplit+1, capped at MAX_PREALLOC.  (5 splits gives 6 elements.)
   The argument is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append the bytes data[left:right] to the result list.

   Relies on names in the expanding function's scope: `str` (a PyObject *
   scratch variable), `list` (the result list), `count` (number of items
   stored so far) and an `onError` label, which is jumped to on failure.
   The first MAX_PREALLOC items are stored straight into the preallocated
   slots; later ones go through PyList_Append(), after which the local
   reference to `str` is dropped.

   Wrapped in do { } while (0) so it behaves as a single statement. */
#define SPLIT_ADD(data, left, right)                                    \
    do {                                                                \
        str = PyBytes_FromStringAndSize((data) + (left),                \
                                        (right) - (left));              \
        if (str == NULL)                                                \
            goto onError;                                               \
        if (count < MAX_PREALLOC) {                                     \
            PyList_SET_ITEM(list, count, str);                          \
        }                                                               \
        else {                                                          \
            if (PyList_Append(list, str)) {                             \
                Py_DECREF(str);                                         \
                goto onError;                                           \
            }                                                           \
            Py_DECREF(str);                                             \
        }                                                               \
        count++;                                                        \
    } while (0)

/* Always force the list to the expected size (the `count` variable of
   the expanding function). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Index-advancing scanners.  `i` must be an lvalue; the forward variants
   stop at `len`, the reverse variants stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !ISSPACE((s)[(i)])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001050split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052 const char *s = PyBytes_AS_STRING(self);
1053 Py_ssize_t i, j, count=0;
1054 PyObject *str;
1055 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057 if (list == NULL)
1058 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062 while (maxsplit-- > 0) {
1063 SKIP_SPACE(s, i, len);
1064 if (i==len) break;
1065 j = i; i++;
1066 SKIP_NONSPACE(s, i, len);
1067 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1068 /* No whitespace in self, so just use it as list[0] */
1069 Py_INCREF(self);
1070 PyList_SET_ITEM(list, 0, (PyObject *)self);
1071 count++;
1072 break;
1073 }
1074 SPLIT_ADD(s, j, i);
1075 }
1076
1077 if (i < len) {
1078 /* Only occurs when maxsplit was reached */
1079 /* Skip any remaining whitespace and copy to end of string */
1080 SKIP_SPACE(s, i, len);
1081 if (i != len)
1082 SPLIT_ADD(s, i, len);
1083 }
1084 FIX_PREALLOC_SIZE(list);
1085 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001087 Py_DECREF(list);
1088 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Guido van Rossum8f950672007-09-10 16:53:45 +00001091Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001092split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001093{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094 const char *s = PyBytes_AS_STRING(self);
1095 register Py_ssize_t i, j, count=0;
1096 PyObject *str;
1097 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099 if (list == NULL)
1100 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102 i = j = 0;
1103 while ((j < len) && (maxcount-- > 0)) {
1104 for(; j<len; j++) {
1105 /* I found that using memchr makes no difference */
1106 if (s[j] == ch) {
1107 SPLIT_ADD(s, i, j);
1108 i = j = j + 1;
1109 break;
1110 }
1111 }
1112 }
1113 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1114 /* ch not in self, so just use self as list[0] */
1115 Py_INCREF(self);
1116 PyList_SET_ITEM(list, 0, (PyObject *)self);
1117 count++;
1118 }
1119 else if (i <= len) {
1120 SPLIT_ADD(s, i, len);
1121 }
1122 FIX_PREALLOC_SIZE(list);
1123 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001124
1125 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126 Py_DECREF(list);
1127 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001128}
1129
Neal Norwitz6968b052007-02-27 19:02:19 +00001130PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001132\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001133Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134If sep is not specified or is None, B is split on ASCII whitespace\n\
1135characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001136If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
1138static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001139string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001140{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1142 Py_ssize_t maxsplit = -1, count=0;
1143 const char *s = PyBytes_AS_STRING(self), *sub;
1144 Py_buffer vsub;
1145 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001146#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001148#endif
1149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1151 return NULL;
1152 if (maxsplit < 0)
1153 maxsplit = PY_SSIZE_T_MAX;
1154 if (subobj == Py_None)
1155 return split_whitespace(self, len, maxsplit);
1156 if (_getbuffer(subobj, &vsub) < 0)
1157 return NULL;
1158 sub = vsub.buf;
1159 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001160
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161 if (n == 0) {
1162 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001163 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164 return NULL;
1165 }
1166 else if (n == 1)
1167 return split_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001168
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169 list = PyList_New(PREALLOC_SIZE(maxsplit));
1170 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001171 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172 return NULL;
1173 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001174
1175#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001176 i = j = 0;
1177 while (maxsplit-- > 0) {
1178 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1179 if (pos < 0)
1180 break;
1181 j = i+pos;
1182 SPLIT_ADD(s, i, j);
1183 i = j + n;
1184 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001185#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186 i = j = 0;
1187 while ((j+n <= len) && (maxsplit-- > 0)) {
1188 for (; j+n <= len; j++) {
1189 if (Py_STRING_MATCH(s, j, sub, n)) {
1190 SPLIT_ADD(s, i, j);
1191 i = j = j + n;
1192 break;
1193 }
1194 }
1195 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001196#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001197 SPLIT_ADD(s, i, len);
1198 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001199 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001201
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202 onError:
1203 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001204 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001206}
1207
Neal Norwitz6968b052007-02-27 19:02:19 +00001208PyDoc_STRVAR(partition__doc__,
1209"B.partition(sep) -> (head, sep, tail)\n\
1210\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001211Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001212the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001214
1215static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001217{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 const char *sep;
1219 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001220
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 if (PyBytes_Check(sep_obj)) {
1222 sep = PyBytes_AS_STRING(sep_obj);
1223 sep_len = PyBytes_GET_SIZE(sep_obj);
1224 }
1225 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1226 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001227
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228 return stringlib_partition(
1229 (PyObject*) self,
1230 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1231 sep_obj, sep, sep_len
1232 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001233}
1234
1235PyDoc_STRVAR(rpartition__doc__,
1236"B.rpartition(sep) -> (tail, sep, head)\n\
1237\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001238Search for the separator sep in B, starting at the end of B,\n\
1239and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001240part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001242
1243static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001245{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246 const char *sep;
1247 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001248
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249 if (PyBytes_Check(sep_obj)) {
1250 sep = PyBytes_AS_STRING(sep_obj);
1251 sep_len = PyBytes_GET_SIZE(sep_obj);
1252 }
1253 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1254 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256 return stringlib_rpartition(
1257 (PyObject*) self,
1258 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1259 sep_obj, sep, sep_len
1260 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001261}
1262
1263Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001265{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266 const char *s = PyBytes_AS_STRING(self);
1267 Py_ssize_t i, j, count=0;
1268 PyObject *str;
1269 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271 if (list == NULL)
1272 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276 while (maxsplit-- > 0) {
1277 RSKIP_SPACE(s, i);
1278 if (i<0) break;
1279 j = i; i--;
1280 RSKIP_NONSPACE(s, i);
1281 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1282 /* No whitespace in self, so just use it as list[0] */
1283 Py_INCREF(self);
1284 PyList_SET_ITEM(list, 0, (PyObject *)self);
1285 count++;
1286 break;
1287 }
1288 SPLIT_ADD(s, i + 1, j + 1);
1289 }
1290 if (i >= 0) {
1291 /* Only occurs when maxsplit was reached. Skip any remaining
1292 whitespace and copy to beginning of string. */
1293 RSKIP_SPACE(s, i);
1294 if (i >= 0)
1295 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001296
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297 }
1298 FIX_PREALLOC_SIZE(list);
1299 if (PyList_Reverse(list) < 0)
1300 goto onError;
1301 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001302 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303 Py_DECREF(list);
1304 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305}
1306
Guido van Rossum8f950672007-09-10 16:53:45 +00001307Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001309{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310 const char *s = PyBytes_AS_STRING(self);
1311 register Py_ssize_t i, j, count=0;
1312 PyObject *str;
1313 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001314
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315 if (list == NULL)
1316 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001317
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 i = j = len - 1;
1319 while ((i >= 0) && (maxcount-- > 0)) {
1320 for (; i >= 0; i--) {
1321 if (s[i] == ch) {
1322 SPLIT_ADD(s, i + 1, j + 1);
1323 j = i = i - 1;
1324 break;
1325 }
1326 }
1327 }
1328 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1329 /* ch not in self, so just use self as list[0] */
1330 Py_INCREF(self);
1331 PyList_SET_ITEM(list, 0, (PyObject *)self);
1332 count++;
1333 }
1334 else if (j >= -1) {
1335 SPLIT_ADD(s, 0, j + 1);
1336 }
1337 FIX_PREALLOC_SIZE(list);
1338 if (PyList_Reverse(list) < 0)
1339 goto onError;
1340 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001341
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342 onError:
1343 Py_DECREF(list);
1344 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001345}
1346
Neal Norwitz6968b052007-02-27 19:02:19 +00001347PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001348"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001349\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001350Return a list of the sections in B, using sep as the delimiter,\n\
1351starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001352If sep is not given, B is split on ASCII whitespace characters\n\
1353(space, tab, return, newline, formfeed, vertical tab).\n\
1354If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001355
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356
Neal Norwitz6968b052007-02-27 19:02:19 +00001357static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001359{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1361 Py_ssize_t maxsplit = -1, count=0;
1362 const char *s, *sub;
1363 Py_buffer vsub;
1364 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1367 return NULL;
1368 if (maxsplit < 0)
1369 maxsplit = PY_SSIZE_T_MAX;
1370 if (subobj == Py_None)
1371 return rsplit_whitespace(self, len, maxsplit);
1372 if (_getbuffer(subobj, &vsub) < 0)
1373 return NULL;
1374 sub = vsub.buf;
1375 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001376
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377 if (n == 0) {
1378 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001379 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380 return NULL;
1381 }
1382 else if (n == 1)
1383 return rsplit_char(self, len, sub[0], maxsplit);
Guido van Rossum8f950672007-09-10 16:53:45 +00001384
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385 list = PyList_New(PREALLOC_SIZE(maxsplit));
1386 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001387 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388 return NULL;
1389 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391 j = len;
1392 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001393
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 s = PyBytes_AS_STRING(self);
1395 while ( (i >= 0) && (maxsplit-- > 0) ) {
1396 for (; i>=0; i--) {
1397 if (Py_STRING_MATCH(s, i, sub, n)) {
1398 SPLIT_ADD(s, i + n, j);
1399 j = i;
1400 i -= n;
1401 break;
1402 }
1403 }
1404 }
1405 SPLIT_ADD(s, 0, j);
1406 FIX_PREALLOC_SIZE(list);
1407 if (PyList_Reverse(list) < 0)
1408 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001409 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001411
1412onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001414 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001416}
1417
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418#undef SPLIT_ADD
1419#undef MAX_PREALLOC
1420#undef PREALLOC_SIZE
1421
1422
1423PyDoc_STRVAR(join__doc__,
1424"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001425\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001426Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1428
Neal Norwitz6968b052007-02-27 19:02:19 +00001429static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001431{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432 char *sep = PyBytes_AS_STRING(self);
1433 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1434 PyObject *res = NULL;
1435 char *p;
1436 Py_ssize_t seqlen = 0;
1437 size_t sz = 0;
1438 Py_ssize_t i;
1439 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001440
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441 seq = PySequence_Fast(orig, "");
1442 if (seq == NULL) {
1443 return NULL;
1444 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446 seqlen = PySequence_Size(seq);
1447 if (seqlen == 0) {
1448 Py_DECREF(seq);
1449 return PyBytes_FromString("");
1450 }
1451 if (seqlen == 1) {
1452 item = PySequence_Fast_GET_ITEM(seq, 0);
1453 if (PyBytes_CheckExact(item)) {
1454 Py_INCREF(item);
1455 Py_DECREF(seq);
1456 return item;
1457 }
1458 }
1459
1460 /* There are at least two things to join, or else we have a subclass
1461 * of the builtin types in the sequence.
1462 * Do a pre-pass to figure out the total amount of space we'll
1463 * need (sz), and see whether all argument are bytes.
1464 */
1465 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1466 for (i = 0; i < seqlen; i++) {
1467 const size_t old_sz = sz;
1468 item = PySequence_Fast_GET_ITEM(seq, i);
1469 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1470 PyErr_Format(PyExc_TypeError,
1471 "sequence item %zd: expected bytes,"
1472 " %.80s found",
1473 i, Py_TYPE(item)->tp_name);
1474 Py_DECREF(seq);
1475 return NULL;
1476 }
1477 sz += Py_SIZE(item);
1478 if (i != 0)
1479 sz += seplen;
1480 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1481 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001482 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483 Py_DECREF(seq);
1484 return NULL;
1485 }
1486 }
1487
1488 /* Allocate result space. */
1489 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1490 if (res == NULL) {
1491 Py_DECREF(seq);
1492 return NULL;
1493 }
1494
1495 /* Catenate everything. */
1496 /* I'm not worried about a PyByteArray item growing because there's
1497 nowhere in this function where we release the GIL. */
1498 p = PyBytes_AS_STRING(res);
1499 for (i = 0; i < seqlen; ++i) {
1500 size_t n;
1501 char *q;
1502 if (i) {
1503 Py_MEMCPY(p, sep, seplen);
1504 p += seplen;
1505 }
1506 item = PySequence_Fast_GET_ITEM(seq, i);
1507 n = Py_SIZE(item);
1508 if (PyBytes_Check(item))
1509 q = PyBytes_AS_STRING(item);
1510 else
1511 q = PyByteArray_AS_STRING(item);
1512 Py_MEMCPY(p, q, n);
1513 p += n;
1514 }
1515
1516 Py_DECREF(seq);
1517 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520PyObject *
1521_PyBytes_Join(PyObject *sep, PyObject *x)
1522{
1523 assert(sep != NULL && PyBytes_Check(sep));
1524 assert(x != NULL);
1525 return string_join(sep, x);
1526}
1527
1528Py_LOCAL_INLINE(void)
1529string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1530{
1531 if (*end > len)
1532 *end = len;
1533 else if (*end < 0)
1534 *end += len;
1535 if (*end < 0)
1536 *end = 0;
1537 if (*start < 0)
1538 *start += len;
1539 if (*start < 0)
1540 *start = 0;
1541}
1542
1543Py_LOCAL_INLINE(Py_ssize_t)
1544string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1545{
1546 PyObject *subobj;
1547 const char *sub;
1548 Py_ssize_t sub_len;
1549 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1550 PyObject *obj_start=Py_None, *obj_end=Py_None;
1551
1552 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1553 &obj_start, &obj_end))
1554 return -2;
1555 /* To support None in "start" and "end" arguments, meaning
1556 the same as if they were not passed.
1557 */
1558 if (obj_start != Py_None)
1559 if (!_PyEval_SliceIndex(obj_start, &start))
1560 return -2;
1561 if (obj_end != Py_None)
1562 if (!_PyEval_SliceIndex(obj_end, &end))
1563 return -2;
1564
1565 if (PyBytes_Check(subobj)) {
1566 sub = PyBytes_AS_STRING(subobj);
1567 sub_len = PyBytes_GET_SIZE(subobj);
1568 }
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1570 /* XXX - the "expected a character buffer object" is pretty
1571 confusing for a non-expert. remap to something else ? */
1572 return -2;
1573
1574 if (dir > 0)
1575 return stringlib_find_slice(
1576 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1577 sub, sub_len, start, end);
1578 else
1579 return stringlib_rfind_slice(
1580 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1581 sub, sub_len, start, end);
1582}
1583
1584
1585PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001586"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001587\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588Return the lowest index in S where substring sub is found,\n\
1589such that sub is contained within s[start:end]. Optional\n\
1590arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001591\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001592Return -1 on failure.");
1593
Neal Norwitz6968b052007-02-27 19:02:19 +00001594static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001596{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597 Py_ssize_t result = string_find_internal(self, args, +1);
1598 if (result == -2)
1599 return NULL;
1600 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001601}
1602
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603
1604PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001605"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001606\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607Like B.find() but raise ValueError when the substring is not found.");
1608
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001609static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001611{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612 Py_ssize_t result = string_find_internal(self, args, +1);
1613 if (result == -2)
1614 return NULL;
1615 if (result == -1) {
1616 PyErr_SetString(PyExc_ValueError,
1617 "substring not found");
1618 return NULL;
1619 }
1620 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623
1624PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001625"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001626\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627Return the highest index in B where substring sub is found,\n\
1628such that sub is contained within s[start:end]. Optional\n\
1629arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001630\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001631Return -1 on failure.");
1632
Neal Norwitz6968b052007-02-27 19:02:19 +00001633static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001635{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636 Py_ssize_t result = string_find_internal(self, args, -1);
1637 if (result == -2)
1638 return NULL;
1639 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001640}
1641
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001644"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645\n\
1646Like B.rfind() but raise ValueError when the substring is not found.");
1647
1648static PyObject *
1649string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001650{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651 Py_ssize_t result = string_find_internal(self, args, -1);
1652 if (result == -2)
1653 return NULL;
1654 if (result == -1) {
1655 PyErr_SetString(PyExc_ValueError,
1656 "substring not found");
1657 return NULL;
1658 }
1659 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001660}
1661
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662
1663Py_LOCAL_INLINE(PyObject *)
1664do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001665{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666 Py_buffer vsep;
1667 char *s = PyBytes_AS_STRING(self);
1668 Py_ssize_t len = PyBytes_GET_SIZE(self);
1669 char *sep;
1670 Py_ssize_t seplen;
1671 Py_ssize_t i, j;
1672
1673 if (_getbuffer(sepobj, &vsep) < 0)
1674 return NULL;
1675 sep = vsep.buf;
1676 seplen = vsep.len;
1677
1678 i = 0;
1679 if (striptype != RIGHTSTRIP) {
1680 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1681 i++;
1682 }
1683 }
1684
1685 j = len;
1686 if (striptype != LEFTSTRIP) {
1687 do {
1688 j--;
1689 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1690 j++;
1691 }
1692
Martin v. Löwis423be952008-08-13 15:53:07 +00001693 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
1695 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1696 Py_INCREF(self);
1697 return (PyObject*)self;
1698 }
1699 else
1700 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001701}
1702
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
1704Py_LOCAL_INLINE(PyObject *)
1705do_strip(PyBytesObject *self, int striptype)
1706{
1707 char *s = PyBytes_AS_STRING(self);
1708 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1709
1710 i = 0;
1711 if (striptype != RIGHTSTRIP) {
1712 while (i < len && ISSPACE(s[i])) {
1713 i++;
1714 }
1715 }
1716
1717 j = len;
1718 if (striptype != LEFTSTRIP) {
1719 do {
1720 j--;
1721 } while (j >= i && ISSPACE(s[j]));
1722 j++;
1723 }
1724
1725 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1726 Py_INCREF(self);
1727 return (PyObject*)self;
1728 }
1729 else
1730 return PyBytes_FromStringAndSize(s+i, j-i);
1731}
1732
1733
1734Py_LOCAL_INLINE(PyObject *)
1735do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1736{
1737 PyObject *sep = NULL;
1738
1739 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1740 return NULL;
1741
1742 if (sep != NULL && sep != Py_None) {
1743 return do_xstrip(self, striptype, sep);
1744 }
1745 return do_strip(self, striptype);
1746}
1747
1748
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001749PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001751\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001752Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001754static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001756{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757 if (PyTuple_GET_SIZE(args) == 0)
1758 return do_strip(self, BOTHSTRIP); /* Common case */
1759 else
1760 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001761}
1762
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001764PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001766\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001767Strip leading bytes contained in the argument.\n\
1768If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001771{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772 if (PyTuple_GET_SIZE(args) == 0)
1773 return do_strip(self, LEFTSTRIP); /* Common case */
1774 else
1775 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776}
1777
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001780"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001782Strip trailing bytes contained in the argument.\n\
1783If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001786{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787 if (PyTuple_GET_SIZE(args) == 0)
1788 return do_strip(self, RIGHTSTRIP); /* Common case */
1789 else
1790 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791}
Neal Norwitz6968b052007-02-27 19:02:19 +00001792
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
1794PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001795"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001796\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797Return the number of non-overlapping occurrences of substring sub in\n\
1798string S[start:end]. Optional arguments start and end are interpreted\n\
1799as in slice notation.");
1800
1801static PyObject *
1802string_count(PyBytesObject *self, PyObject *args)
1803{
1804 PyObject *sub_obj;
1805 const char *str = PyBytes_AS_STRING(self), *sub;
1806 Py_ssize_t sub_len;
1807 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1808
1809 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1810 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1811 return NULL;
1812
1813 if (PyBytes_Check(sub_obj)) {
1814 sub = PyBytes_AS_STRING(sub_obj);
1815 sub_len = PyBytes_GET_SIZE(sub_obj);
1816 }
1817 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1818 return NULL;
1819
1820 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1821
1822 return PyLong_FromSsize_t(
1823 stringlib_count(str + start, end - start, sub, sub_len)
1824 );
1825}
1826
1827
1828PyDoc_STRVAR(translate__doc__,
1829"B.translate(table[, deletechars]) -> bytes\n\
1830\n\
1831Return a copy of B, where all characters occurring in the\n\
1832optional argument deletechars are removed, and the remaining\n\
1833characters have been mapped through the given translation\n\
1834table, which must be a bytes object of length 256.");
1835
1836static PyObject *
1837string_translate(PyBytesObject *self, PyObject *args)
1838{
1839 register char *input, *output;
1840 const char *table;
1841 register Py_ssize_t i, c, changed = 0;
1842 PyObject *input_obj = (PyObject*)self;
1843 const char *output_start, *del_table=NULL;
1844 Py_ssize_t inlen, tablen, dellen = 0;
1845 PyObject *result;
1846 int trans_table[256];
1847 PyObject *tableobj, *delobj = NULL;
1848
1849 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1850 &tableobj, &delobj))
1851 return NULL;
1852
1853 if (PyBytes_Check(tableobj)) {
1854 table = PyBytes_AS_STRING(tableobj);
1855 tablen = PyBytes_GET_SIZE(tableobj);
1856 }
1857 else if (tableobj == Py_None) {
1858 table = NULL;
1859 tablen = 256;
1860 }
1861 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1862 return NULL;
1863
1864 if (tablen != 256) {
1865 PyErr_SetString(PyExc_ValueError,
1866 "translation table must be 256 characters long");
1867 return NULL;
1868 }
1869
1870 if (delobj != NULL) {
1871 if (PyBytes_Check(delobj)) {
1872 del_table = PyBytes_AS_STRING(delobj);
1873 dellen = PyBytes_GET_SIZE(delobj);
1874 }
1875 else if (PyUnicode_Check(delobj)) {
1876 PyErr_SetString(PyExc_TypeError,
1877 "deletions are implemented differently for unicode");
1878 return NULL;
1879 }
1880 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1881 return NULL;
1882 }
1883 else {
1884 del_table = NULL;
1885 dellen = 0;
1886 }
1887
1888 inlen = PyBytes_GET_SIZE(input_obj);
1889 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1890 if (result == NULL)
1891 return NULL;
1892 output_start = output = PyBytes_AsString(result);
1893 input = PyBytes_AS_STRING(input_obj);
1894
1895 if (dellen == 0 && table != NULL) {
1896 /* If no deletions are required, use faster code */
1897 for (i = inlen; --i >= 0; ) {
1898 c = Py_CHARMASK(*input++);
1899 if (Py_CHARMASK((*output++ = table[c])) != c)
1900 changed = 1;
1901 }
1902 if (changed || !PyBytes_CheckExact(input_obj))
1903 return result;
1904 Py_DECREF(result);
1905 Py_INCREF(input_obj);
1906 return input_obj;
1907 }
1908
1909 if (table == NULL) {
1910 for (i = 0; i < 256; i++)
1911 trans_table[i] = Py_CHARMASK(i);
1912 } else {
1913 for (i = 0; i < 256; i++)
1914 trans_table[i] = Py_CHARMASK(table[i]);
1915 }
1916
1917 for (i = 0; i < dellen; i++)
1918 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1919
1920 for (i = inlen; --i >= 0; ) {
1921 c = Py_CHARMASK(*input++);
1922 if (trans_table[c] != -1)
1923 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1924 continue;
1925 changed = 1;
1926 }
1927 if (!changed && PyBytes_CheckExact(input_obj)) {
1928 Py_DECREF(result);
1929 Py_INCREF(input_obj);
1930 return input_obj;
1931 }
1932 /* Fix the size of the resulting string */
1933 if (inlen > 0)
1934 _PyBytes_Resize(&result, output - output_start);
1935 return result;
1936}
1937
1938
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c in the target_len bytes starting at
   target; evaluates to a char * to the match, or NULL if there is none.
   Every argument is parenthesized so expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1946
1947/* String ops must return a string. */
1948/* If the object is subclass of string, create a copy */
1949Py_LOCAL(PyBytesObject *)
1950return_self(PyBytesObject *self)
1951{
1952 if (PyBytes_CheckExact(self)) {
1953 Py_INCREF(self);
1954 return self;
1955 }
1956 return (PyBytesObject *)PyBytes_FromStringAndSize(
1957 PyBytes_AS_STRING(self),
1958 PyBytes_GET_SIZE(self));
1959}
1960
1961Py_LOCAL_INLINE(Py_ssize_t)
1962countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1963{
1964 Py_ssize_t count=0;
1965 const char *start=target;
1966 const char *end=target+target_len;
1967
1968 while ( (start=findchar(start, end-start, c)) != NULL ) {
1969 count++;
1970 if (count >= maxcount)
1971 break;
1972 start += 1;
1973 }
1974 return count;
1975}
1976
1977Py_LOCAL(Py_ssize_t)
1978findstring(const char *target, Py_ssize_t target_len,
1979 const char *pattern, Py_ssize_t pattern_len,
1980 Py_ssize_t start,
1981 Py_ssize_t end,
1982 int direction)
1983{
1984 if (start < 0) {
1985 start += target_len;
1986 if (start < 0)
1987 start = 0;
1988 }
1989 if (end > target_len) {
1990 end = target_len;
1991 } else if (end < 0) {
1992 end += target_len;
1993 if (end < 0)
1994 end = 0;
1995 }
1996
1997 /* zero-length substrings always match at the first attempt */
1998 if (pattern_len == 0)
1999 return (direction > 0) ? start : end;
2000
2001 end -= pattern_len;
2002
2003 if (direction < 0) {
2004 for (; end >= start; end--)
2005 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2006 return end;
2007 } else {
2008 for (; start <= end; start++)
2009 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2010 return start;
2011 }
2012 return -1;
2013}
2014
2015Py_LOCAL_INLINE(Py_ssize_t)
2016countstring(const char *target, Py_ssize_t target_len,
2017 const char *pattern, Py_ssize_t pattern_len,
2018 Py_ssize_t start,
2019 Py_ssize_t end,
2020 int direction, Py_ssize_t maxcount)
2021{
2022 Py_ssize_t count=0;
2023
2024 if (start < 0) {
2025 start += target_len;
2026 if (start < 0)
2027 start = 0;
2028 }
2029 if (end > target_len) {
2030 end = target_len;
2031 } else if (end < 0) {
2032 end += target_len;
2033 if (end < 0)
2034 end = 0;
2035 }
2036
2037 /* zero-length substrings match everywhere */
2038 if (pattern_len == 0 || maxcount == 0) {
2039 if (target_len+1 < maxcount)
2040 return target_len+1;
2041 return maxcount;
2042 }
2043
2044 end -= pattern_len;
2045 if (direction < 0) {
2046 for (; (end >= start); end--)
2047 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2048 count++;
2049 if (--maxcount <= 0) break;
2050 end -= pattern_len-1;
2051 }
2052 } else {
2053 for (; (start <= end); start++)
2054 if (Py_STRING_MATCH(target, start,
2055 pattern, pattern_len)) {
2056 count++;
2057 if (--maxcount <= 0)
2058 break;
2059 start += pattern_len-1;
2060 }
2061 }
2062 return count;
2063}
2064
2065
2066/* Algorithms for different cases of string replacement */
2067
2068/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2069Py_LOCAL(PyBytesObject *)
2070replace_interleave(PyBytesObject *self,
2071 const char *to_s, Py_ssize_t to_len,
2072 Py_ssize_t maxcount)
2073{
2074 char *self_s, *result_s;
2075 Py_ssize_t self_len, result_len;
2076 Py_ssize_t count, i, product;
2077 PyBytesObject *result;
2078
2079 self_len = PyBytes_GET_SIZE(self);
2080
2081 /* 1 at the end plus 1 after every character */
2082 count = self_len+1;
2083 if (maxcount < count)
2084 count = maxcount;
2085
2086 /* Check for overflow */
2087 /* result_len = count * to_len + self_len; */
2088 product = count * to_len;
2089 if (product / to_len != count) {
2090 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002091 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092 return NULL;
2093 }
2094 result_len = product + self_len;
2095 if (result_len < 0) {
2096 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002097 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098 return NULL;
2099 }
2100
2101 if (! (result = (PyBytesObject *)
2102 PyBytes_FromStringAndSize(NULL, result_len)) )
2103 return NULL;
2104
2105 self_s = PyBytes_AS_STRING(self);
2106 result_s = PyBytes_AS_STRING(result);
2107
2108 /* TODO: special case single character, which doesn't need memcpy */
2109
2110 /* Lay the first one down (guaranteed this will occur) */
2111 Py_MEMCPY(result_s, to_s, to_len);
2112 result_s += to_len;
2113 count -= 1;
2114
2115 for (i=0; i<count; i++) {
2116 *result_s++ = *self_s++;
2117 Py_MEMCPY(result_s, to_s, to_len);
2118 result_s += to_len;
2119 }
2120
2121 /* Copy the rest of the original string */
2122 Py_MEMCPY(result_s, self_s, self_len-i);
2123
2124 return result;
2125}
2126
2127/* Special case for deleting a single character */
2128/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2129Py_LOCAL(PyBytesObject *)
2130replace_delete_single_character(PyBytesObject *self,
2131 char from_c, Py_ssize_t maxcount)
2132{
2133 char *self_s, *result_s;
2134 char *start, *next, *end;
2135 Py_ssize_t self_len, result_len;
2136 Py_ssize_t count;
2137 PyBytesObject *result;
2138
2139 self_len = PyBytes_GET_SIZE(self);
2140 self_s = PyBytes_AS_STRING(self);
2141
2142 count = countchar(self_s, self_len, from_c, maxcount);
2143 if (count == 0) {
2144 return return_self(self);
2145 }
2146
2147 result_len = self_len - count; /* from_len == 1 */
2148 assert(result_len>=0);
2149
2150 if ( (result = (PyBytesObject *)
2151 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2152 return NULL;
2153 result_s = PyBytes_AS_STRING(result);
2154
2155 start = self_s;
2156 end = self_s + self_len;
2157 while (count-- > 0) {
2158 next = findchar(start, end-start, from_c);
2159 if (next == NULL)
2160 break;
2161 Py_MEMCPY(result_s, start, next-start);
2162 result_s += (next-start);
2163 start = next+1;
2164 }
2165 Py_MEMCPY(result_s, start, end-start);
2166
2167 return result;
2168}
2169
2170/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2171
2172Py_LOCAL(PyBytesObject *)
2173replace_delete_substring(PyBytesObject *self,
2174 const char *from_s, Py_ssize_t from_len,
2175 Py_ssize_t maxcount) {
2176 char *self_s, *result_s;
2177 char *start, *next, *end;
2178 Py_ssize_t self_len, result_len;
2179 Py_ssize_t count, offset;
2180 PyBytesObject *result;
2181
2182 self_len = PyBytes_GET_SIZE(self);
2183 self_s = PyBytes_AS_STRING(self);
2184
2185 count = countstring(self_s, self_len,
2186 from_s, from_len,
2187 0, self_len, 1,
2188 maxcount);
2189
2190 if (count == 0) {
2191 /* no matches */
2192 return return_self(self);
2193 }
2194
2195 result_len = self_len - (count * from_len);
2196 assert (result_len>=0);
2197
2198 if ( (result = (PyBytesObject *)
2199 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2200 return NULL;
2201
2202 result_s = PyBytes_AS_STRING(result);
2203
2204 start = self_s;
2205 end = self_s + self_len;
2206 while (count-- > 0) {
2207 offset = findstring(start, end-start,
2208 from_s, from_len,
2209 0, end-start, FORWARD);
2210 if (offset == -1)
2211 break;
2212 next = start + offset;
2213
2214 Py_MEMCPY(result_s, start, next-start);
2215
2216 result_s += (next-start);
2217 start = next+from_len;
2218 }
2219 Py_MEMCPY(result_s, start, end-start);
2220 return result;
2221}
2222
2223/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2224Py_LOCAL(PyBytesObject *)
2225replace_single_character_in_place(PyBytesObject *self,
2226 char from_c, char to_c,
2227 Py_ssize_t maxcount)
2228{
2229 char *self_s, *result_s, *start, *end, *next;
2230 Py_ssize_t self_len;
2231 PyBytesObject *result;
2232
2233 /* The result string will be the same size */
2234 self_s = PyBytes_AS_STRING(self);
2235 self_len = PyBytes_GET_SIZE(self);
2236
2237 next = findchar(self_s, self_len, from_c);
2238
2239 if (next == NULL) {
2240 /* No matches; return the original string */
2241 return return_self(self);
2242 }
2243
2244 /* Need to make a new string */
2245 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2246 if (result == NULL)
2247 return NULL;
2248 result_s = PyBytes_AS_STRING(result);
2249 Py_MEMCPY(result_s, self_s, self_len);
2250
2251 /* change everything in-place, starting with this one */
2252 start = result_s + (next-self_s);
2253 *start = to_c;
2254 start++;
2255 end = result_s + self_len;
2256
2257 while (--maxcount > 0) {
2258 next = findchar(start, end-start, from_c);
2259 if (next == NULL)
2260 break;
2261 *next = to_c;
2262 start = next+1;
2263 }
2264
2265 return result;
2266}
2267
2268/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2269Py_LOCAL(PyBytesObject *)
2270replace_substring_in_place(PyBytesObject *self,
2271 const char *from_s, Py_ssize_t from_len,
2272 const char *to_s, Py_ssize_t to_len,
2273 Py_ssize_t maxcount)
2274{
2275 char *result_s, *start, *end;
2276 char *self_s;
2277 Py_ssize_t self_len, offset;
2278 PyBytesObject *result;
2279
2280 /* The result string will be the same size */
2281
2282 self_s = PyBytes_AS_STRING(self);
2283 self_len = PyBytes_GET_SIZE(self);
2284
2285 offset = findstring(self_s, self_len,
2286 from_s, from_len,
2287 0, self_len, FORWARD);
2288 if (offset == -1) {
2289 /* No matches; return the original string */
2290 return return_self(self);
2291 }
2292
2293 /* Need to make a new string */
2294 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2295 if (result == NULL)
2296 return NULL;
2297 result_s = PyBytes_AS_STRING(result);
2298 Py_MEMCPY(result_s, self_s, self_len);
2299
2300 /* change everything in-place, starting with this one */
2301 start = result_s + offset;
2302 Py_MEMCPY(start, to_s, from_len);
2303 start += from_len;
2304 end = result_s + self_len;
2305
2306 while ( --maxcount > 0) {
2307 offset = findstring(start, end-start,
2308 from_s, from_len,
2309 0, end-start, FORWARD);
2310 if (offset==-1)
2311 break;
2312 Py_MEMCPY(start+offset, to_s, from_len);
2313 start += offset+from_len;
2314 }
2315
2316 return result;
2317}
2318
2319/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2320Py_LOCAL(PyBytesObject *)
2321replace_single_character(PyBytesObject *self,
2322 char from_c,
2323 const char *to_s, Py_ssize_t to_len,
2324 Py_ssize_t maxcount)
2325{
2326 char *self_s, *result_s;
2327 char *start, *next, *end;
2328 Py_ssize_t self_len, result_len;
2329 Py_ssize_t count, product;
2330 PyBytesObject *result;
2331
2332 self_s = PyBytes_AS_STRING(self);
2333 self_len = PyBytes_GET_SIZE(self);
2334
2335 count = countchar(self_s, self_len, from_c, maxcount);
2336 if (count == 0) {
2337 /* no matches, return unchanged */
2338 return return_self(self);
2339 }
2340
2341 /* use the difference between current and new, hence the "-1" */
2342 /* result_len = self_len + count * (to_len-1) */
2343 product = count * (to_len-1);
2344 if (product / (to_len-1) != count) {
2345 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002346 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347 return NULL;
2348 }
2349 result_len = self_len + product;
2350 if (result_len < 0) {
2351 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002352 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353 return NULL;
2354 }
2355
2356 if ( (result = (PyBytesObject *)
2357 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2358 return NULL;
2359 result_s = PyBytes_AS_STRING(result);
2360
2361 start = self_s;
2362 end = self_s + self_len;
2363 while (count-- > 0) {
2364 next = findchar(start, end-start, from_c);
2365 if (next == NULL)
2366 break;
2367
2368 if (next == start) {
2369 /* replace with the 'to' */
2370 Py_MEMCPY(result_s, to_s, to_len);
2371 result_s += to_len;
2372 start += 1;
2373 } else {
2374 /* copy the unchanged old then the 'to' */
2375 Py_MEMCPY(result_s, start, next-start);
2376 result_s += (next-start);
2377 Py_MEMCPY(result_s, to_s, to_len);
2378 result_s += to_len;
2379 start = next+1;
2380 }
2381 }
2382 /* Copy the remainder of the remaining string */
2383 Py_MEMCPY(result_s, start, end-start);
2384
2385 return result;
2386}
2387
2388/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2389Py_LOCAL(PyBytesObject *)
2390replace_substring(PyBytesObject *self,
2391 const char *from_s, Py_ssize_t from_len,
2392 const char *to_s, Py_ssize_t to_len,
2393 Py_ssize_t maxcount) {
2394 char *self_s, *result_s;
2395 char *start, *next, *end;
2396 Py_ssize_t self_len, result_len;
2397 Py_ssize_t count, offset, product;
2398 PyBytesObject *result;
2399
2400 self_s = PyBytes_AS_STRING(self);
2401 self_len = PyBytes_GET_SIZE(self);
2402
2403 count = countstring(self_s, self_len,
2404 from_s, from_len,
2405 0, self_len, FORWARD, maxcount);
2406 if (count == 0) {
2407 /* no matches, return unchanged */
2408 return return_self(self);
2409 }
2410
2411 /* Check for overflow */
2412 /* result_len = self_len + count * (to_len-from_len) */
2413 product = count * (to_len-from_len);
2414 if (product / (to_len-from_len) != count) {
2415 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002416 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002417 return NULL;
2418 }
2419 result_len = self_len + product;
2420 if (result_len < 0) {
2421 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002422 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423 return NULL;
2424 }
2425
2426 if ( (result = (PyBytesObject *)
2427 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2428 return NULL;
2429 result_s = PyBytes_AS_STRING(result);
2430
2431 start = self_s;
2432 end = self_s + self_len;
2433 while (count-- > 0) {
2434 offset = findstring(start, end-start,
2435 from_s, from_len,
2436 0, end-start, FORWARD);
2437 if (offset == -1)
2438 break;
2439 next = start+offset;
2440 if (next == start) {
2441 /* replace with the 'to' */
2442 Py_MEMCPY(result_s, to_s, to_len);
2443 result_s += to_len;
2444 start += from_len;
2445 } else {
2446 /* copy the unchanged old then the 'to' */
2447 Py_MEMCPY(result_s, start, next-start);
2448 result_s += (next-start);
2449 Py_MEMCPY(result_s, to_s, to_len);
2450 result_s += to_len;
2451 start = next+from_len;
2452 }
2453 }
2454 /* Copy the remainder of the remaining string */
2455 Py_MEMCPY(result_s, start, end-start);
2456
2457 return result;
2458}
2459
2460
2461Py_LOCAL(PyBytesObject *)
2462replace(PyBytesObject *self,
2463 const char *from_s, Py_ssize_t from_len,
2464 const char *to_s, Py_ssize_t to_len,
2465 Py_ssize_t maxcount)
2466{
2467 if (maxcount < 0) {
2468 maxcount = PY_SSIZE_T_MAX;
2469 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2470 /* nothing to do; return the original string */
2471 return return_self(self);
2472 }
2473
2474 if (maxcount == 0 ||
2475 (from_len == 0 && to_len == 0)) {
2476 /* nothing to do; return the original string */
2477 return return_self(self);
2478 }
2479
2480 /* Handle zero-length special cases */
2481
2482 if (from_len == 0) {
2483 /* insert the 'to' string everywhere. */
2484 /* >>> "Python".replace("", ".") */
2485 /* '.P.y.t.h.o.n.' */
2486 return replace_interleave(self, to_s, to_len, maxcount);
2487 }
2488
2489 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2490 /* point for an empty self string to generate a non-empty string */
2491 /* Special case so the remaining code always gets a non-empty string */
2492 if (PyBytes_GET_SIZE(self) == 0) {
2493 return return_self(self);
2494 }
2495
2496 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002497 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498 if (from_len == 1) {
2499 return replace_delete_single_character(
2500 self, from_s[0], maxcount);
2501 } else {
2502 return replace_delete_substring(self, from_s,
2503 from_len, maxcount);
2504 }
2505 }
2506
2507 /* Handle special case where both strings have the same length */
2508
2509 if (from_len == to_len) {
2510 if (from_len == 1) {
2511 return replace_single_character_in_place(
2512 self,
2513 from_s[0],
2514 to_s[0],
2515 maxcount);
2516 } else {
2517 return replace_substring_in_place(
2518 self, from_s, from_len, to_s, to_len,
2519 maxcount);
2520 }
2521 }
2522
2523 /* Otherwise use the more generic algorithms */
2524 if (from_len == 1) {
2525 return replace_single_character(self, from_s[0],
2526 to_s, to_len, maxcount);
2527 } else {
2528 /* len('from')>=2, len('to')>=1 */
2529 return replace_substring(self, from_s, from_len, to_s, to_len,
2530 maxcount);
2531 }
2532}
2533
2534PyDoc_STRVAR(replace__doc__,
2535"B.replace(old, new[, count]) -> bytes\n\
2536\n\
2537Return a copy of B with all occurrences of subsection\n\
2538old replaced by new. If the optional argument count is\n\
2539given, only the first count occurrences are replaced.");
2540
2541static PyObject *
2542string_replace(PyBytesObject *self, PyObject *args)
2543{
2544 Py_ssize_t count = -1;
2545 PyObject *from, *to;
2546 const char *from_s, *to_s;
2547 Py_ssize_t from_len, to_len;
2548
2549 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2550 return NULL;
2551
2552 if (PyBytes_Check(from)) {
2553 from_s = PyBytes_AS_STRING(from);
2554 from_len = PyBytes_GET_SIZE(from);
2555 }
2556 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2557 return NULL;
2558
2559 if (PyBytes_Check(to)) {
2560 to_s = PyBytes_AS_STRING(to);
2561 to_len = PyBytes_GET_SIZE(to);
2562 }
2563 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2564 return NULL;
2565
2566 return (PyObject *)replace((PyBytesObject *) self,
2567 from_s, from_len,
2568 to_s, to_len, count);
2569}
2570
2571/** End DALKE **/
2572
2573/* Matches the end (direction >= 0) or start (direction < 0) of self
2574 * against substr, using the start and end arguments. Returns
2575 * -1 on error, 0 if not found and 1 if found.
2576 */
2577Py_LOCAL(int)
2578_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2579 Py_ssize_t end, int direction)
2580{
2581 Py_ssize_t len = PyBytes_GET_SIZE(self);
2582 Py_ssize_t slen;
2583 const char* sub;
2584 const char* str;
2585
2586 if (PyBytes_Check(substr)) {
2587 sub = PyBytes_AS_STRING(substr);
2588 slen = PyBytes_GET_SIZE(substr);
2589 }
2590 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2591 return -1;
2592 str = PyBytes_AS_STRING(self);
2593
2594 string_adjust_indices(&start, &end, len);
2595
2596 if (direction < 0) {
2597 /* startswith */
2598 if (start+slen > len)
2599 return 0;
2600 } else {
2601 /* endswith */
2602 if (end-start < slen || start > len)
2603 return 0;
2604
2605 if (end-slen > start)
2606 start = end - slen;
2607 }
2608 if (end-start >= slen)
2609 return ! memcmp(str+start, sub, slen);
2610 return 0;
2611}
2612
2613
2614PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002615"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616\n\
2617Return True if B starts with the specified prefix, False otherwise.\n\
2618With optional start, test B beginning at that position.\n\
2619With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002620prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
2622static PyObject *
2623string_startswith(PyBytesObject *self, PyObject *args)
2624{
2625 Py_ssize_t start = 0;
2626 Py_ssize_t end = PY_SSIZE_T_MAX;
2627 PyObject *subobj;
2628 int result;
2629
2630 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2631 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2632 return NULL;
2633 if (PyTuple_Check(subobj)) {
2634 Py_ssize_t i;
2635 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2636 result = _string_tailmatch(self,
2637 PyTuple_GET_ITEM(subobj, i),
2638 start, end, -1);
2639 if (result == -1)
2640 return NULL;
2641 else if (result) {
2642 Py_RETURN_TRUE;
2643 }
2644 }
2645 Py_RETURN_FALSE;
2646 }
2647 result = _string_tailmatch(self, subobj, start, end, -1);
2648 if (result == -1)
2649 return NULL;
2650 else
2651 return PyBool_FromLong(result);
2652}
2653
2654
2655PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002656"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657\n\
2658Return True if B ends with the specified suffix, False otherwise.\n\
2659With optional start, test B beginning at that position.\n\
2660With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002661suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
2663static PyObject *
2664string_endswith(PyBytesObject *self, PyObject *args)
2665{
2666 Py_ssize_t start = 0;
2667 Py_ssize_t end = PY_SSIZE_T_MAX;
2668 PyObject *subobj;
2669 int result;
2670
2671 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2672 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2673 return NULL;
2674 if (PyTuple_Check(subobj)) {
2675 Py_ssize_t i;
2676 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2677 result = _string_tailmatch(self,
2678 PyTuple_GET_ITEM(subobj, i),
2679 start, end, +1);
2680 if (result == -1)
2681 return NULL;
2682 else if (result) {
2683 Py_RETURN_TRUE;
2684 }
2685 }
2686 Py_RETURN_FALSE;
2687 }
2688 result = _string_tailmatch(self, subobj, start, end, +1);
2689 if (result == -1)
2690 return NULL;
2691 else
2692 return PyBool_FromLong(result);
2693}
2694
2695
2696PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002697"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002699Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002700to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002701handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2702a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002704able to handle UnicodeDecodeErrors.");
2705
2706static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002708{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709 const char *encoding = NULL;
2710 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002711
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2713 return NULL;
2714 if (encoding == NULL)
2715 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002716 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002717}
2718
Guido van Rossum20188312006-05-05 15:15:40 +00002719
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002720PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002722\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002724Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002726
2727static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002728hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002729{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730 if (c >= 128)
2731 return -1;
2732 if (ISDIGIT(c))
2733 return c - '0';
2734 else {
2735 if (ISUPPER(c))
2736 c = TOLOWER(c);
2737 if (c >= 'a' && c <= 'f')
2738 return c - 'a' + 10;
2739 }
2740 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002741}
2742
2743static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002745{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746 PyObject *newstring, *hexobj;
2747 char *buf;
2748 Py_UNICODE *hex;
2749 Py_ssize_t hexlen, byteslen, i, j;
2750 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002751
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2753 return NULL;
2754 assert(PyUnicode_Check(hexobj));
2755 hexlen = PyUnicode_GET_SIZE(hexobj);
2756 hex = PyUnicode_AS_UNICODE(hexobj);
2757 byteslen = hexlen/2; /* This overestimates if there are spaces */
2758 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2759 if (!newstring)
2760 return NULL;
2761 buf = PyBytes_AS_STRING(newstring);
2762 for (i = j = 0; i < hexlen; i += 2) {
2763 /* skip over spaces in the input */
2764 while (hex[i] == ' ')
2765 i++;
2766 if (i >= hexlen)
2767 break;
2768 top = hex_digit_to_int(hex[i]);
2769 bot = hex_digit_to_int(hex[i+1]);
2770 if (top == -1 || bot == -1) {
2771 PyErr_Format(PyExc_ValueError,
2772 "non-hexadecimal number found in "
2773 "fromhex() arg at position %zd", i);
2774 goto error;
2775 }
2776 buf[j++] = (top << 4) + bot;
2777 }
2778 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2779 goto error;
2780 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002781
2782 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783 Py_XDECREF(newstring);
2784 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002785}
2786
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002787PyDoc_STRVAR(sizeof__doc__,
2788"S.__sizeof__() -> size of S in memory, in bytes");
2789
2790static PyObject *
2791string_sizeof(PyBytesObject *v)
2792{
2793 Py_ssize_t res;
2794 res = sizeof(PyBytesObject) + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2795 return PyLong_FromSsize_t(res);
2796}
2797
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002798
2799static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002801{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002803}
2804
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002805
2806static PyMethodDef
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807string_methods[] = {
2808 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
2809 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2810 _Py_capitalize__doc__},
2811 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2812 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2813 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2814 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2815 endswith__doc__},
2816 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2817 expandtabs__doc__},
2818 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2819 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2820 fromhex_doc},
2821 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2822 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2823 _Py_isalnum__doc__},
2824 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2825 _Py_isalpha__doc__},
2826 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2827 _Py_isdigit__doc__},
2828 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2829 _Py_islower__doc__},
2830 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2831 _Py_isspace__doc__},
2832 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2833 _Py_istitle__doc__},
2834 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2835 _Py_isupper__doc__},
2836 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2837 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2838 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2839 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2840 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
2841 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2842 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2843 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2844 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2845 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2846 rpartition__doc__},
2847 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2848 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2849 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2850 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2851 splitlines__doc__},
2852 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2853 startswith__doc__},
2854 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2855 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2856 _Py_swapcase__doc__},
2857 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2858 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2859 translate__doc__},
2860 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2861 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002862 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
2863 sizeof__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002865};
2866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867static PyObject *
2868str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2869
2870static PyObject *
2871string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2872{
2873 PyObject *x = NULL, *it;
2874 const char *encoding = NULL;
2875 const char *errors = NULL;
2876 PyObject *new = NULL;
2877 Py_ssize_t i, size;
2878 static char *kwlist[] = {"source", "encoding", "errors", 0};
2879
2880 if (type != &PyBytes_Type)
2881 return str_subtype_new(type, args, kwds);
2882 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2883 &encoding, &errors))
2884 return NULL;
2885 if (x == NULL) {
2886 if (encoding != NULL || errors != NULL) {
2887 PyErr_SetString(PyExc_TypeError,
2888 "encoding or errors without sequence "
2889 "argument");
2890 return NULL;
2891 }
2892 return PyBytes_FromString("");
2893 }
2894
2895 if (PyUnicode_Check(x)) {
2896 /* Encode via the codec registry */
2897 if (encoding == NULL) {
2898 PyErr_SetString(PyExc_TypeError,
2899 "string argument without an encoding");
2900 return NULL;
2901 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002902 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002903 if (new == NULL)
2904 return NULL;
2905 assert(PyBytes_Check(new));
2906 return new;
2907 }
2908
2909 /* If it's not unicode, there can't be encoding or errors */
2910 if (encoding != NULL || errors != NULL) {
2911 PyErr_SetString(PyExc_TypeError,
2912 "encoding or errors without a string argument");
2913 return NULL;
2914 }
2915
2916 /* Is it an int? */
2917 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2918 if (size == -1 && PyErr_Occurred()) {
2919 PyErr_Clear();
2920 }
2921 else {
2922 if (size < 0) {
2923 PyErr_SetString(PyExc_ValueError, "negative count");
2924 return NULL;
2925 }
2926 new = PyBytes_FromStringAndSize(NULL, size);
2927 if (new == NULL) {
2928 return NULL;
2929 }
2930 if (size > 0) {
2931 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2932 }
2933 return new;
2934 }
2935
2936 /* Use the modern buffer interface */
2937 if (PyObject_CheckBuffer(x)) {
2938 Py_buffer view;
2939 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2940 return NULL;
2941 new = PyBytes_FromStringAndSize(NULL, view.len);
2942 if (!new)
2943 goto fail;
2944 // XXX(brett.cannon): Better way to get to internal buffer?
2945 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2946 &view, view.len, 'C') < 0)
2947 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002948 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949 return new;
2950 fail:
2951 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002952 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953 return NULL;
2954 }
2955
2956 /* For iterator version, create a string object and resize as needed */
2957 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2958 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2959 input being a truly long iterator. */
2960 size = 64;
2961 new = PyBytes_FromStringAndSize(NULL, size);
2962 if (new == NULL)
2963 return NULL;
2964
2965 /* XXX Optimize this if the arguments is a list, tuple */
2966
2967 /* Get the iterator */
2968 it = PyObject_GetIter(x);
2969 if (it == NULL)
2970 goto error;
2971
2972 /* Run the iterator to exhaustion */
2973 for (i = 0; ; i++) {
2974 PyObject *item;
2975 Py_ssize_t value;
2976
2977 /* Get the next item */
2978 item = PyIter_Next(it);
2979 if (item == NULL) {
2980 if (PyErr_Occurred())
2981 goto error;
2982 break;
2983 }
2984
2985 /* Interpret it as an int (__index__) */
2986 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2987 Py_DECREF(item);
2988 if (value == -1 && PyErr_Occurred())
2989 goto error;
2990
2991 /* Range check */
2992 if (value < 0 || value >= 256) {
2993 PyErr_SetString(PyExc_ValueError,
2994 "bytes must be in range(0, 256)");
2995 goto error;
2996 }
2997
2998 /* Append the byte */
2999 if (i >= size) {
3000 size *= 2;
3001 if (_PyBytes_Resize(&new, size) < 0)
3002 goto error;
3003 }
3004 ((PyBytesObject *)new)->ob_sval[i] = value;
3005 }
3006 _PyBytes_Resize(&new, i);
3007
3008 /* Clean up and return success */
3009 Py_DECREF(it);
3010 return new;
3011
3012 error:
3013 /* Error handling when new != NULL */
3014 Py_XDECREF(it);
3015 Py_DECREF(new);
3016 return NULL;
3017}
3018
3019static PyObject *
3020str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3021{
3022 PyObject *tmp, *pnew;
3023 Py_ssize_t n;
3024
3025 assert(PyType_IsSubtype(type, &PyBytes_Type));
3026 tmp = string_new(&PyBytes_Type, args, kwds);
3027 if (tmp == NULL)
3028 return NULL;
3029 assert(PyBytes_CheckExact(tmp));
3030 n = PyBytes_GET_SIZE(tmp);
3031 pnew = type->tp_alloc(type, n);
3032 if (pnew != NULL) {
3033 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3034 PyBytes_AS_STRING(tmp), n+1);
3035 ((PyBytesObject *)pnew)->ob_shash =
3036 ((PyBytesObject *)tmp)->ob_shash;
3037 }
3038 Py_DECREF(tmp);
3039 return pnew;
3040}
3041
3042PyDoc_STRVAR(string_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003043"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003045bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3046bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003047\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003048Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003049 - an iterable yielding integers in range(256)\n\
3050 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003051 - a bytes or a buffer object\n\
3052 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003053
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003054static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003055
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003056PyTypeObject PyBytes_Type = {
3057 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3058 "bytes",
3059 sizeof(PyBytesObject),
3060 sizeof(char),
3061 string_dealloc, /* tp_dealloc */
3062 0, /* tp_print */
3063 0, /* tp_getattr */
3064 0, /* tp_setattr */
3065 0, /* tp_compare */
3066 (reprfunc)string_repr, /* tp_repr */
3067 0, /* tp_as_number */
3068 &string_as_sequence, /* tp_as_sequence */
3069 &string_as_mapping, /* tp_as_mapping */
3070 (hashfunc)string_hash, /* tp_hash */
3071 0, /* tp_call */
3072 string_str, /* tp_str */
3073 PyObject_GenericGetAttr, /* tp_getattro */
3074 0, /* tp_setattro */
3075 &string_as_buffer, /* tp_as_buffer */
3076 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3077 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3078 string_doc, /* tp_doc */
3079 0, /* tp_traverse */
3080 0, /* tp_clear */
3081 (richcmpfunc)string_richcompare, /* tp_richcompare */
3082 0, /* tp_weaklistoffset */
3083 str_iter, /* tp_iter */
3084 0, /* tp_iternext */
3085 string_methods, /* tp_methods */
3086 0, /* tp_members */
3087 0, /* tp_getset */
3088 &PyBaseObject_Type, /* tp_base */
3089 0, /* tp_dict */
3090 0, /* tp_descr_get */
3091 0, /* tp_descr_set */
3092 0, /* tp_dictoffset */
3093 0, /* tp_init */
3094 0, /* tp_alloc */
3095 string_new, /* tp_new */
3096 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003097};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003099void
3100PyBytes_Concat(register PyObject **pv, register PyObject *w)
3101{
3102 register PyObject *v;
3103 assert(pv != NULL);
3104 if (*pv == NULL)
3105 return;
3106 if (w == NULL) {
3107 Py_DECREF(*pv);
3108 *pv = NULL;
3109 return;
3110 }
3111 v = string_concat(*pv, w);
3112 Py_DECREF(*pv);
3113 *pv = v;
3114}
3115
3116void
3117PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3118{
3119 PyBytes_Concat(pv, w);
3120 Py_XDECREF(w);
3121}
3122
3123
3124/* The following function breaks the notion that strings are immutable:
3125 it changes the size of a string. We get away with this only if there
3126 is only one module referencing the object. You can also think of it
3127 as creating a new string object and destroying the old one, only
3128 more efficiently. In any case, don't use this if the string may
3129 already be known to some other part of the code...
3130 Note that if there's not enough memory to resize the string, the original
3131 string object at *pv is deallocated, *pv is set to NULL, an "out of
3132 memory" exception is set, and -1 is returned. Else (on success) 0 is
3133 returned, and the value in *pv may or may not be the same as on input.
3134 As always, an extra byte is allocated for a trailing \0 byte (newsize
3135 does *not* include that), and a trailing \0 byte is stored.
3136*/
3137
3138int
3139_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3140{
3141 register PyObject *v;
3142 register PyBytesObject *sv;
3143 v = *pv;
3144 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3145 *pv = 0;
3146 Py_DECREF(v);
3147 PyErr_BadInternalCall();
3148 return -1;
3149 }
3150 /* XXX UNREF/NEWREF interface should be more symmetrical */
3151 _Py_DEC_REFTOTAL;
3152 _Py_ForgetReference(v);
3153 *pv = (PyObject *)
3154 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
3155 if (*pv == NULL) {
3156 PyObject_Del(v);
3157 PyErr_NoMemory();
3158 return -1;
3159 }
3160 _Py_NewReference(*pv);
3161 sv = (PyBytesObject *) *pv;
3162 Py_SIZE(sv) = newsize;
3163 sv->ob_sval[newsize] = '\0';
3164 sv->ob_shash = -1; /* invalidate cached hash value */
3165 return 0;
3166}
3167
3168/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3169 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3170 * Python's regular ints.
3171 * Return value: a new PyString*, or NULL if error.
3172 * . *pbuf is set to point into it,
3173 * *plen set to the # of chars following that.
3174 * Caller must decref it when done using pbuf.
3175 * The string starting at *pbuf is of the form
3176 * "-"? ("0x" | "0X")? digit+
3177 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3178 * set in flags. The case of hex digits will be correct,
3179 * There will be at least prec digits, zero-filled on the left if
3180 * necessary to get that many.
3181 * val object to be converted
3182 * flags bitmask of format flags; only F_ALT is looked at
3183 * prec minimum number of digits; 0-fill on left if needed
3184 * type a character in [duoxX]; u acts the same as d
3185 *
3186 * CAUTION: o, x and X conversions on regular ints can never
3187 * produce a '-' sign, but can for Python's unbounded ints.
3188 */
3189PyObject*
3190_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3191 char **pbuf, int *plen)
3192{
3193 PyObject *result = NULL;
3194 char *buf;
3195 Py_ssize_t i;
3196 int sign; /* 1 if '-', else 0 */
3197 int len; /* number of characters */
3198 Py_ssize_t llen;
3199 int numdigits; /* len == numnondigits + numdigits */
3200 int numnondigits = 0;
3201
3202 /* Avoid exceeding SSIZE_T_MAX */
3203 if (prec > PY_SSIZE_T_MAX-3) {
3204 PyErr_SetString(PyExc_OverflowError,
3205 "precision too large");
3206 return NULL;
3207 }
3208
3209 switch (type) {
3210 case 'd':
3211 case 'u':
3212 /* Special-case boolean: we want 0/1 */
3213 if (PyBool_Check(val))
3214 result = PyNumber_ToBase(val, 10);
3215 else
3216 result = Py_TYPE(val)->tp_str(val);
3217 break;
3218 case 'o':
3219 numnondigits = 2;
3220 result = PyNumber_ToBase(val, 8);
3221 break;
3222 case 'x':
3223 case 'X':
3224 numnondigits = 2;
3225 result = PyNumber_ToBase(val, 16);
3226 break;
3227 default:
3228 assert(!"'type' not in [duoxX]");
3229 }
3230 if (!result)
3231 return NULL;
3232
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003233 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003234 if (!buf) {
3235 Py_DECREF(result);
3236 return NULL;
3237 }
3238
3239 /* To modify the string in-place, there can only be one reference. */
3240 if (Py_REFCNT(result) != 1) {
3241 PyErr_BadInternalCall();
3242 return NULL;
3243 }
3244 llen = PyUnicode_GetSize(result);
3245 if (llen > INT_MAX) {
3246 PyErr_SetString(PyExc_ValueError,
3247 "string too large in _PyBytes_FormatLong");
3248 return NULL;
3249 }
3250 len = (int)llen;
3251 if (buf[len-1] == 'L') {
3252 --len;
3253 buf[len] = '\0';
3254 }
3255 sign = buf[0] == '-';
3256 numnondigits += sign;
3257 numdigits = len - numnondigits;
3258 assert(numdigits > 0);
3259
3260 /* Get rid of base marker unless F_ALT */
3261 if (((flags & F_ALT) == 0 &&
3262 (type == 'o' || type == 'x' || type == 'X'))) {
3263 assert(buf[sign] == '0');
3264 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3265 buf[sign+1] == 'o');
3266 numnondigits -= 2;
3267 buf += 2;
3268 len -= 2;
3269 if (sign)
3270 buf[0] = '-';
3271 assert(len == numnondigits + numdigits);
3272 assert(numdigits > 0);
3273 }
3274
3275 /* Fill with leading zeroes to meet minimum width. */
3276 if (prec > numdigits) {
3277 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3278 numnondigits + prec);
3279 char *b1;
3280 if (!r1) {
3281 Py_DECREF(result);
3282 return NULL;
3283 }
3284 b1 = PyBytes_AS_STRING(r1);
3285 for (i = 0; i < numnondigits; ++i)
3286 *b1++ = *buf++;
3287 for (i = 0; i < prec - numdigits; i++)
3288 *b1++ = '0';
3289 for (i = 0; i < numdigits; i++)
3290 *b1++ = *buf++;
3291 *b1 = '\0';
3292 Py_DECREF(result);
3293 result = r1;
3294 buf = PyBytes_AS_STRING(result);
3295 len = numnondigits + prec;
3296 }
3297
3298 /* Fix up case for hex conversions. */
3299 if (type == 'X') {
3300 /* Need to convert all lower case letters to upper case.
3301 and need to convert 0x to 0X (and -0x to -0X). */
3302 for (i = 0; i < len; i++)
3303 if (buf[i] >= 'a' && buf[i] <= 'x')
3304 buf[i] -= 'a'-'A';
3305 }
3306 *pbuf = buf;
3307 *plen = len;
3308 return result;
3309}
3310
3311void
3312PyBytes_Fini(void)
3313{
3314 int i;
3315 for (i = 0; i < UCHAR_MAX + 1; i++) {
3316 Py_XDECREF(characters[i]);
3317 characters[i] = NULL;
3318 }
3319 Py_XDECREF(nullstring);
3320 nullstring = NULL;
3321}
3322
Benjamin Peterson4116f362008-05-27 00:36:20 +00003323/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003324
3325typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003326 PyObject_HEAD
3327 Py_ssize_t it_index;
3328 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3329} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003330
3331static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003332striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003333{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003334 _PyObject_GC_UNTRACK(it);
3335 Py_XDECREF(it->it_seq);
3336 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003337}
3338
3339static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003340striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003341{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003342 Py_VISIT(it->it_seq);
3343 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003344}
3345
3346static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003347striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003348{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003349 PyBytesObject *seq;
3350 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003351
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003352 assert(it != NULL);
3353 seq = it->it_seq;
3354 if (seq == NULL)
3355 return NULL;
3356 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003357
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003358 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3359 item = PyLong_FromLong(
3360 (unsigned char)seq->ob_sval[it->it_index]);
3361 if (item != NULL)
3362 ++it->it_index;
3363 return item;
3364 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003366 Py_DECREF(seq);
3367 it->it_seq = NULL;
3368 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003369}
3370
3371static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003373{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374 Py_ssize_t len = 0;
3375 if (it->it_seq)
3376 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3377 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003378}
3379
3380PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003381 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003382
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383static PyMethodDef striter_methods[] = {
3384 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3385 length_hint_doc},
3386 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003387};
3388
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003389PyTypeObject PyBytesIter_Type = {
3390 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3391 "bytes_iterator", /* tp_name */
3392 sizeof(striterobject), /* tp_basicsize */
3393 0, /* tp_itemsize */
3394 /* methods */
3395 (destructor)striter_dealloc, /* tp_dealloc */
3396 0, /* tp_print */
3397 0, /* tp_getattr */
3398 0, /* tp_setattr */
3399 0, /* tp_compare */
3400 0, /* tp_repr */
3401 0, /* tp_as_number */
3402 0, /* tp_as_sequence */
3403 0, /* tp_as_mapping */
3404 0, /* tp_hash */
3405 0, /* tp_call */
3406 0, /* tp_str */
3407 PyObject_GenericGetAttr, /* tp_getattro */
3408 0, /* tp_setattro */
3409 0, /* tp_as_buffer */
3410 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3411 0, /* tp_doc */
3412 (traverseproc)striter_traverse, /* tp_traverse */
3413 0, /* tp_clear */
3414 0, /* tp_richcompare */
3415 0, /* tp_weaklistoffset */
3416 PyObject_SelfIter, /* tp_iter */
3417 (iternextfunc)striter_next, /* tp_iternext */
3418 striter_methods, /* tp_methods */
3419 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003420};
3421
3422static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003423str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003424{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003425 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003426
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003427 if (!PyBytes_Check(seq)) {
3428 PyErr_BadInternalCall();
3429 return NULL;
3430 }
3431 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3432 if (it == NULL)
3433 return NULL;
3434 it->it_index = 0;
3435 Py_INCREF(seq);
3436 it->it_seq = (PyBytesObject *)seq;
3437 _PyObject_GC_TRACK(it);
3438 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003439}