blob: bfb4ff8fd3c7f62973cc9ddb1a18c26234e518a4 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz2bad9702007-08-27 06:19:22 +00009static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000010_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000011{
Christian Heimes90aa7642007-12-19 02:45:37 +000012 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000013
Gregory P. Smith60d241f2007-10-16 06:31:30 +000014 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000015 {
16 PyErr_Format(PyExc_TypeError,
17 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000018 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000019 return -1;
20 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000021
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000022 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
23 return -1;
24 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000025}
26
#ifdef COUNT_ALLOCS
/* Incremented each time the cached empty object (null_strings) or a
   cached one-byte object (one_strings) is handed out again. */
int null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects indexed by byte value.  Entries start
   out NULL and are filled in by PyBytes_FromStringAndSize() and
   PyBytes_FromString() the first time each value is created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first one is created. */
static PyBytesObject *nullstring;
33
34/*
35 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
39 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.
52
   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
59*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000060PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063 register PyBytesObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyBytes_FromStringAndSize");
67 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
86 /* Inline PyObject_NewVar */
87 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
88 if (op == NULL)
89 return PyErr_NoMemory();
90 PyObject_INIT_VAR(op, &PyBytes_Type, size);
91 op->ob_shash = -1;
92 if (str != NULL)
93 Py_MEMCPY(op->ob_sval, str, size);
94 op->ob_sval[size] = '\0';
95 /* share short strings */
96 if (size == 0) {
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 characters[*str & UCHAR_MAX] = op;
101 Py_INCREF(op);
102 }
103 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000104}
105
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000106PyObject *
107PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000109 register size_t size;
110 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000111
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000112 assert(str != NULL);
113 size = strlen(str);
114 if (size > PY_SSIZE_T_MAX) {
115 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000116 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117 return NULL;
118 }
119 if (size == 0 && (op = nullstring) != NULL) {
120#ifdef COUNT_ALLOCS
121 null_strings++;
122#endif
123 Py_INCREF(op);
124 return (PyObject *)op;
125 }
126 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
127#ifdef COUNT_ALLOCS
128 one_strings++;
129#endif
130 Py_INCREF(op);
131 return (PyObject *)op;
132 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000133
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000134 /* Inline PyObject_NewVar */
135 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
136 if (op == NULL)
137 return PyErr_NoMemory();
138 PyObject_INIT_VAR(op, &PyBytes_Type, size);
139 op->ob_shash = -1;
140 Py_MEMCPY(op->ob_sval, str, size+1);
141 /* share short strings */
142 if (size == 0) {
143 nullstring = op;
144 Py_INCREF(op);
145 } else if (size == 1) {
146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
150}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromFormatV(const char *format, va_list vargs)
154{
155 va_list count;
156 Py_ssize_t n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000160
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161#ifdef VA_LIST_IS_ARRAY
162 Py_MEMCPY(count, vargs, sizeof(va_list));
163#else
164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
167 count = vargs;
168#endif
169#endif
170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !ISALPHA(*f))
175 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000176
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000177 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
178 * they don't affect the amount of space we reserve.
179 */
180 if ((*f == 'l' || *f == 'z') &&
181 (f[1] == 'd' || f[1] == 'u'))
182 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000183
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'u': case 'i': case 'x':
192 (void) va_arg(count, int);
193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
207 * XXX I count 18 -- what's the extra for?
208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
217 what's in the argument list) */
218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
228 string = PyBytes_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000231
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000232 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000233
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 Py_ssize_t i;
238 int longflag = 0;
239 int size_tflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (ISDIGIT(*f))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (ISDIGIT(*f))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !ISALPHA(*f))
252 f++;
253 /* handle the long flag, but only for %ld and %lu.
254 others can be added when necessary. */
255 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
256 longflag = 1;
257 ++f;
258 }
259 /* handle the size_t flag. */
260 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
261 size_tflag = 1;
262 ++f;
263 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000264
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000265 switch (*f) {
266 case 'c':
267 *s++ = va_arg(vargs, int);
268 break;
269 case 'd':
270 if (longflag)
271 sprintf(s, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(s, "%d", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'u':
280 if (longflag)
281 sprintf(s, "%lu",
282 va_arg(vargs, unsigned long));
283 else if (size_tflag)
284 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
285 va_arg(vargs, size_t));
286 else
287 sprintf(s, "%u",
288 va_arg(vargs, unsigned int));
289 s += strlen(s);
290 break;
291 case 'i':
292 sprintf(s, "%i", va_arg(vargs, int));
293 s += strlen(s);
294 break;
295 case 'x':
296 sprintf(s, "%x", va_arg(vargs, int));
297 s += strlen(s);
298 break;
299 case 's':
300 p = va_arg(vargs, char*);
301 i = strlen(p);
302 if (n > 0 && i > n)
303 i = n;
304 Py_MEMCPY(s, p, i);
305 s += i;
306 break;
307 case 'p':
308 sprintf(s, "%p", va_arg(vargs, void*));
309 /* %p is ill-defined: ensure leading 0x. */
310 if (s[1] == 'X')
311 s[1] = 'x';
312 else if (s[1] != 'x') {
313 memmove(s+2, s, strlen(s)+1);
314 s[0] = '0';
315 s[1] = 'x';
316 }
317 s += strlen(s);
318 break;
319 case '%':
320 *s++ = '%';
321 break;
322 default:
323 strcpy(s, p);
324 s += strlen(s);
325 goto end;
326 }
327 } else
328 *s++ = *f;
329 }
330
331 end:
332 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
333 return string;
334}
335
336PyObject *
337PyBytes_FromFormat(const char *format, ...)
338{
339 PyObject* ret;
340 va_list vargs;
341
342#ifdef HAVE_STDARG_PROTOTYPES
343 va_start(vargs, format);
344#else
345 va_start(vargs);
346#endif
347 ret = PyBytes_FromFormatV(format, vargs);
348 va_end(vargs);
349 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000350}
351
352static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000356}
357
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358/* Unescape a backslash-escaped string. If unicode is non-zero,
359 the string is a u-literal. If recode_encoding is non-zero,
360 the string is UTF-8 encoded and should be re-encoded in the
361 specified encoding. */
362
363PyObject *PyBytes_DecodeEscape(const char *s,
364 Py_ssize_t len,
365 const char *errors,
366 Py_ssize_t unicode,
367 const char *recode_encoding)
368{
369 int c;
370 char *p, *buf;
371 const char *end;
372 PyObject *v;
373 Py_ssize_t newlen = recode_encoding ? 4*len:len;
374 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
375 if (v == NULL)
376 return NULL;
377 p = buf = PyBytes_AsString(v);
378 end = s + len;
379 while (s < end) {
380 if (*s != '\\') {
381 non_esc:
382 if (recode_encoding && (*s & 0x80)) {
383 PyObject *u, *w;
384 char *r;
385 const char* t;
386 Py_ssize_t rn;
387 t = s;
388 /* Decode non-ASCII bytes as UTF-8. */
389 while (t < end && (*t & 0x80)) t++;
390 u = PyUnicode_DecodeUTF8(s, t - s, errors);
391 if(!u) goto failed;
392
393 /* Recode them in target encoding. */
394 w = PyUnicode_AsEncodedString(
395 u, recode_encoding, errors);
396 Py_DECREF(u);
397 if (!w) goto failed;
398
399 /* Append bytes to output buffer. */
400 assert(PyBytes_Check(w));
401 r = PyBytes_AS_STRING(w);
402 rn = PyBytes_GET_SIZE(w);
403 Py_MEMCPY(p, r, rn);
404 p += rn;
405 Py_DECREF(w);
406 s = t;
407 } else {
408 *p++ = *s++;
409 }
410 continue;
411 }
412 s++;
413 if (s==end) {
414 PyErr_SetString(PyExc_ValueError,
415 "Trailing \\ in string");
416 goto failed;
417 }
418 switch (*s++) {
419 /* XXX This assumes ASCII! */
420 case '\n': break;
421 case '\\': *p++ = '\\'; break;
422 case '\'': *p++ = '\''; break;
423 case '\"': *p++ = '\"'; break;
424 case 'b': *p++ = '\b'; break;
425 case 'f': *p++ = '\014'; break; /* FF */
426 case 't': *p++ = '\t'; break;
427 case 'n': *p++ = '\n'; break;
428 case 'r': *p++ = '\r'; break;
429 case 'v': *p++ = '\013'; break; /* VT */
430 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
431 case '0': case '1': case '2': case '3':
432 case '4': case '5': case '6': case '7':
433 c = s[-1] - '0';
434 if (s < end && '0' <= *s && *s <= '7') {
435 c = (c<<3) + *s++ - '0';
436 if (s < end && '0' <= *s && *s <= '7')
437 c = (c<<3) + *s++ - '0';
438 }
439 *p++ = c;
440 break;
441 case 'x':
442 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
443 unsigned int x = 0;
444 c = Py_CHARMASK(*s);
445 s++;
446 if (ISDIGIT(c))
447 x = c - '0';
448 else if (ISLOWER(c))
449 x = 10 + c - 'a';
450 else
451 x = 10 + c - 'A';
452 x = x << 4;
453 c = Py_CHARMASK(*s);
454 s++;
455 if (ISDIGIT(c))
456 x += c - '0';
457 else if (ISLOWER(c))
458 x += 10 + c - 'a';
459 else
460 x += 10 + c - 'A';
461 *p++ = x;
462 break;
463 }
464 if (!errors || strcmp(errors, "strict") == 0) {
465 PyErr_SetString(PyExc_ValueError,
466 "invalid \\x escape");
467 goto failed;
468 }
469 if (strcmp(errors, "replace") == 0) {
470 *p++ = '?';
471 } else if (strcmp(errors, "ignore") == 0)
472 /* do nothing */;
473 else {
474 PyErr_Format(PyExc_ValueError,
475 "decoding error; unknown "
476 "error handling code: %.400s",
477 errors);
478 goto failed;
479 }
480 default:
481 *p++ = '\\';
482 s--;
483 goto non_esc; /* an arbitry number of unescaped
484 UTF-8 bytes may follow. */
485 }
486 }
487 if (p-buf < newlen)
488 _PyBytes_Resize(&v, p - buf);
489 return v;
490 failed:
491 Py_DECREF(v);
492 return NULL;
493}
494
495/* -------------------------------------------------------------------- */
496/* object api */
497
498Py_ssize_t
499PyBytes_Size(register PyObject *op)
500{
501 if (!PyBytes_Check(op)) {
502 PyErr_Format(PyExc_TypeError,
503 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
504 return -1;
505 }
506 return Py_SIZE(op);
507}
508
509char *
510PyBytes_AsString(register PyObject *op)
511{
512 if (!PyBytes_Check(op)) {
513 PyErr_Format(PyExc_TypeError,
514 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
515 return NULL;
516 }
517 return ((PyBytesObject *)op)->ob_sval;
518}
519
520int
521PyBytes_AsStringAndSize(register PyObject *obj,
522 register char **s,
523 register Py_ssize_t *len)
524{
525 if (s == NULL) {
526 PyErr_BadInternalCall();
527 return -1;
528 }
529
530 if (!PyBytes_Check(obj)) {
531 PyErr_Format(PyExc_TypeError,
532 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
533 return -1;
534 }
535
536 *s = PyBytes_AS_STRING(obj);
537 if (len != NULL)
538 *len = PyBytes_GET_SIZE(obj);
539 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
540 PyErr_SetString(PyExc_TypeError,
541 "expected bytes with no null");
542 return -1;
543 }
544 return 0;
545}
Neal Norwitz6968b052007-02-27 19:02:19 +0000546
547/* -------------------------------------------------------------------- */
548/* Methods */
549
550#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000551
Neal Norwitz6968b052007-02-27 19:02:19 +0000552#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000553#define STRINGLIB_LEN PyBytes_GET_SIZE
554#define STRINGLIB_NEW PyBytes_FromStringAndSize
555#define STRINGLIB_STR PyBytes_AS_STRING
556/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
557
558#define STRINGLIB_EMPTY nullstring
559#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
560#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000563
Neal Norwitz6968b052007-02-27 19:02:19 +0000564#include "stringlib/count.h"
565#include "stringlib/find.h"
566#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000567#include "stringlib/ctype.h"
568#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000569
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000570#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
571#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573PyObject *
574PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000575{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576 static const char *hexdigits = "0123456789abcdef";
577 register PyBytesObject* op = (PyBytesObject*) obj;
578 Py_ssize_t length = Py_SIZE(op);
579 size_t newsize = 3 + 4 * length;
580 PyObject *v;
581 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
582 PyErr_SetString(PyExc_OverflowError,
583 "bytes object is too large to make repr");
584 return NULL;
585 }
586 v = PyUnicode_FromUnicode(NULL, newsize);
587 if (v == NULL) {
588 return NULL;
589 }
590 else {
591 register Py_ssize_t i;
592 register Py_UNICODE c;
593 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
594 int quote;
595
596 /* Figure out which quote to use; single is preferred */
597 quote = '\'';
598 if (smartquotes) {
599 char *test, *start;
600 start = PyBytes_AS_STRING(op);
601 for (test = start; test < start+length; ++test) {
602 if (*test == '"') {
603 quote = '\''; /* back to single */
604 goto decided;
605 }
606 else if (*test == '\'')
607 quote = '"';
608 }
609 decided:
610 ;
611 }
612
613 *p++ = 'b', *p++ = quote;
614 for (i = 0; i < length; i++) {
615 /* There's at least enough room for a hex escape
616 and a closing quote. */
617 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
618 c = op->ob_sval[i];
619 if (c == quote || c == '\\')
620 *p++ = '\\', *p++ = c;
621 else if (c == '\t')
622 *p++ = '\\', *p++ = 't';
623 else if (c == '\n')
624 *p++ = '\\', *p++ = 'n';
625 else if (c == '\r')
626 *p++ = '\\', *p++ = 'r';
627 else if (c < ' ' || c >= 0x7f) {
628 *p++ = '\\';
629 *p++ = 'x';
630 *p++ = hexdigits[(c & 0xf0) >> 4];
631 *p++ = hexdigits[c & 0xf];
632 }
633 else
634 *p++ = c;
635 }
636 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
637 *p++ = quote;
638 *p = '\0';
639 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
640 Py_DECREF(v);
641 return NULL;
642 }
643 return v;
644 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000645}
646
Neal Norwitz6968b052007-02-27 19:02:19 +0000647static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000648string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000649{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000650 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656 if (Py_BytesWarningFlag) {
657 if (PyErr_WarnEx(PyExc_BytesWarning,
658 "str() on a bytes instance", 1))
659 return NULL;
660 }
661 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664static Py_ssize_t
665string_length(PyBytesObject *a)
666{
667 return Py_SIZE(a);
668}
Neal Norwitz6968b052007-02-27 19:02:19 +0000669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670/* This is also used by PyBytes_Concat() */
671static PyObject *
672string_concat(PyObject *a, PyObject *b)
673{
674 Py_ssize_t size;
675 Py_buffer va, vb;
676 PyObject *result = NULL;
677
678 va.len = -1;
679 vb.len = -1;
680 if (_getbuffer(a, &va) < 0 ||
681 _getbuffer(b, &vb) < 0) {
682 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
683 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
684 goto done;
685 }
686
687 /* Optimize end cases */
688 if (va.len == 0 && PyBytes_CheckExact(b)) {
689 result = b;
690 Py_INCREF(result);
691 goto done;
692 }
693 if (vb.len == 0 && PyBytes_CheckExact(a)) {
694 result = a;
695 Py_INCREF(result);
696 goto done;
697 }
698
699 size = va.len + vb.len;
700 if (size < 0) {
701 PyErr_NoMemory();
702 goto done;
703 }
704
705 result = PyBytes_FromStringAndSize(NULL, size);
706 if (result != NULL) {
707 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
708 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
709 }
710
711 done:
712 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000713 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000714 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000715 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000716 return result;
717}
Neal Norwitz6968b052007-02-27 19:02:19 +0000718
719static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000721{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000722 register Py_ssize_t i;
723 register Py_ssize_t j;
724 register Py_ssize_t size;
725 register PyBytesObject *op;
726 size_t nbytes;
727 if (n < 0)
728 n = 0;
729 /* watch out for overflows: the size can overflow int,
730 * and the # of bytes needed can overflow size_t
731 */
732 size = Py_SIZE(a) * n;
733 if (n && size / n != Py_SIZE(a)) {
734 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000735 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000736 return NULL;
737 }
738 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
739 Py_INCREF(a);
740 return (PyObject *)a;
741 }
742 nbytes = (size_t)size;
743 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
744 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000745 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000746 return NULL;
747 }
748 op = (PyBytesObject *)
749 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000773string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
777 Py_buffer varg;
778 int pos;
779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000784 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 return pos >= 0;
786 }
787 if (ival < 0 || ival >= 256) {
788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
790 }
791
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000792 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000799 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000805static PyObject*
806string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000807{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808 int c;
809 Py_ssize_t len_a, len_b;
810 Py_ssize_t min_len;
811 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000812
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000813 /* Make sure both arguments are strings. */
814 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
815 if (Py_BytesWarningFlag && (op == Py_EQ) &&
816 (PyObject_IsInstance((PyObject*)a,
817 (PyObject*)&PyUnicode_Type) ||
818 PyObject_IsInstance((PyObject*)b,
819 (PyObject*)&PyUnicode_Type))) {
820 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000821 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000822 return NULL;
823 }
824 result = Py_NotImplemented;
825 goto out;
826 }
827 if (a == b) {
828 switch (op) {
829 case Py_EQ:case Py_LE:case Py_GE:
830 result = Py_True;
831 goto out;
832 case Py_NE:case Py_LT:case Py_GT:
833 result = Py_False;
834 goto out;
835 }
836 }
837 if (op == Py_EQ) {
838 /* Supporting Py_NE here as well does not save
839 much time, since Py_NE is rarely used. */
840 if (Py_SIZE(a) == Py_SIZE(b)
841 && (a->ob_sval[0] == b->ob_sval[0]
842 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
843 result = Py_True;
844 } else {
845 result = Py_False;
846 }
847 goto out;
848 }
849 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
850 min_len = (len_a < len_b) ? len_a : len_b;
851 if (min_len > 0) {
852 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
853 if (c==0)
854 c = memcmp(a->ob_sval, b->ob_sval, min_len);
855 } else
856 c = 0;
857 if (c == 0)
858 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
859 switch (op) {
860 case Py_LT: c = c < 0; break;
861 case Py_LE: c = c <= 0; break;
862 case Py_EQ: assert(0); break; /* unreachable */
863 case Py_NE: c = c != 0; break;
864 case Py_GT: c = c > 0; break;
865 case Py_GE: c = c >= 0; break;
866 default:
867 result = Py_NotImplemented;
868 goto out;
869 }
870 result = c ? Py_True : Py_False;
871 out:
872 Py_INCREF(result);
873 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000876static long
877string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000878{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000879 register Py_ssize_t len;
880 register unsigned char *p;
881 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000882
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883 if (a->ob_shash != -1)
884 return a->ob_shash;
885 len = Py_SIZE(a);
886 p = (unsigned char *) a->ob_sval;
887 x = *p << 7;
888 while (--len >= 0)
889 x = (1000003*x) ^ *p++;
890 x ^= Py_SIZE(a);
891 if (x == -1)
892 x = -2;
893 a->ob_shash = x;
894 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000895}
896
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000897static PyObject*
898string_subscript(PyBytesObject* self, PyObject* item)
899{
900 if (PyIndex_Check(item)) {
901 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
902 if (i == -1 && PyErr_Occurred())
903 return NULL;
904 if (i < 0)
905 i += PyBytes_GET_SIZE(self);
906 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
907 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000908 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000909 return NULL;
910 }
911 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
912 }
913 else if (PySlice_Check(item)) {
914 Py_ssize_t start, stop, step, slicelength, cur, i;
915 char* source_buf;
916 char* result_buf;
917 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000918
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000919 if (PySlice_GetIndicesEx((PySliceObject*)item,
920 PyBytes_GET_SIZE(self),
921 &start, &stop, &step, &slicelength) < 0) {
922 return NULL;
923 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000924
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000925 if (slicelength <= 0) {
926 return PyBytes_FromStringAndSize("", 0);
927 }
928 else if (start == 0 && step == 1 &&
929 slicelength == PyBytes_GET_SIZE(self) &&
930 PyBytes_CheckExact(self)) {
931 Py_INCREF(self);
932 return (PyObject *)self;
933 }
934 else if (step == 1) {
935 return PyBytes_FromStringAndSize(
936 PyBytes_AS_STRING(self) + start,
937 slicelength);
938 }
939 else {
940 source_buf = PyBytes_AsString((PyObject*)self);
941 result_buf = (char *)PyMem_Malloc(slicelength);
942 if (result_buf == NULL)
943 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000944
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945 for (cur = start, i = 0; i < slicelength;
946 cur += step, i++) {
947 result_buf[i] = source_buf[cur];
948 }
949
950 result = PyBytes_FromStringAndSize(result_buf,
951 slicelength);
952 PyMem_Free(result_buf);
953 return result;
954 }
955 }
956 else {
957 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000958 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959 Py_TYPE(item)->tp_name);
960 return NULL;
961 }
962}
963
964static int
965string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
966{
Martin v. Löwis423be952008-08-13 15:53:07 +0000967 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000968 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969}
970
/* Sequence protocol slots for the bytes type.  Slots set to 0 are not
   provided; slicing is handled by string_subscript() through the
   mapping protocol instead. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
981
/* Mapping protocol slots for the bytes type: length and subscripting
   (index or slice) only. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,         /* mp_length */
    (binaryfunc)string_subscript,   /* mp_subscript */
    0,                              /* mp_ass_subscript: no item assignment */
};
987
/* Buffer protocol slots for the bytes type. */
static PyBufferProcs string_as_buffer = {
    (getbufferproc)string_buffer_getbuffer,     /* bf_getbuffer */
    NULL,                                       /* bf_releasebuffer: not provided */
};
992
993
/* Strip-mode selectors; also used as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1002
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
/* Nonzero when the `length' bytes of `target' starting at `offset' are
   equal to the first `length' bytes of `pattern'.  The first and last
   bytes are compared directly; memcmp on the interior is only reached
   when both of them match.

   Don't call if length < 2: the interior length handed to memcmp is
   length-2.

   Every argument is parenthesized so that expression arguments (for
   example a conditional expression) expand with the intended precedence.
   Arguments are evaluated more than once, so they must not have side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[offset] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1009
1010
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit' splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The comparison is done before the addition, so a maxsplit at the top of
   its range never reaches the +1.  The argument is parenthesized so that
   expression arguments expand with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Neal Norwitz6968b052007-02-27 19:02:19 +00001023
/* Add the bytes data[left:right] to `list' as a new bytes object.

   The macro relies on names supplied by the function it is expanded in:
   `str' (scratch PyObject *), `list' (the result list), `count' (number of
   items stored so far) and an `onError' label to jump to on failure.

   While count < MAX_PREALLOC the item is stored straight into a
   preallocated slot with PyList_SET_ITEM.  After that PyList_Append is
   used, and the local reference in `str' is dropped afterwards whether the
   append succeeded or not.

   The body is wrapped in do { } while (0) so that `SPLIT_ADD(...);' is a
   single statement and stays correct as the body of an unbraced if/else. */
#define SPLIT_ADD(data, left, right) do {                       \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; } while (0)

/* Always force the list to the expected size: the list may have been
   created with more slots than were filled, so its size is set to the
   number of items actually stored (`count' in the expanding function). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
/* Cursor helpers for whitespace splitting.  `i' must be a modifiable
   lvalue; it is advanced (SKIP_*) or moved back (RSKIP_*) while the byte
   under it satisfies the test.  The forward forms stop at `len', the
   reverse forms stop once `i' drops below 0.
   Each expands to a single do { } while (0) statement so that it can be
   used safely as the body of an unbraced if/else. */
#define SKIP_SPACE(s, i, len) \
    do { while ((i) < (len) && ISSPACE((s)[i])) (i)++; } while (0)
#define SKIP_NONSPACE(s, i, len) \
    do { while ((i) < (len) && !ISSPACE((s)[i])) (i)++; } while (0)
#define RSKIP_SPACE(s, i) \
    do { while ((i) >= 0 && ISSPACE((s)[i])) (i)--; } while (0)
#define RSKIP_NONSPACE(s, i) \
    do { while ((i) >= 0 && !ISSPACE((s)[i])) (i)--; } while (0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001050split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052 const char *s = PyBytes_AS_STRING(self);
1053 Py_ssize_t i, j, count=0;
1054 PyObject *str;
1055 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057 if (list == NULL)
1058 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062 while (maxsplit-- > 0) {
1063 SKIP_SPACE(s, i, len);
1064 if (i==len) break;
1065 j = i; i++;
1066 SKIP_NONSPACE(s, i, len);
1067 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1068 /* No whitespace in self, so just use it as list[0] */
1069 Py_INCREF(self);
1070 PyList_SET_ITEM(list, 0, (PyObject *)self);
1071 count++;
1072 break;
1073 }
1074 SPLIT_ADD(s, j, i);
1075 }
1076
1077 if (i < len) {
1078 /* Only occurs when maxsplit was reached */
1079 /* Skip any remaining whitespace and copy to end of string */
1080 SKIP_SPACE(s, i, len);
1081 if (i != len)
1082 SPLIT_ADD(s, i, len);
1083 }
1084 FIX_PREALLOC_SIZE(list);
1085 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001087 Py_DECREF(list);
1088 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Guido van Rossum8f950672007-09-10 16:53:45 +00001091Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001092split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001093{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094 const char *s = PyBytes_AS_STRING(self);
1095 register Py_ssize_t i, j, count=0;
1096 PyObject *str;
1097 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099 if (list == NULL)
1100 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102 i = j = 0;
1103 while ((j < len) && (maxcount-- > 0)) {
1104 for(; j<len; j++) {
1105 /* I found that using memchr makes no difference */
1106 if (s[j] == ch) {
1107 SPLIT_ADD(s, i, j);
1108 i = j = j + 1;
1109 break;
1110 }
1111 }
1112 }
1113 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1114 /* ch not in self, so just use self as list[0] */
1115 Py_INCREF(self);
1116 PyList_SET_ITEM(list, 0, (PyObject *)self);
1117 count++;
1118 }
1119 else if (i <= len) {
1120 SPLIT_ADD(s, i, len);
1121 }
1122 FIX_PREALLOC_SIZE(list);
1123 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001124
1125 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126 Py_DECREF(list);
1127 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001128}
1129
Neal Norwitz6968b052007-02-27 19:02:19 +00001130PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001132\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001133Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134If sep is not specified or is None, B is split on ASCII whitespace\n\
1135characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001136If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
1138static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001139string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001140{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1142 Py_ssize_t maxsplit = -1, count=0;
1143 const char *s = PyBytes_AS_STRING(self), *sub;
1144 Py_buffer vsub;
1145 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001146#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001148#endif
1149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1151 return NULL;
1152 if (maxsplit < 0)
1153 maxsplit = PY_SSIZE_T_MAX;
1154 if (subobj == Py_None)
1155 return split_whitespace(self, len, maxsplit);
1156 if (_getbuffer(subobj, &vsub) < 0)
1157 return NULL;
1158 sub = vsub.buf;
1159 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001160
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161 if (n == 0) {
1162 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001163 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164 return NULL;
1165 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001166 else if (n == 1) {
1167 list = split_char(self, len, sub[0], maxsplit);
1168 PyBuffer_Release(&vsub);
1169 return list;
1170 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001171
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172 list = PyList_New(PREALLOC_SIZE(maxsplit));
1173 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001174 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001175 return NULL;
1176 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001177
1178#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001179 i = j = 0;
1180 while (maxsplit-- > 0) {
1181 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1182 if (pos < 0)
1183 break;
1184 j = i+pos;
1185 SPLIT_ADD(s, i, j);
1186 i = j + n;
1187 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001188#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001189 i = j = 0;
1190 while ((j+n <= len) && (maxsplit-- > 0)) {
1191 for (; j+n <= len; j++) {
1192 if (Py_STRING_MATCH(s, j, sub, n)) {
1193 SPLIT_ADD(s, i, j);
1194 i = j = j + n;
1195 break;
1196 }
1197 }
1198 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001199#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200 SPLIT_ADD(s, i, len);
1201 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001202 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001203 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205 onError:
1206 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001207 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001209}
1210
Neal Norwitz6968b052007-02-27 19:02:19 +00001211PyDoc_STRVAR(partition__doc__,
1212"B.partition(sep) -> (head, sep, tail)\n\
1213\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001214Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001215the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001217
1218static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001220{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 const char *sep;
1222 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001223
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224 if (PyBytes_Check(sep_obj)) {
1225 sep = PyBytes_AS_STRING(sep_obj);
1226 sep_len = PyBytes_GET_SIZE(sep_obj);
1227 }
1228 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1229 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001230
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231 return stringlib_partition(
1232 (PyObject*) self,
1233 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1234 sep_obj, sep, sep_len
1235 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001236}
1237
1238PyDoc_STRVAR(rpartition__doc__,
1239"B.rpartition(sep) -> (tail, sep, head)\n\
1240\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001241Search for the separator sep in B, starting at the end of B,\n\
1242and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001243part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249 const char *sep;
1250 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001251
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252 if (PyBytes_Check(sep_obj)) {
1253 sep = PyBytes_AS_STRING(sep_obj);
1254 sep_len = PyBytes_GET_SIZE(sep_obj);
1255 }
1256 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1257 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259 return stringlib_rpartition(
1260 (PyObject*) self,
1261 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1262 sep_obj, sep, sep_len
1263 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001264}
1265
1266Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001268{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269 const char *s = PyBytes_AS_STRING(self);
1270 Py_ssize_t i, j, count=0;
1271 PyObject *str;
1272 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274 if (list == NULL)
1275 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001276
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279 while (maxsplit-- > 0) {
1280 RSKIP_SPACE(s, i);
1281 if (i<0) break;
1282 j = i; i--;
1283 RSKIP_NONSPACE(s, i);
1284 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1285 /* No whitespace in self, so just use it as list[0] */
1286 Py_INCREF(self);
1287 PyList_SET_ITEM(list, 0, (PyObject *)self);
1288 count++;
1289 break;
1290 }
1291 SPLIT_ADD(s, i + 1, j + 1);
1292 }
1293 if (i >= 0) {
1294 /* Only occurs when maxsplit was reached. Skip any remaining
1295 whitespace and copy to beginning of string. */
1296 RSKIP_SPACE(s, i);
1297 if (i >= 0)
1298 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300 }
1301 FIX_PREALLOC_SIZE(list);
1302 if (PyList_Reverse(list) < 0)
1303 goto onError;
1304 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306 Py_DECREF(list);
1307 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001308}
1309
Guido van Rossum8f950672007-09-10 16:53:45 +00001310Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001312{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313 const char *s = PyBytes_AS_STRING(self);
1314 register Py_ssize_t i, j, count=0;
1315 PyObject *str;
1316 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001317
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 if (list == NULL)
1319 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001320
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001321 i = j = len - 1;
1322 while ((i >= 0) && (maxcount-- > 0)) {
1323 for (; i >= 0; i--) {
1324 if (s[i] == ch) {
1325 SPLIT_ADD(s, i + 1, j + 1);
1326 j = i = i - 1;
1327 break;
1328 }
1329 }
1330 }
1331 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1332 /* ch not in self, so just use self as list[0] */
1333 Py_INCREF(self);
1334 PyList_SET_ITEM(list, 0, (PyObject *)self);
1335 count++;
1336 }
1337 else if (j >= -1) {
1338 SPLIT_ADD(s, 0, j + 1);
1339 }
1340 FIX_PREALLOC_SIZE(list);
1341 if (PyList_Reverse(list) < 0)
1342 goto onError;
1343 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345 onError:
1346 Py_DECREF(list);
1347 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001348}
1349
Neal Norwitz6968b052007-02-27 19:02:19 +00001350PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001351"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001352\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001353Return a list of the sections in B, using sep as the delimiter,\n\
1354starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001355If sep is not given, B is split on ASCII whitespace characters\n\
1356(space, tab, return, newline, formfeed, vertical tab).\n\
1357If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Neal Norwitz6968b052007-02-27 19:02:19 +00001360static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001362{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1364 Py_ssize_t maxsplit = -1, count=0;
1365 const char *s, *sub;
1366 Py_buffer vsub;
1367 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001368
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1370 return NULL;
1371 if (maxsplit < 0)
1372 maxsplit = PY_SSIZE_T_MAX;
1373 if (subobj == Py_None)
1374 return rsplit_whitespace(self, len, maxsplit);
1375 if (_getbuffer(subobj, &vsub) < 0)
1376 return NULL;
1377 sub = vsub.buf;
1378 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380 if (n == 0) {
1381 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001382 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383 return NULL;
1384 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001385 else if (n == 1) {
1386 list = rsplit_char(self, len, sub[0], maxsplit);
1387 PyBuffer_Release(&vsub);
1388 return list;
1389 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391 list = PyList_New(PREALLOC_SIZE(maxsplit));
1392 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001393 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 return NULL;
1395 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001396
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397 j = len;
1398 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001399
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400 s = PyBytes_AS_STRING(self);
1401 while ( (i >= 0) && (maxsplit-- > 0) ) {
1402 for (; i>=0; i--) {
1403 if (Py_STRING_MATCH(s, i, sub, n)) {
1404 SPLIT_ADD(s, i + n, j);
1405 j = i;
1406 i -= n;
1407 break;
1408 }
1409 }
1410 }
1411 SPLIT_ADD(s, 0, j);
1412 FIX_PREALLOC_SIZE(list);
1413 if (PyList_Reverse(list) < 0)
1414 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001415 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001417
1418onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001420 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001422}
1423
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424#undef SPLIT_ADD
1425#undef MAX_PREALLOC
1426#undef PREALLOC_SIZE
1427
1428
1429PyDoc_STRVAR(join__doc__,
1430"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001431\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001432Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1434
Neal Norwitz6968b052007-02-27 19:02:19 +00001435static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438 char *sep = PyBytes_AS_STRING(self);
1439 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1440 PyObject *res = NULL;
1441 char *p;
1442 Py_ssize_t seqlen = 0;
1443 size_t sz = 0;
1444 Py_ssize_t i;
1445 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001446
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447 seq = PySequence_Fast(orig, "");
1448 if (seq == NULL) {
1449 return NULL;
1450 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001451
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452 seqlen = PySequence_Size(seq);
1453 if (seqlen == 0) {
1454 Py_DECREF(seq);
1455 return PyBytes_FromString("");
1456 }
1457 if (seqlen == 1) {
1458 item = PySequence_Fast_GET_ITEM(seq, 0);
1459 if (PyBytes_CheckExact(item)) {
1460 Py_INCREF(item);
1461 Py_DECREF(seq);
1462 return item;
1463 }
1464 }
1465
1466 /* There are at least two things to join, or else we have a subclass
1467 * of the builtin types in the sequence.
1468 * Do a pre-pass to figure out the total amount of space we'll
1469 * need (sz), and see whether all argument are bytes.
1470 */
1471 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1472 for (i = 0; i < seqlen; i++) {
1473 const size_t old_sz = sz;
1474 item = PySequence_Fast_GET_ITEM(seq, i);
1475 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1476 PyErr_Format(PyExc_TypeError,
1477 "sequence item %zd: expected bytes,"
1478 " %.80s found",
1479 i, Py_TYPE(item)->tp_name);
1480 Py_DECREF(seq);
1481 return NULL;
1482 }
1483 sz += Py_SIZE(item);
1484 if (i != 0)
1485 sz += seplen;
1486 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1487 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001488 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489 Py_DECREF(seq);
1490 return NULL;
1491 }
1492 }
1493
1494 /* Allocate result space. */
1495 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1496 if (res == NULL) {
1497 Py_DECREF(seq);
1498 return NULL;
1499 }
1500
1501 /* Catenate everything. */
1502 /* I'm not worried about a PyByteArray item growing because there's
1503 nowhere in this function where we release the GIL. */
1504 p = PyBytes_AS_STRING(res);
1505 for (i = 0; i < seqlen; ++i) {
1506 size_t n;
1507 char *q;
1508 if (i) {
1509 Py_MEMCPY(p, sep, seplen);
1510 p += seplen;
1511 }
1512 item = PySequence_Fast_GET_ITEM(seq, i);
1513 n = Py_SIZE(item);
1514 if (PyBytes_Check(item))
1515 q = PyBytes_AS_STRING(item);
1516 else
1517 q = PyByteArray_AS_STRING(item);
1518 Py_MEMCPY(p, q, n);
1519 p += n;
1520 }
1521
1522 Py_DECREF(seq);
1523 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001524}
1525
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526PyObject *
1527_PyBytes_Join(PyObject *sep, PyObject *x)
1528{
1529 assert(sep != NULL && PyBytes_Check(sep));
1530 assert(x != NULL);
1531 return string_join(sep, x);
1532}
1533
1534Py_LOCAL_INLINE(void)
1535string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1536{
1537 if (*end > len)
1538 *end = len;
1539 else if (*end < 0)
1540 *end += len;
1541 if (*end < 0)
1542 *end = 0;
1543 if (*start < 0)
1544 *start += len;
1545 if (*start < 0)
1546 *start = 0;
1547}
1548
1549Py_LOCAL_INLINE(Py_ssize_t)
1550string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1551{
1552 PyObject *subobj;
1553 const char *sub;
1554 Py_ssize_t sub_len;
1555 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1556 PyObject *obj_start=Py_None, *obj_end=Py_None;
1557
1558 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1559 &obj_start, &obj_end))
1560 return -2;
1561 /* To support None in "start" and "end" arguments, meaning
1562 the same as if they were not passed.
1563 */
1564 if (obj_start != Py_None)
1565 if (!_PyEval_SliceIndex(obj_start, &start))
1566 return -2;
1567 if (obj_end != Py_None)
1568 if (!_PyEval_SliceIndex(obj_end, &end))
1569 return -2;
1570
1571 if (PyBytes_Check(subobj)) {
1572 sub = PyBytes_AS_STRING(subobj);
1573 sub_len = PyBytes_GET_SIZE(subobj);
1574 }
1575 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1576 /* XXX - the "expected a character buffer object" is pretty
1577 confusing for a non-expert. remap to something else ? */
1578 return -2;
1579
1580 if (dir > 0)
1581 return stringlib_find_slice(
1582 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1583 sub, sub_len, start, end);
1584 else
1585 return stringlib_rfind_slice(
1586 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1587 sub, sub_len, start, end);
1588}
1589
1590
1591PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001592"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001593\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594Return the lowest index in S where substring sub is found,\n\
1595such that sub is contained within s[start:end]. Optional\n\
1596arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001597\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598Return -1 on failure.");
1599
Neal Norwitz6968b052007-02-27 19:02:19 +00001600static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001601string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001602{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603 Py_ssize_t result = string_find_internal(self, args, +1);
1604 if (result == -2)
1605 return NULL;
1606 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001607}
1608
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609
1610PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001611"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001612\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613Like B.find() but raise ValueError when the substring is not found.");
1614
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001615static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001617{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618 Py_ssize_t result = string_find_internal(self, args, +1);
1619 if (result == -2)
1620 return NULL;
1621 if (result == -1) {
1622 PyErr_SetString(PyExc_ValueError,
1623 "substring not found");
1624 return NULL;
1625 }
1626 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001627}
1628
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001629
1630PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001631"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001632\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633Return the highest index in B where substring sub is found,\n\
1634such that sub is contained within s[start:end]. Optional\n\
1635arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001636\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637Return -1 on failure.");
1638
Neal Norwitz6968b052007-02-27 19:02:19 +00001639static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001641{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642 Py_ssize_t result = string_find_internal(self, args, -1);
1643 if (result == -2)
1644 return NULL;
1645 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001646}
1647
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001648
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001650"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651\n\
1652Like B.rfind() but raise ValueError when the substring is not found.");
1653
1654static PyObject *
1655string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001656{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657 Py_ssize_t result = string_find_internal(self, args, -1);
1658 if (result == -2)
1659 return NULL;
1660 if (result == -1) {
1661 PyErr_SetString(PyExc_ValueError,
1662 "substring not found");
1663 return NULL;
1664 }
1665 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001666}
1667
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668
1669Py_LOCAL_INLINE(PyObject *)
1670do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001671{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672 Py_buffer vsep;
1673 char *s = PyBytes_AS_STRING(self);
1674 Py_ssize_t len = PyBytes_GET_SIZE(self);
1675 char *sep;
1676 Py_ssize_t seplen;
1677 Py_ssize_t i, j;
1678
1679 if (_getbuffer(sepobj, &vsep) < 0)
1680 return NULL;
1681 sep = vsep.buf;
1682 seplen = vsep.len;
1683
1684 i = 0;
1685 if (striptype != RIGHTSTRIP) {
1686 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1687 i++;
1688 }
1689 }
1690
1691 j = len;
1692 if (striptype != LEFTSTRIP) {
1693 do {
1694 j--;
1695 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1696 j++;
1697 }
1698
Martin v. Löwis423be952008-08-13 15:53:07 +00001699 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700
1701 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1702 Py_INCREF(self);
1703 return (PyObject*)self;
1704 }
1705 else
1706 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001707}
1708
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
1710Py_LOCAL_INLINE(PyObject *)
1711do_strip(PyBytesObject *self, int striptype)
1712{
1713 char *s = PyBytes_AS_STRING(self);
1714 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1715
1716 i = 0;
1717 if (striptype != RIGHTSTRIP) {
1718 while (i < len && ISSPACE(s[i])) {
1719 i++;
1720 }
1721 }
1722
1723 j = len;
1724 if (striptype != LEFTSTRIP) {
1725 do {
1726 j--;
1727 } while (j >= i && ISSPACE(s[j]));
1728 j++;
1729 }
1730
1731 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1732 Py_INCREF(self);
1733 return (PyObject*)self;
1734 }
1735 else
1736 return PyBytes_FromStringAndSize(s+i, j-i);
1737}
1738
1739
1740Py_LOCAL_INLINE(PyObject *)
1741do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1742{
1743 PyObject *sep = NULL;
1744
1745 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1746 return NULL;
1747
1748 if (sep != NULL && sep != Py_None) {
1749 return do_xstrip(self, striptype, sep);
1750 }
1751 return do_strip(self, striptype);
1752}
1753
1754
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001755PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001757\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001758Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001760static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001762{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763 if (PyTuple_GET_SIZE(args) == 0)
1764 return do_strip(self, BOTHSTRIP); /* Common case */
1765 else
1766 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001767}
1768
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001770PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001773Strip leading bytes contained in the argument.\n\
1774If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001775static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001776string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001777{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778 if (PyTuple_GET_SIZE(args) == 0)
1779 return do_strip(self, LEFTSTRIP); /* Common case */
1780 else
1781 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001782}
1783
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001785PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001788Strip trailing bytes contained in the argument.\n\
1789If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001790static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001792{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793 if (PyTuple_GET_SIZE(args) == 0)
1794 return do_strip(self, RIGHTSTRIP); /* Common case */
1795 else
1796 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001797}
Neal Norwitz6968b052007-02-27 19:02:19 +00001798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
1800PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001801"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001802\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803Return the number of non-overlapping occurrences of substring sub in\n\
1804string S[start:end]. Optional arguments start and end are interpreted\n\
1805as in slice notation.");
1806
1807static PyObject *
1808string_count(PyBytesObject *self, PyObject *args)
1809{
1810 PyObject *sub_obj;
1811 const char *str = PyBytes_AS_STRING(self), *sub;
1812 Py_ssize_t sub_len;
1813 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1814
1815 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1816 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1817 return NULL;
1818
1819 if (PyBytes_Check(sub_obj)) {
1820 sub = PyBytes_AS_STRING(sub_obj);
1821 sub_len = PyBytes_GET_SIZE(sub_obj);
1822 }
1823 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1824 return NULL;
1825
1826 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1827
1828 return PyLong_FromSsize_t(
1829 stringlib_count(str + start, end - start, sub, sub_len)
1830 );
1831}
1832
1833
1834PyDoc_STRVAR(translate__doc__,
1835"B.translate(table[, deletechars]) -> bytes\n\
1836\n\
1837Return a copy of B, where all characters occurring in the\n\
1838optional argument deletechars are removed, and the remaining\n\
1839characters have been mapped through the given translation\n\
1840table, which must be a bytes object of length 256.");
1841
1842static PyObject *
1843string_translate(PyBytesObject *self, PyObject *args)
1844{
1845 register char *input, *output;
1846 const char *table;
1847 register Py_ssize_t i, c, changed = 0;
1848 PyObject *input_obj = (PyObject*)self;
1849 const char *output_start, *del_table=NULL;
1850 Py_ssize_t inlen, tablen, dellen = 0;
1851 PyObject *result;
1852 int trans_table[256];
1853 PyObject *tableobj, *delobj = NULL;
1854
1855 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1856 &tableobj, &delobj))
1857 return NULL;
1858
1859 if (PyBytes_Check(tableobj)) {
1860 table = PyBytes_AS_STRING(tableobj);
1861 tablen = PyBytes_GET_SIZE(tableobj);
1862 }
1863 else if (tableobj == Py_None) {
1864 table = NULL;
1865 tablen = 256;
1866 }
1867 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1868 return NULL;
1869
1870 if (tablen != 256) {
1871 PyErr_SetString(PyExc_ValueError,
1872 "translation table must be 256 characters long");
1873 return NULL;
1874 }
1875
1876 if (delobj != NULL) {
1877 if (PyBytes_Check(delobj)) {
1878 del_table = PyBytes_AS_STRING(delobj);
1879 dellen = PyBytes_GET_SIZE(delobj);
1880 }
1881 else if (PyUnicode_Check(delobj)) {
1882 PyErr_SetString(PyExc_TypeError,
1883 "deletions are implemented differently for unicode");
1884 return NULL;
1885 }
1886 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1887 return NULL;
1888 }
1889 else {
1890 del_table = NULL;
1891 dellen = 0;
1892 }
1893
1894 inlen = PyBytes_GET_SIZE(input_obj);
1895 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1896 if (result == NULL)
1897 return NULL;
1898 output_start = output = PyBytes_AsString(result);
1899 input = PyBytes_AS_STRING(input_obj);
1900
1901 if (dellen == 0 && table != NULL) {
1902 /* If no deletions are required, use faster code */
1903 for (i = inlen; --i >= 0; ) {
1904 c = Py_CHARMASK(*input++);
1905 if (Py_CHARMASK((*output++ = table[c])) != c)
1906 changed = 1;
1907 }
1908 if (changed || !PyBytes_CheckExact(input_obj))
1909 return result;
1910 Py_DECREF(result);
1911 Py_INCREF(input_obj);
1912 return input_obj;
1913 }
1914
1915 if (table == NULL) {
1916 for (i = 0; i < 256; i++)
1917 trans_table[i] = Py_CHARMASK(i);
1918 } else {
1919 for (i = 0; i < 256; i++)
1920 trans_table[i] = Py_CHARMASK(table[i]);
1921 }
1922
1923 for (i = 0; i < dellen; i++)
1924 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1925
1926 for (i = inlen; --i >= 0; ) {
1927 c = Py_CHARMASK(*input++);
1928 if (trans_table[c] != -1)
1929 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1930 continue;
1931 changed = 1;
1932 }
1933 if (!changed && PyBytes_CheckExact(input_obj)) {
1934 Py_DECREF(result);
1935 Py_INCREF(input_obj);
1936 return input_obj;
1937 }
1938 /* Fix the size of the resulting string */
1939 if (inlen > 0)
1940 _PyBytes_Resize(&result, output - output_start);
1941 return result;
1942}
1943
1944
/* Search directions passed as the `direction' argument of findstring()
   and countstring(). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL when there is none (thin wrapper around memchr()). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1952
1953/* String ops must return a string. */
1954/* If the object is subclass of string, create a copy */
1955Py_LOCAL(PyBytesObject *)
1956return_self(PyBytesObject *self)
1957{
1958 if (PyBytes_CheckExact(self)) {
1959 Py_INCREF(self);
1960 return self;
1961 }
1962 return (PyBytesObject *)PyBytes_FromStringAndSize(
1963 PyBytes_AS_STRING(self),
1964 PyBytes_GET_SIZE(self));
1965}
1966
1967Py_LOCAL_INLINE(Py_ssize_t)
1968countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1969{
1970 Py_ssize_t count=0;
1971 const char *start=target;
1972 const char *end=target+target_len;
1973
1974 while ( (start=findchar(start, end-start, c)) != NULL ) {
1975 count++;
1976 if (count >= maxcount)
1977 break;
1978 start += 1;
1979 }
1980 return count;
1981}
1982
1983Py_LOCAL(Py_ssize_t)
1984findstring(const char *target, Py_ssize_t target_len,
1985 const char *pattern, Py_ssize_t pattern_len,
1986 Py_ssize_t start,
1987 Py_ssize_t end,
1988 int direction)
1989{
1990 if (start < 0) {
1991 start += target_len;
1992 if (start < 0)
1993 start = 0;
1994 }
1995 if (end > target_len) {
1996 end = target_len;
1997 } else if (end < 0) {
1998 end += target_len;
1999 if (end < 0)
2000 end = 0;
2001 }
2002
2003 /* zero-length substrings always match at the first attempt */
2004 if (pattern_len == 0)
2005 return (direction > 0) ? start : end;
2006
2007 end -= pattern_len;
2008
2009 if (direction < 0) {
2010 for (; end >= start; end--)
2011 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2012 return end;
2013 } else {
2014 for (; start <= end; start++)
2015 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2016 return start;
2017 }
2018 return -1;
2019}
2020
2021Py_LOCAL_INLINE(Py_ssize_t)
2022countstring(const char *target, Py_ssize_t target_len,
2023 const char *pattern, Py_ssize_t pattern_len,
2024 Py_ssize_t start,
2025 Py_ssize_t end,
2026 int direction, Py_ssize_t maxcount)
2027{
2028 Py_ssize_t count=0;
2029
2030 if (start < 0) {
2031 start += target_len;
2032 if (start < 0)
2033 start = 0;
2034 }
2035 if (end > target_len) {
2036 end = target_len;
2037 } else if (end < 0) {
2038 end += target_len;
2039 if (end < 0)
2040 end = 0;
2041 }
2042
2043 /* zero-length substrings match everywhere */
2044 if (pattern_len == 0 || maxcount == 0) {
2045 if (target_len+1 < maxcount)
2046 return target_len+1;
2047 return maxcount;
2048 }
2049
2050 end -= pattern_len;
2051 if (direction < 0) {
2052 for (; (end >= start); end--)
2053 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2054 count++;
2055 if (--maxcount <= 0) break;
2056 end -= pattern_len-1;
2057 }
2058 } else {
2059 for (; (start <= end); start++)
2060 if (Py_STRING_MATCH(target, start,
2061 pattern, pattern_len)) {
2062 count++;
2063 if (--maxcount <= 0)
2064 break;
2065 start += pattern_len-1;
2066 }
2067 }
2068 return count;
2069}
2070
2071
2072/* Algorithms for different cases of string replacement */
2073
2074/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2075Py_LOCAL(PyBytesObject *)
2076replace_interleave(PyBytesObject *self,
2077 const char *to_s, Py_ssize_t to_len,
2078 Py_ssize_t maxcount)
2079{
2080 char *self_s, *result_s;
2081 Py_ssize_t self_len, result_len;
2082 Py_ssize_t count, i, product;
2083 PyBytesObject *result;
2084
2085 self_len = PyBytes_GET_SIZE(self);
2086
2087 /* 1 at the end plus 1 after every character */
2088 count = self_len+1;
2089 if (maxcount < count)
2090 count = maxcount;
2091
2092 /* Check for overflow */
2093 /* result_len = count * to_len + self_len; */
2094 product = count * to_len;
2095 if (product / to_len != count) {
2096 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002097 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098 return NULL;
2099 }
2100 result_len = product + self_len;
2101 if (result_len < 0) {
2102 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002103 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104 return NULL;
2105 }
2106
2107 if (! (result = (PyBytesObject *)
2108 PyBytes_FromStringAndSize(NULL, result_len)) )
2109 return NULL;
2110
2111 self_s = PyBytes_AS_STRING(self);
2112 result_s = PyBytes_AS_STRING(result);
2113
2114 /* TODO: special case single character, which doesn't need memcpy */
2115
2116 /* Lay the first one down (guaranteed this will occur) */
2117 Py_MEMCPY(result_s, to_s, to_len);
2118 result_s += to_len;
2119 count -= 1;
2120
2121 for (i=0; i<count; i++) {
2122 *result_s++ = *self_s++;
2123 Py_MEMCPY(result_s, to_s, to_len);
2124 result_s += to_len;
2125 }
2126
2127 /* Copy the rest of the original string */
2128 Py_MEMCPY(result_s, self_s, self_len-i);
2129
2130 return result;
2131}
2132
2133/* Special case for deleting a single character */
2134/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2135Py_LOCAL(PyBytesObject *)
2136replace_delete_single_character(PyBytesObject *self,
2137 char from_c, Py_ssize_t maxcount)
2138{
2139 char *self_s, *result_s;
2140 char *start, *next, *end;
2141 Py_ssize_t self_len, result_len;
2142 Py_ssize_t count;
2143 PyBytesObject *result;
2144
2145 self_len = PyBytes_GET_SIZE(self);
2146 self_s = PyBytes_AS_STRING(self);
2147
2148 count = countchar(self_s, self_len, from_c, maxcount);
2149 if (count == 0) {
2150 return return_self(self);
2151 }
2152
2153 result_len = self_len - count; /* from_len == 1 */
2154 assert(result_len>=0);
2155
2156 if ( (result = (PyBytesObject *)
2157 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2158 return NULL;
2159 result_s = PyBytes_AS_STRING(result);
2160
2161 start = self_s;
2162 end = self_s + self_len;
2163 while (count-- > 0) {
2164 next = findchar(start, end-start, from_c);
2165 if (next == NULL)
2166 break;
2167 Py_MEMCPY(result_s, start, next-start);
2168 result_s += (next-start);
2169 start = next+1;
2170 }
2171 Py_MEMCPY(result_s, start, end-start);
2172
2173 return result;
2174}
2175
2176/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2177
2178Py_LOCAL(PyBytesObject *)
2179replace_delete_substring(PyBytesObject *self,
2180 const char *from_s, Py_ssize_t from_len,
2181 Py_ssize_t maxcount) {
2182 char *self_s, *result_s;
2183 char *start, *next, *end;
2184 Py_ssize_t self_len, result_len;
2185 Py_ssize_t count, offset;
2186 PyBytesObject *result;
2187
2188 self_len = PyBytes_GET_SIZE(self);
2189 self_s = PyBytes_AS_STRING(self);
2190
2191 count = countstring(self_s, self_len,
2192 from_s, from_len,
2193 0, self_len, 1,
2194 maxcount);
2195
2196 if (count == 0) {
2197 /* no matches */
2198 return return_self(self);
2199 }
2200
2201 result_len = self_len - (count * from_len);
2202 assert (result_len>=0);
2203
2204 if ( (result = (PyBytesObject *)
2205 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2206 return NULL;
2207
2208 result_s = PyBytes_AS_STRING(result);
2209
2210 start = self_s;
2211 end = self_s + self_len;
2212 while (count-- > 0) {
2213 offset = findstring(start, end-start,
2214 from_s, from_len,
2215 0, end-start, FORWARD);
2216 if (offset == -1)
2217 break;
2218 next = start + offset;
2219
2220 Py_MEMCPY(result_s, start, next-start);
2221
2222 result_s += (next-start);
2223 start = next+from_len;
2224 }
2225 Py_MEMCPY(result_s, start, end-start);
2226 return result;
2227}
2228
2229/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2230Py_LOCAL(PyBytesObject *)
2231replace_single_character_in_place(PyBytesObject *self,
2232 char from_c, char to_c,
2233 Py_ssize_t maxcount)
2234{
2235 char *self_s, *result_s, *start, *end, *next;
2236 Py_ssize_t self_len;
2237 PyBytesObject *result;
2238
2239 /* The result string will be the same size */
2240 self_s = PyBytes_AS_STRING(self);
2241 self_len = PyBytes_GET_SIZE(self);
2242
2243 next = findchar(self_s, self_len, from_c);
2244
2245 if (next == NULL) {
2246 /* No matches; return the original string */
2247 return return_self(self);
2248 }
2249
2250 /* Need to make a new string */
2251 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2252 if (result == NULL)
2253 return NULL;
2254 result_s = PyBytes_AS_STRING(result);
2255 Py_MEMCPY(result_s, self_s, self_len);
2256
2257 /* change everything in-place, starting with this one */
2258 start = result_s + (next-self_s);
2259 *start = to_c;
2260 start++;
2261 end = result_s + self_len;
2262
2263 while (--maxcount > 0) {
2264 next = findchar(start, end-start, from_c);
2265 if (next == NULL)
2266 break;
2267 *next = to_c;
2268 start = next+1;
2269 }
2270
2271 return result;
2272}
2273
2274/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2275Py_LOCAL(PyBytesObject *)
2276replace_substring_in_place(PyBytesObject *self,
2277 const char *from_s, Py_ssize_t from_len,
2278 const char *to_s, Py_ssize_t to_len,
2279 Py_ssize_t maxcount)
2280{
2281 char *result_s, *start, *end;
2282 char *self_s;
2283 Py_ssize_t self_len, offset;
2284 PyBytesObject *result;
2285
2286 /* The result string will be the same size */
2287
2288 self_s = PyBytes_AS_STRING(self);
2289 self_len = PyBytes_GET_SIZE(self);
2290
2291 offset = findstring(self_s, self_len,
2292 from_s, from_len,
2293 0, self_len, FORWARD);
2294 if (offset == -1) {
2295 /* No matches; return the original string */
2296 return return_self(self);
2297 }
2298
2299 /* Need to make a new string */
2300 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2301 if (result == NULL)
2302 return NULL;
2303 result_s = PyBytes_AS_STRING(result);
2304 Py_MEMCPY(result_s, self_s, self_len);
2305
2306 /* change everything in-place, starting with this one */
2307 start = result_s + offset;
2308 Py_MEMCPY(start, to_s, from_len);
2309 start += from_len;
2310 end = result_s + self_len;
2311
2312 while ( --maxcount > 0) {
2313 offset = findstring(start, end-start,
2314 from_s, from_len,
2315 0, end-start, FORWARD);
2316 if (offset==-1)
2317 break;
2318 Py_MEMCPY(start+offset, to_s, from_len);
2319 start += offset+from_len;
2320 }
2321
2322 return result;
2323}
2324
2325/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2326Py_LOCAL(PyBytesObject *)
2327replace_single_character(PyBytesObject *self,
2328 char from_c,
2329 const char *to_s, Py_ssize_t to_len,
2330 Py_ssize_t maxcount)
2331{
2332 char *self_s, *result_s;
2333 char *start, *next, *end;
2334 Py_ssize_t self_len, result_len;
2335 Py_ssize_t count, product;
2336 PyBytesObject *result;
2337
2338 self_s = PyBytes_AS_STRING(self);
2339 self_len = PyBytes_GET_SIZE(self);
2340
2341 count = countchar(self_s, self_len, from_c, maxcount);
2342 if (count == 0) {
2343 /* no matches, return unchanged */
2344 return return_self(self);
2345 }
2346
2347 /* use the difference between current and new, hence the "-1" */
2348 /* result_len = self_len + count * (to_len-1) */
2349 product = count * (to_len-1);
2350 if (product / (to_len-1) != count) {
2351 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002352 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353 return NULL;
2354 }
2355 result_len = self_len + product;
2356 if (result_len < 0) {
2357 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002358 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002359 return NULL;
2360 }
2361
2362 if ( (result = (PyBytesObject *)
2363 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2364 return NULL;
2365 result_s = PyBytes_AS_STRING(result);
2366
2367 start = self_s;
2368 end = self_s + self_len;
2369 while (count-- > 0) {
2370 next = findchar(start, end-start, from_c);
2371 if (next == NULL)
2372 break;
2373
2374 if (next == start) {
2375 /* replace with the 'to' */
2376 Py_MEMCPY(result_s, to_s, to_len);
2377 result_s += to_len;
2378 start += 1;
2379 } else {
2380 /* copy the unchanged old then the 'to' */
2381 Py_MEMCPY(result_s, start, next-start);
2382 result_s += (next-start);
2383 Py_MEMCPY(result_s, to_s, to_len);
2384 result_s += to_len;
2385 start = next+1;
2386 }
2387 }
2388 /* Copy the remainder of the remaining string */
2389 Py_MEMCPY(result_s, start, end-start);
2390
2391 return result;
2392}
2393
2394/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2395Py_LOCAL(PyBytesObject *)
2396replace_substring(PyBytesObject *self,
2397 const char *from_s, Py_ssize_t from_len,
2398 const char *to_s, Py_ssize_t to_len,
2399 Py_ssize_t maxcount) {
2400 char *self_s, *result_s;
2401 char *start, *next, *end;
2402 Py_ssize_t self_len, result_len;
2403 Py_ssize_t count, offset, product;
2404 PyBytesObject *result;
2405
2406 self_s = PyBytes_AS_STRING(self);
2407 self_len = PyBytes_GET_SIZE(self);
2408
2409 count = countstring(self_s, self_len,
2410 from_s, from_len,
2411 0, self_len, FORWARD, maxcount);
2412 if (count == 0) {
2413 /* no matches, return unchanged */
2414 return return_self(self);
2415 }
2416
2417 /* Check for overflow */
2418 /* result_len = self_len + count * (to_len-from_len) */
2419 product = count * (to_len-from_len);
2420 if (product / (to_len-from_len) != count) {
2421 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002422 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423 return NULL;
2424 }
2425 result_len = self_len + product;
2426 if (result_len < 0) {
2427 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002428 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002429 return NULL;
2430 }
2431
2432 if ( (result = (PyBytesObject *)
2433 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2434 return NULL;
2435 result_s = PyBytes_AS_STRING(result);
2436
2437 start = self_s;
2438 end = self_s + self_len;
2439 while (count-- > 0) {
2440 offset = findstring(start, end-start,
2441 from_s, from_len,
2442 0, end-start, FORWARD);
2443 if (offset == -1)
2444 break;
2445 next = start+offset;
2446 if (next == start) {
2447 /* replace with the 'to' */
2448 Py_MEMCPY(result_s, to_s, to_len);
2449 result_s += to_len;
2450 start += from_len;
2451 } else {
2452 /* copy the unchanged old then the 'to' */
2453 Py_MEMCPY(result_s, start, next-start);
2454 result_s += (next-start);
2455 Py_MEMCPY(result_s, to_s, to_len);
2456 result_s += to_len;
2457 start = next+from_len;
2458 }
2459 }
2460 /* Copy the remainder of the remaining string */
2461 Py_MEMCPY(result_s, start, end-start);
2462
2463 return result;
2464}
2465
2466
2467Py_LOCAL(PyBytesObject *)
2468replace(PyBytesObject *self,
2469 const char *from_s, Py_ssize_t from_len,
2470 const char *to_s, Py_ssize_t to_len,
2471 Py_ssize_t maxcount)
2472{
2473 if (maxcount < 0) {
2474 maxcount = PY_SSIZE_T_MAX;
2475 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2476 /* nothing to do; return the original string */
2477 return return_self(self);
2478 }
2479
2480 if (maxcount == 0 ||
2481 (from_len == 0 && to_len == 0)) {
2482 /* nothing to do; return the original string */
2483 return return_self(self);
2484 }
2485
2486 /* Handle zero-length special cases */
2487
2488 if (from_len == 0) {
2489 /* insert the 'to' string everywhere. */
2490 /* >>> "Python".replace("", ".") */
2491 /* '.P.y.t.h.o.n.' */
2492 return replace_interleave(self, to_s, to_len, maxcount);
2493 }
2494
2495 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2496 /* point for an empty self string to generate a non-empty string */
2497 /* Special case so the remaining code always gets a non-empty string */
2498 if (PyBytes_GET_SIZE(self) == 0) {
2499 return return_self(self);
2500 }
2501
2502 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002503 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504 if (from_len == 1) {
2505 return replace_delete_single_character(
2506 self, from_s[0], maxcount);
2507 } else {
2508 return replace_delete_substring(self, from_s,
2509 from_len, maxcount);
2510 }
2511 }
2512
2513 /* Handle special case where both strings have the same length */
2514
2515 if (from_len == to_len) {
2516 if (from_len == 1) {
2517 return replace_single_character_in_place(
2518 self,
2519 from_s[0],
2520 to_s[0],
2521 maxcount);
2522 } else {
2523 return replace_substring_in_place(
2524 self, from_s, from_len, to_s, to_len,
2525 maxcount);
2526 }
2527 }
2528
2529 /* Otherwise use the more generic algorithms */
2530 if (from_len == 1) {
2531 return replace_single_character(self, from_s[0],
2532 to_s, to_len, maxcount);
2533 } else {
2534 /* len('from')>=2, len('to')>=1 */
2535 return replace_substring(self, from_s, from_len, to_s, to_len,
2536 maxcount);
2537 }
2538}
2539
2540PyDoc_STRVAR(replace__doc__,
2541"B.replace(old, new[, count]) -> bytes\n\
2542\n\
2543Return a copy of B with all occurrences of subsection\n\
2544old replaced by new. If the optional argument count is\n\
2545given, only the first count occurrences are replaced.");
2546
2547static PyObject *
2548string_replace(PyBytesObject *self, PyObject *args)
2549{
2550 Py_ssize_t count = -1;
2551 PyObject *from, *to;
2552 const char *from_s, *to_s;
2553 Py_ssize_t from_len, to_len;
2554
2555 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2556 return NULL;
2557
2558 if (PyBytes_Check(from)) {
2559 from_s = PyBytes_AS_STRING(from);
2560 from_len = PyBytes_GET_SIZE(from);
2561 }
2562 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2563 return NULL;
2564
2565 if (PyBytes_Check(to)) {
2566 to_s = PyBytes_AS_STRING(to);
2567 to_len = PyBytes_GET_SIZE(to);
2568 }
2569 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2570 return NULL;
2571
2572 return (PyObject *)replace((PyBytesObject *) self,
2573 from_s, from_len,
2574 to_s, to_len, count);
2575}
2576
2577/** End DALKE **/
2578
2579/* Matches the end (direction >= 0) or start (direction < 0) of self
2580 * against substr, using the start and end arguments. Returns
2581 * -1 on error, 0 if not found and 1 if found.
2582 */
2583Py_LOCAL(int)
2584_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2585 Py_ssize_t end, int direction)
2586{
2587 Py_ssize_t len = PyBytes_GET_SIZE(self);
2588 Py_ssize_t slen;
2589 const char* sub;
2590 const char* str;
2591
2592 if (PyBytes_Check(substr)) {
2593 sub = PyBytes_AS_STRING(substr);
2594 slen = PyBytes_GET_SIZE(substr);
2595 }
2596 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2597 return -1;
2598 str = PyBytes_AS_STRING(self);
2599
2600 string_adjust_indices(&start, &end, len);
2601
2602 if (direction < 0) {
2603 /* startswith */
2604 if (start+slen > len)
2605 return 0;
2606 } else {
2607 /* endswith */
2608 if (end-start < slen || start > len)
2609 return 0;
2610
2611 if (end-slen > start)
2612 start = end - slen;
2613 }
2614 if (end-start >= slen)
2615 return ! memcmp(str+start, sub, slen);
2616 return 0;
2617}
2618
2619
2620PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002621"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002622\n\
2623Return True if B starts with the specified prefix, False otherwise.\n\
2624With optional start, test B beginning at that position.\n\
2625With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002626prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627
2628static PyObject *
2629string_startswith(PyBytesObject *self, PyObject *args)
2630{
2631 Py_ssize_t start = 0;
2632 Py_ssize_t end = PY_SSIZE_T_MAX;
2633 PyObject *subobj;
2634 int result;
2635
2636 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2637 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2638 return NULL;
2639 if (PyTuple_Check(subobj)) {
2640 Py_ssize_t i;
2641 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2642 result = _string_tailmatch(self,
2643 PyTuple_GET_ITEM(subobj, i),
2644 start, end, -1);
2645 if (result == -1)
2646 return NULL;
2647 else if (result) {
2648 Py_RETURN_TRUE;
2649 }
2650 }
2651 Py_RETURN_FALSE;
2652 }
2653 result = _string_tailmatch(self, subobj, start, end, -1);
2654 if (result == -1)
2655 return NULL;
2656 else
2657 return PyBool_FromLong(result);
2658}
2659
2660
2661PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002662"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002663\n\
2664Return True if B ends with the specified suffix, False otherwise.\n\
2665With optional start, test B beginning at that position.\n\
2666With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002667suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
2669static PyObject *
2670string_endswith(PyBytesObject *self, PyObject *args)
2671{
2672 Py_ssize_t start = 0;
2673 Py_ssize_t end = PY_SSIZE_T_MAX;
2674 PyObject *subobj;
2675 int result;
2676
2677 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2678 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2679 return NULL;
2680 if (PyTuple_Check(subobj)) {
2681 Py_ssize_t i;
2682 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2683 result = _string_tailmatch(self,
2684 PyTuple_GET_ITEM(subobj, i),
2685 start, end, +1);
2686 if (result == -1)
2687 return NULL;
2688 else if (result) {
2689 Py_RETURN_TRUE;
2690 }
2691 }
2692 Py_RETURN_FALSE;
2693 }
2694 result = _string_tailmatch(self, subobj, start, end, +1);
2695 if (result == -1)
2696 return NULL;
2697 else
2698 return PyBool_FromLong(result);
2699}
2700
2701
2702PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002703"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002704\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002705Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002706to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002707handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2708a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002710able to handle UnicodeDecodeErrors.");
2711
2712static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002714{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715 const char *encoding = NULL;
2716 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002717
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2719 return NULL;
2720 if (encoding == NULL)
2721 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002722 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002723}
2724
Guido van Rossum20188312006-05-05 15:15:40 +00002725
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002726PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002728\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002730Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002732
2733static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002734hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002735{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736 if (c >= 128)
2737 return -1;
2738 if (ISDIGIT(c))
2739 return c - '0';
2740 else {
2741 if (ISUPPER(c))
2742 c = TOLOWER(c);
2743 if (c >= 'a' && c <= 'f')
2744 return c - 'a' + 10;
2745 }
2746 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002747}
2748
2749static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002751{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752 PyObject *newstring, *hexobj;
2753 char *buf;
2754 Py_UNICODE *hex;
2755 Py_ssize_t hexlen, byteslen, i, j;
2756 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002757
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002758 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2759 return NULL;
2760 assert(PyUnicode_Check(hexobj));
2761 hexlen = PyUnicode_GET_SIZE(hexobj);
2762 hex = PyUnicode_AS_UNICODE(hexobj);
2763 byteslen = hexlen/2; /* This overestimates if there are spaces */
2764 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2765 if (!newstring)
2766 return NULL;
2767 buf = PyBytes_AS_STRING(newstring);
2768 for (i = j = 0; i < hexlen; i += 2) {
2769 /* skip over spaces in the input */
2770 while (hex[i] == ' ')
2771 i++;
2772 if (i >= hexlen)
2773 break;
2774 top = hex_digit_to_int(hex[i]);
2775 bot = hex_digit_to_int(hex[i+1]);
2776 if (top == -1 || bot == -1) {
2777 PyErr_Format(PyExc_ValueError,
2778 "non-hexadecimal number found in "
2779 "fromhex() arg at position %zd", i);
2780 goto error;
2781 }
2782 buf[j++] = (top << 4) + bot;
2783 }
2784 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2785 goto error;
2786 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002787
2788 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789 Py_XDECREF(newstring);
2790 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002791}
2792
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002793PyDoc_STRVAR(sizeof__doc__,
2794"S.__sizeof__() -> size of S in memory, in bytes");
2795
2796static PyObject *
2797string_sizeof(PyBytesObject *v)
2798{
2799 Py_ssize_t res;
2800 res = sizeof(PyBytesObject) + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2801 return PyLong_FromSsize_t(res);
2802}
2803
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002804
2805static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002807{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002808 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002809}
2810
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002811
2812static PyMethodDef
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813string_methods[] = {
2814 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
2815 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2816 _Py_capitalize__doc__},
2817 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2818 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2819 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2820 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2821 endswith__doc__},
2822 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2823 expandtabs__doc__},
2824 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2825 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2826 fromhex_doc},
2827 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2828 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2829 _Py_isalnum__doc__},
2830 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2831 _Py_isalpha__doc__},
2832 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2833 _Py_isdigit__doc__},
2834 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2835 _Py_islower__doc__},
2836 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2837 _Py_isspace__doc__},
2838 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2839 _Py_istitle__doc__},
2840 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2841 _Py_isupper__doc__},
2842 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2843 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2844 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2845 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2846 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
2847 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2848 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2849 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2850 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2851 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2852 rpartition__doc__},
2853 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2854 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2855 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2856 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2857 splitlines__doc__},
2858 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2859 startswith__doc__},
2860 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2861 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2862 _Py_swapcase__doc__},
2863 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2864 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2865 translate__doc__},
2866 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2867 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002868 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
2869 sizeof__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002870 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002871};
2872
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002873static PyObject *
2874str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2875
2876static PyObject *
2877string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2878{
2879 PyObject *x = NULL, *it;
2880 const char *encoding = NULL;
2881 const char *errors = NULL;
2882 PyObject *new = NULL;
2883 Py_ssize_t i, size;
2884 static char *kwlist[] = {"source", "encoding", "errors", 0};
2885
2886 if (type != &PyBytes_Type)
2887 return str_subtype_new(type, args, kwds);
2888 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2889 &encoding, &errors))
2890 return NULL;
2891 if (x == NULL) {
2892 if (encoding != NULL || errors != NULL) {
2893 PyErr_SetString(PyExc_TypeError,
2894 "encoding or errors without sequence "
2895 "argument");
2896 return NULL;
2897 }
2898 return PyBytes_FromString("");
2899 }
2900
2901 if (PyUnicode_Check(x)) {
2902 /* Encode via the codec registry */
2903 if (encoding == NULL) {
2904 PyErr_SetString(PyExc_TypeError,
2905 "string argument without an encoding");
2906 return NULL;
2907 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002908 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909 if (new == NULL)
2910 return NULL;
2911 assert(PyBytes_Check(new));
2912 return new;
2913 }
2914
2915 /* If it's not unicode, there can't be encoding or errors */
2916 if (encoding != NULL || errors != NULL) {
2917 PyErr_SetString(PyExc_TypeError,
2918 "encoding or errors without a string argument");
2919 return NULL;
2920 }
2921
2922 /* Is it an int? */
2923 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2924 if (size == -1 && PyErr_Occurred()) {
2925 PyErr_Clear();
2926 }
2927 else {
2928 if (size < 0) {
2929 PyErr_SetString(PyExc_ValueError, "negative count");
2930 return NULL;
2931 }
2932 new = PyBytes_FromStringAndSize(NULL, size);
2933 if (new == NULL) {
2934 return NULL;
2935 }
2936 if (size > 0) {
2937 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2938 }
2939 return new;
2940 }
2941
2942 /* Use the modern buffer interface */
2943 if (PyObject_CheckBuffer(x)) {
2944 Py_buffer view;
2945 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2946 return NULL;
2947 new = PyBytes_FromStringAndSize(NULL, view.len);
2948 if (!new)
2949 goto fail;
2950 // XXX(brett.cannon): Better way to get to internal buffer?
2951 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2952 &view, view.len, 'C') < 0)
2953 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002954 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955 return new;
2956 fail:
2957 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002958 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002959 return NULL;
2960 }
2961
2962 /* For iterator version, create a string object and resize as needed */
2963 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2964 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2965 input being a truly long iterator. */
2966 size = 64;
2967 new = PyBytes_FromStringAndSize(NULL, size);
2968 if (new == NULL)
2969 return NULL;
2970
2971 /* XXX Optimize this if the arguments is a list, tuple */
2972
2973 /* Get the iterator */
2974 it = PyObject_GetIter(x);
2975 if (it == NULL)
2976 goto error;
2977
2978 /* Run the iterator to exhaustion */
2979 for (i = 0; ; i++) {
2980 PyObject *item;
2981 Py_ssize_t value;
2982
2983 /* Get the next item */
2984 item = PyIter_Next(it);
2985 if (item == NULL) {
2986 if (PyErr_Occurred())
2987 goto error;
2988 break;
2989 }
2990
2991 /* Interpret it as an int (__index__) */
2992 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2993 Py_DECREF(item);
2994 if (value == -1 && PyErr_Occurred())
2995 goto error;
2996
2997 /* Range check */
2998 if (value < 0 || value >= 256) {
2999 PyErr_SetString(PyExc_ValueError,
3000 "bytes must be in range(0, 256)");
3001 goto error;
3002 }
3003
3004 /* Append the byte */
3005 if (i >= size) {
3006 size *= 2;
3007 if (_PyBytes_Resize(&new, size) < 0)
3008 goto error;
3009 }
3010 ((PyBytesObject *)new)->ob_sval[i] = value;
3011 }
3012 _PyBytes_Resize(&new, i);
3013
3014 /* Clean up and return success */
3015 Py_DECREF(it);
3016 return new;
3017
3018 error:
3019 /* Error handling when new != NULL */
3020 Py_XDECREF(it);
3021 Py_DECREF(new);
3022 return NULL;
3023}
3024
3025static PyObject *
3026str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3027{
3028 PyObject *tmp, *pnew;
3029 Py_ssize_t n;
3030
3031 assert(PyType_IsSubtype(type, &PyBytes_Type));
3032 tmp = string_new(&PyBytes_Type, args, kwds);
3033 if (tmp == NULL)
3034 return NULL;
3035 assert(PyBytes_CheckExact(tmp));
3036 n = PyBytes_GET_SIZE(tmp);
3037 pnew = type->tp_alloc(type, n);
3038 if (pnew != NULL) {
3039 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3040 PyBytes_AS_STRING(tmp), n+1);
3041 ((PyBytesObject *)pnew)->ob_shash =
3042 ((PyBytesObject *)tmp)->ob_shash;
3043 }
3044 Py_DECREF(tmp);
3045 return pnew;
3046}
3047
3048PyDoc_STRVAR(string_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003049"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003050bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003051bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3052bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003053\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003054Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003055 - an iterable yielding integers in range(256)\n\
3056 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057 - a bytes or a buffer object\n\
3058 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003060static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062PyTypeObject PyBytes_Type = {
3063 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3064 "bytes",
3065 sizeof(PyBytesObject),
3066 sizeof(char),
3067 string_dealloc, /* tp_dealloc */
3068 0, /* tp_print */
3069 0, /* tp_getattr */
3070 0, /* tp_setattr */
3071 0, /* tp_compare */
3072 (reprfunc)string_repr, /* tp_repr */
3073 0, /* tp_as_number */
3074 &string_as_sequence, /* tp_as_sequence */
3075 &string_as_mapping, /* tp_as_mapping */
3076 (hashfunc)string_hash, /* tp_hash */
3077 0, /* tp_call */
3078 string_str, /* tp_str */
3079 PyObject_GenericGetAttr, /* tp_getattro */
3080 0, /* tp_setattro */
3081 &string_as_buffer, /* tp_as_buffer */
3082 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3083 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3084 string_doc, /* tp_doc */
3085 0, /* tp_traverse */
3086 0, /* tp_clear */
3087 (richcmpfunc)string_richcompare, /* tp_richcompare */
3088 0, /* tp_weaklistoffset */
3089 str_iter, /* tp_iter */
3090 0, /* tp_iternext */
3091 string_methods, /* tp_methods */
3092 0, /* tp_members */
3093 0, /* tp_getset */
3094 &PyBaseObject_Type, /* tp_base */
3095 0, /* tp_dict */
3096 0, /* tp_descr_get */
3097 0, /* tp_descr_set */
3098 0, /* tp_dictoffset */
3099 0, /* tp_init */
3100 0, /* tp_alloc */
3101 string_new, /* tp_new */
3102 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003103};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003105void
3106PyBytes_Concat(register PyObject **pv, register PyObject *w)
3107{
3108 register PyObject *v;
3109 assert(pv != NULL);
3110 if (*pv == NULL)
3111 return;
3112 if (w == NULL) {
3113 Py_DECREF(*pv);
3114 *pv = NULL;
3115 return;
3116 }
3117 v = string_concat(*pv, w);
3118 Py_DECREF(*pv);
3119 *pv = v;
3120}
3121
3122void
3123PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3124{
3125 PyBytes_Concat(pv, w);
3126 Py_XDECREF(w);
3127}
3128
3129
3130/* The following function breaks the notion that strings are immutable:
3131 it changes the size of a string. We get away with this only if there
3132 is only one module referencing the object. You can also think of it
3133 as creating a new string object and destroying the old one, only
3134 more efficiently. In any case, don't use this if the string may
3135 already be known to some other part of the code...
3136 Note that if there's not enough memory to resize the string, the original
3137 string object at *pv is deallocated, *pv is set to NULL, an "out of
3138 memory" exception is set, and -1 is returned. Else (on success) 0 is
3139 returned, and the value in *pv may or may not be the same as on input.
3140 As always, an extra byte is allocated for a trailing \0 byte (newsize
3141 does *not* include that), and a trailing \0 byte is stored.
3142*/
3143
3144int
3145_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3146{
3147 register PyObject *v;
3148 register PyBytesObject *sv;
3149 v = *pv;
3150 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3151 *pv = 0;
3152 Py_DECREF(v);
3153 PyErr_BadInternalCall();
3154 return -1;
3155 }
3156 /* XXX UNREF/NEWREF interface should be more symmetrical */
3157 _Py_DEC_REFTOTAL;
3158 _Py_ForgetReference(v);
3159 *pv = (PyObject *)
3160 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
3161 if (*pv == NULL) {
3162 PyObject_Del(v);
3163 PyErr_NoMemory();
3164 return -1;
3165 }
3166 _Py_NewReference(*pv);
3167 sv = (PyBytesObject *) *pv;
3168 Py_SIZE(sv) = newsize;
3169 sv->ob_sval[newsize] = '\0';
3170 sv->ob_shash = -1; /* invalidate cached hash value */
3171 return 0;
3172}
3173
3174/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3175 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3176 * Python's regular ints.
3177 * Return value: a new PyString*, or NULL if error.
3178 * . *pbuf is set to point into it,
3179 * *plen set to the # of chars following that.
3180 * Caller must decref it when done using pbuf.
3181 * The string starting at *pbuf is of the form
3182 * "-"? ("0x" | "0X")? digit+
3183 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3184 * set in flags. The case of hex digits will be correct,
3185 * There will be at least prec digits, zero-filled on the left if
3186 * necessary to get that many.
3187 * val object to be converted
3188 * flags bitmask of format flags; only F_ALT is looked at
3189 * prec minimum number of digits; 0-fill on left if needed
3190 * type a character in [duoxX]; u acts the same as d
3191 *
3192 * CAUTION: o, x and X conversions on regular ints can never
3193 * produce a '-' sign, but can for Python's unbounded ints.
3194 */
3195PyObject*
3196_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3197 char **pbuf, int *plen)
3198{
3199 PyObject *result = NULL;
3200 char *buf;
3201 Py_ssize_t i;
3202 int sign; /* 1 if '-', else 0 */
3203 int len; /* number of characters */
3204 Py_ssize_t llen;
3205 int numdigits; /* len == numnondigits + numdigits */
3206 int numnondigits = 0;
3207
3208 /* Avoid exceeding SSIZE_T_MAX */
3209 if (prec > PY_SSIZE_T_MAX-3) {
3210 PyErr_SetString(PyExc_OverflowError,
3211 "precision too large");
3212 return NULL;
3213 }
3214
3215 switch (type) {
3216 case 'd':
3217 case 'u':
3218 /* Special-case boolean: we want 0/1 */
3219 if (PyBool_Check(val))
3220 result = PyNumber_ToBase(val, 10);
3221 else
3222 result = Py_TYPE(val)->tp_str(val);
3223 break;
3224 case 'o':
3225 numnondigits = 2;
3226 result = PyNumber_ToBase(val, 8);
3227 break;
3228 case 'x':
3229 case 'X':
3230 numnondigits = 2;
3231 result = PyNumber_ToBase(val, 16);
3232 break;
3233 default:
3234 assert(!"'type' not in [duoxX]");
3235 }
3236 if (!result)
3237 return NULL;
3238
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003239 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003240 if (!buf) {
3241 Py_DECREF(result);
3242 return NULL;
3243 }
3244
3245 /* To modify the string in-place, there can only be one reference. */
3246 if (Py_REFCNT(result) != 1) {
3247 PyErr_BadInternalCall();
3248 return NULL;
3249 }
3250 llen = PyUnicode_GetSize(result);
3251 if (llen > INT_MAX) {
3252 PyErr_SetString(PyExc_ValueError,
3253 "string too large in _PyBytes_FormatLong");
3254 return NULL;
3255 }
3256 len = (int)llen;
3257 if (buf[len-1] == 'L') {
3258 --len;
3259 buf[len] = '\0';
3260 }
3261 sign = buf[0] == '-';
3262 numnondigits += sign;
3263 numdigits = len - numnondigits;
3264 assert(numdigits > 0);
3265
3266 /* Get rid of base marker unless F_ALT */
3267 if (((flags & F_ALT) == 0 &&
3268 (type == 'o' || type == 'x' || type == 'X'))) {
3269 assert(buf[sign] == '0');
3270 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3271 buf[sign+1] == 'o');
3272 numnondigits -= 2;
3273 buf += 2;
3274 len -= 2;
3275 if (sign)
3276 buf[0] = '-';
3277 assert(len == numnondigits + numdigits);
3278 assert(numdigits > 0);
3279 }
3280
3281 /* Fill with leading zeroes to meet minimum width. */
3282 if (prec > numdigits) {
3283 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3284 numnondigits + prec);
3285 char *b1;
3286 if (!r1) {
3287 Py_DECREF(result);
3288 return NULL;
3289 }
3290 b1 = PyBytes_AS_STRING(r1);
3291 for (i = 0; i < numnondigits; ++i)
3292 *b1++ = *buf++;
3293 for (i = 0; i < prec - numdigits; i++)
3294 *b1++ = '0';
3295 for (i = 0; i < numdigits; i++)
3296 *b1++ = *buf++;
3297 *b1 = '\0';
3298 Py_DECREF(result);
3299 result = r1;
3300 buf = PyBytes_AS_STRING(result);
3301 len = numnondigits + prec;
3302 }
3303
3304 /* Fix up case for hex conversions. */
3305 if (type == 'X') {
3306 /* Need to convert all lower case letters to upper case.
3307 and need to convert 0x to 0X (and -0x to -0X). */
3308 for (i = 0; i < len; i++)
3309 if (buf[i] >= 'a' && buf[i] <= 'x')
3310 buf[i] -= 'a'-'A';
3311 }
3312 *pbuf = buf;
3313 *plen = len;
3314 return result;
3315}
3316
3317void
3318PyBytes_Fini(void)
3319{
3320 int i;
3321 for (i = 0; i < UCHAR_MAX + 1; i++) {
3322 Py_XDECREF(characters[i]);
3323 characters[i] = NULL;
3324 }
3325 Py_XDECREF(nullstring);
3326 nullstring = NULL;
3327}
3328
Benjamin Peterson4116f362008-05-27 00:36:20 +00003329/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003330
3331typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003332 PyObject_HEAD
3333 Py_ssize_t it_index;
3334 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3335} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003336
3337static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003338striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003339{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003340 _PyObject_GC_UNTRACK(it);
3341 Py_XDECREF(it->it_seq);
3342 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003343}
3344
3345static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003346striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003347{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003348 Py_VISIT(it->it_seq);
3349 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003350}
3351
3352static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003354{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003355 PyBytesObject *seq;
3356 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003357
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003358 assert(it != NULL);
3359 seq = it->it_seq;
3360 if (seq == NULL)
3361 return NULL;
3362 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003363
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3365 item = PyLong_FromLong(
3366 (unsigned char)seq->ob_sval[it->it_index]);
3367 if (item != NULL)
3368 ++it->it_index;
3369 return item;
3370 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003371
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372 Py_DECREF(seq);
3373 it->it_seq = NULL;
3374 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003375}
3376
3377static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003378striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003379{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003380 Py_ssize_t len = 0;
3381 if (it->it_seq)
3382 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3383 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003384}
3385
3386PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003387 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003388
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003389static PyMethodDef striter_methods[] = {
3390 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3391 length_hint_doc},
3392 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003393};
3394
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003395PyTypeObject PyBytesIter_Type = {
3396 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3397 "bytes_iterator", /* tp_name */
3398 sizeof(striterobject), /* tp_basicsize */
3399 0, /* tp_itemsize */
3400 /* methods */
3401 (destructor)striter_dealloc, /* tp_dealloc */
3402 0, /* tp_print */
3403 0, /* tp_getattr */
3404 0, /* tp_setattr */
3405 0, /* tp_compare */
3406 0, /* tp_repr */
3407 0, /* tp_as_number */
3408 0, /* tp_as_sequence */
3409 0, /* tp_as_mapping */
3410 0, /* tp_hash */
3411 0, /* tp_call */
3412 0, /* tp_str */
3413 PyObject_GenericGetAttr, /* tp_getattro */
3414 0, /* tp_setattro */
3415 0, /* tp_as_buffer */
3416 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3417 0, /* tp_doc */
3418 (traverseproc)striter_traverse, /* tp_traverse */
3419 0, /* tp_clear */
3420 0, /* tp_richcompare */
3421 0, /* tp_weaklistoffset */
3422 PyObject_SelfIter, /* tp_iter */
3423 (iternextfunc)striter_next, /* tp_iternext */
3424 striter_methods, /* tp_methods */
3425 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003426};
3427
3428static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003429str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003430{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003431 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003432
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003433 if (!PyBytes_Check(seq)) {
3434 PyErr_BadInternalCall();
3435 return NULL;
3436 }
3437 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3438 if (it == NULL)
3439 return NULL;
3440 it->it_index = 0;
3441 Py_INCREF(seq);
3442 it->it_seq = (PyBytesObject *)seq;
3443 _PyObject_GC_TRACK(it);
3444 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003445}