blob: 3bda6d99459ca25a1dbf835bd25a2ae24ea5425d [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz2bad9702007-08-27 06:19:22 +00009static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000010_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000011{
Christian Heimes90aa7642007-12-19 02:45:37 +000012 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000013
Gregory P. Smith60d241f2007-10-16 06:31:30 +000014 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000015 {
16 PyErr_Format(PyExc_TypeError,
17 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000018 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000019 return -1;
20 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000021
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000022 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
23 return -1;
24 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000025}
26
Christian Heimes2c9c7a52008-05-26 13:42:13 +000027#ifdef COUNT_ALLOCS
28int null_strings, one_strings;
29#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000030
Christian Heimes2c9c7a52008-05-26 13:42:13 +000031static PyBytesObject *characters[UCHAR_MAX + 1];
32static PyBytesObject *nullstring;
33
34/*
35 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
39 For PyBytes_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
41
42 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the equal to the `size' parameter (for
57 PyBytes_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyBytes_FromString()).
59*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000060PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000062{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063 register PyBytesObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyBytes_FromStringAndSize");
67 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Neal Norwitz3ce5d922008-08-24 07:08:55 +000086 if (size > PY_SSIZE_T_MAX - sizeof(PyBytesObject)) {
87 PyErr_SetString(PyExc_OverflowError,
88 "byte string is too large");
89 return NULL;
90 }
91
Christian Heimes2c9c7a52008-05-26 13:42:13 +000092 /* Inline PyObject_NewVar */
93 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
94 if (op == NULL)
95 return PyErr_NoMemory();
96 PyObject_INIT_VAR(op, &PyBytes_Type, size);
97 op->ob_shash = -1;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 nullstring = op;
104 Py_INCREF(op);
105 } else if (size == 1 && str != NULL) {
106 characters[*str & UCHAR_MAX] = op;
107 Py_INCREF(op);
108 }
109 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000110}
111
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000112PyObject *
113PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000115 register size_t size;
116 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000117
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000118 assert(str != NULL);
119 size = strlen(str);
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000120 if (size > PY_SSIZE_T_MAX - sizeof(PyBytesObject)) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000122 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000123 return NULL;
124 }
125 if (size == 0 && (op = nullstring) != NULL) {
126#ifdef COUNT_ALLOCS
127 null_strings++;
128#endif
129 Py_INCREF(op);
130 return (PyObject *)op;
131 }
132 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
133#ifdef COUNT_ALLOCS
134 one_strings++;
135#endif
136 Py_INCREF(op);
137 return (PyObject *)op;
138 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000139
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140 /* Inline PyObject_NewVar */
141 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
142 if (op == NULL)
143 return PyErr_NoMemory();
144 PyObject_INIT_VAR(op, &PyBytes_Type, size);
145 op->ob_shash = -1;
146 Py_MEMCPY(op->ob_sval, str, size+1);
147 /* share short strings */
148 if (size == 0) {
149 nullstring = op;
150 Py_INCREF(op);
151 } else if (size == 1) {
152 characters[*str & UCHAR_MAX] = op;
153 Py_INCREF(op);
154 }
155 return (PyObject *) op;
156}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000157
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000158PyObject *
159PyBytes_FromFormatV(const char *format, va_list vargs)
160{
161 va_list count;
162 Py_ssize_t n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167#ifdef VA_LIST_IS_ARRAY
168 Py_MEMCPY(count, vargs, sizeof(va_list));
169#else
170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
173 count = vargs;
174#endif
175#endif
176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !ISALPHA(*f))
181 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
184 * they don't affect the amount of space we reserve.
185 */
186 if ((*f == 'l' || *f == 'z') &&
187 (f[1] == 'd' || f[1] == 'u'))
188 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000189
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'u': case 'i': case 'x':
198 (void) va_arg(count, int);
199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
213 * XXX I count 18 -- what's the extra for?
214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
223 what's in the argument list) */
224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
234 string = PyBytes_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000237
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000238 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000239
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 Py_ssize_t i;
244 int longflag = 0;
245 int size_tflag = 0;
246 /* parse the width.precision part (we're only
247 interested in the precision value, if any) */
248 n = 0;
249 while (ISDIGIT(*f))
250 n = (n*10) + *f++ - '0';
251 if (*f == '.') {
252 f++;
253 n = 0;
254 while (ISDIGIT(*f))
255 n = (n*10) + *f++ - '0';
256 }
257 while (*f && *f != '%' && !ISALPHA(*f))
258 f++;
259 /* handle the long flag, but only for %ld and %lu.
260 others can be added when necessary. */
261 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
262 longflag = 1;
263 ++f;
264 }
265 /* handle the size_t flag. */
266 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
267 size_tflag = 1;
268 ++f;
269 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000270
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000271 switch (*f) {
272 case 'c':
273 *s++ = va_arg(vargs, int);
274 break;
275 case 'd':
276 if (longflag)
277 sprintf(s, "%ld", va_arg(vargs, long));
278 else if (size_tflag)
279 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
280 va_arg(vargs, Py_ssize_t));
281 else
282 sprintf(s, "%d", va_arg(vargs, int));
283 s += strlen(s);
284 break;
285 case 'u':
286 if (longflag)
287 sprintf(s, "%lu",
288 va_arg(vargs, unsigned long));
289 else if (size_tflag)
290 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
291 va_arg(vargs, size_t));
292 else
293 sprintf(s, "%u",
294 va_arg(vargs, unsigned int));
295 s += strlen(s);
296 break;
297 case 'i':
298 sprintf(s, "%i", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 'x':
302 sprintf(s, "%x", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 's':
306 p = va_arg(vargs, char*);
307 i = strlen(p);
308 if (n > 0 && i > n)
309 i = n;
310 Py_MEMCPY(s, p, i);
311 s += i;
312 break;
313 case 'p':
314 sprintf(s, "%p", va_arg(vargs, void*));
315 /* %p is ill-defined: ensure leading 0x. */
316 if (s[1] == 'X')
317 s[1] = 'x';
318 else if (s[1] != 'x') {
319 memmove(s+2, s, strlen(s)+1);
320 s[0] = '0';
321 s[1] = 'x';
322 }
323 s += strlen(s);
324 break;
325 case '%':
326 *s++ = '%';
327 break;
328 default:
329 strcpy(s, p);
330 s += strlen(s);
331 goto end;
332 }
333 } else
334 *s++ = *f;
335 }
336
337 end:
338 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
339 return string;
340}
341
342PyObject *
343PyBytes_FromFormat(const char *format, ...)
344{
345 PyObject* ret;
346 va_list vargs;
347
348#ifdef HAVE_STDARG_PROTOTYPES
349 va_start(vargs, format);
350#else
351 va_start(vargs);
352#endif
353 ret = PyBytes_FromFormatV(format, vargs);
354 va_end(vargs);
355 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000356}
357
358static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000360{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000362}
363
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000364/* Unescape a backslash-escaped string. If unicode is non-zero,
365 the string is a u-literal. If recode_encoding is non-zero,
366 the string is UTF-8 encoded and should be re-encoded in the
367 specified encoding. */
368
369PyObject *PyBytes_DecodeEscape(const char *s,
370 Py_ssize_t len,
371 const char *errors,
372 Py_ssize_t unicode,
373 const char *recode_encoding)
374{
375 int c;
376 char *p, *buf;
377 const char *end;
378 PyObject *v;
379 Py_ssize_t newlen = recode_encoding ? 4*len:len;
380 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
381 if (v == NULL)
382 return NULL;
383 p = buf = PyBytes_AsString(v);
384 end = s + len;
385 while (s < end) {
386 if (*s != '\\') {
387 non_esc:
388 if (recode_encoding && (*s & 0x80)) {
389 PyObject *u, *w;
390 char *r;
391 const char* t;
392 Py_ssize_t rn;
393 t = s;
394 /* Decode non-ASCII bytes as UTF-8. */
395 while (t < end && (*t & 0x80)) t++;
396 u = PyUnicode_DecodeUTF8(s, t - s, errors);
397 if(!u) goto failed;
398
399 /* Recode them in target encoding. */
400 w = PyUnicode_AsEncodedString(
401 u, recode_encoding, errors);
402 Py_DECREF(u);
403 if (!w) goto failed;
404
405 /* Append bytes to output buffer. */
406 assert(PyBytes_Check(w));
407 r = PyBytes_AS_STRING(w);
408 rn = PyBytes_GET_SIZE(w);
409 Py_MEMCPY(p, r, rn);
410 p += rn;
411 Py_DECREF(w);
412 s = t;
413 } else {
414 *p++ = *s++;
415 }
416 continue;
417 }
418 s++;
419 if (s==end) {
420 PyErr_SetString(PyExc_ValueError,
421 "Trailing \\ in string");
422 goto failed;
423 }
424 switch (*s++) {
425 /* XXX This assumes ASCII! */
426 case '\n': break;
427 case '\\': *p++ = '\\'; break;
428 case '\'': *p++ = '\''; break;
429 case '\"': *p++ = '\"'; break;
430 case 'b': *p++ = '\b'; break;
431 case 'f': *p++ = '\014'; break; /* FF */
432 case 't': *p++ = '\t'; break;
433 case 'n': *p++ = '\n'; break;
434 case 'r': *p++ = '\r'; break;
435 case 'v': *p++ = '\013'; break; /* VT */
436 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
437 case '0': case '1': case '2': case '3':
438 case '4': case '5': case '6': case '7':
439 c = s[-1] - '0';
440 if (s < end && '0' <= *s && *s <= '7') {
441 c = (c<<3) + *s++ - '0';
442 if (s < end && '0' <= *s && *s <= '7')
443 c = (c<<3) + *s++ - '0';
444 }
445 *p++ = c;
446 break;
447 case 'x':
448 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
449 unsigned int x = 0;
450 c = Py_CHARMASK(*s);
451 s++;
452 if (ISDIGIT(c))
453 x = c - '0';
454 else if (ISLOWER(c))
455 x = 10 + c - 'a';
456 else
457 x = 10 + c - 'A';
458 x = x << 4;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x += c - '0';
463 else if (ISLOWER(c))
464 x += 10 + c - 'a';
465 else
466 x += 10 + c - 'A';
467 *p++ = x;
468 break;
469 }
470 if (!errors || strcmp(errors, "strict") == 0) {
471 PyErr_SetString(PyExc_ValueError,
472 "invalid \\x escape");
473 goto failed;
474 }
475 if (strcmp(errors, "replace") == 0) {
476 *p++ = '?';
477 } else if (strcmp(errors, "ignore") == 0)
478 /* do nothing */;
479 else {
480 PyErr_Format(PyExc_ValueError,
481 "decoding error; unknown "
482 "error handling code: %.400s",
483 errors);
484 goto failed;
485 }
486 default:
487 *p++ = '\\';
488 s--;
489 goto non_esc; /* an arbitry number of unescaped
490 UTF-8 bytes may follow. */
491 }
492 }
493 if (p-buf < newlen)
494 _PyBytes_Resize(&v, p - buf);
495 return v;
496 failed:
497 Py_DECREF(v);
498 return NULL;
499}
500
501/* -------------------------------------------------------------------- */
502/* object api */
503
504Py_ssize_t
505PyBytes_Size(register PyObject *op)
506{
507 if (!PyBytes_Check(op)) {
508 PyErr_Format(PyExc_TypeError,
509 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
510 return -1;
511 }
512 return Py_SIZE(op);
513}
514
515char *
516PyBytes_AsString(register PyObject *op)
517{
518 if (!PyBytes_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
520 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
521 return NULL;
522 }
523 return ((PyBytesObject *)op)->ob_sval;
524}
525
526int
527PyBytes_AsStringAndSize(register PyObject *obj,
528 register char **s,
529 register Py_ssize_t *len)
530{
531 if (s == NULL) {
532 PyErr_BadInternalCall();
533 return -1;
534 }
535
536 if (!PyBytes_Check(obj)) {
537 PyErr_Format(PyExc_TypeError,
538 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
539 return -1;
540 }
541
542 *s = PyBytes_AS_STRING(obj);
543 if (len != NULL)
544 *len = PyBytes_GET_SIZE(obj);
545 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
546 PyErr_SetString(PyExc_TypeError,
547 "expected bytes with no null");
548 return -1;
549 }
550 return 0;
551}
Neal Norwitz6968b052007-02-27 19:02:19 +0000552
553/* -------------------------------------------------------------------- */
554/* Methods */
555
556#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000557
Neal Norwitz6968b052007-02-27 19:02:19 +0000558#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000559#define STRINGLIB_LEN PyBytes_GET_SIZE
560#define STRINGLIB_NEW PyBytes_FromStringAndSize
561#define STRINGLIB_STR PyBytes_AS_STRING
562/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
563
564#define STRINGLIB_EMPTY nullstring
565#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
566#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
568#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569
Neal Norwitz6968b052007-02-27 19:02:19 +0000570#include "stringlib/count.h"
571#include "stringlib/find.h"
572#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000573#include "stringlib/ctype.h"
574#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
577#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000578
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579PyObject *
580PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000581{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000582 static const char *hexdigits = "0123456789abcdef";
583 register PyBytesObject* op = (PyBytesObject*) obj;
584 Py_ssize_t length = Py_SIZE(op);
585 size_t newsize = 3 + 4 * length;
586 PyObject *v;
587 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
588 PyErr_SetString(PyExc_OverflowError,
589 "bytes object is too large to make repr");
590 return NULL;
591 }
592 v = PyUnicode_FromUnicode(NULL, newsize);
593 if (v == NULL) {
594 return NULL;
595 }
596 else {
597 register Py_ssize_t i;
598 register Py_UNICODE c;
599 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
600 int quote;
601
602 /* Figure out which quote to use; single is preferred */
603 quote = '\'';
604 if (smartquotes) {
605 char *test, *start;
606 start = PyBytes_AS_STRING(op);
607 for (test = start; test < start+length; ++test) {
608 if (*test == '"') {
609 quote = '\''; /* back to single */
610 goto decided;
611 }
612 else if (*test == '\'')
613 quote = '"';
614 }
615 decided:
616 ;
617 }
618
619 *p++ = 'b', *p++ = quote;
620 for (i = 0; i < length; i++) {
621 /* There's at least enough room for a hex escape
622 and a closing quote. */
623 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
624 c = op->ob_sval[i];
625 if (c == quote || c == '\\')
626 *p++ = '\\', *p++ = c;
627 else if (c == '\t')
628 *p++ = '\\', *p++ = 't';
629 else if (c == '\n')
630 *p++ = '\\', *p++ = 'n';
631 else if (c == '\r')
632 *p++ = '\\', *p++ = 'r';
633 else if (c < ' ' || c >= 0x7f) {
634 *p++ = '\\';
635 *p++ = 'x';
636 *p++ = hexdigits[(c & 0xf0) >> 4];
637 *p++ = hexdigits[c & 0xf];
638 }
639 else
640 *p++ = c;
641 }
642 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
643 *p++ = quote;
644 *p = '\0';
645 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
646 Py_DECREF(v);
647 return NULL;
648 }
649 return v;
650 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000657}
658
Neal Norwitz6968b052007-02-27 19:02:19 +0000659static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000661{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662 if (Py_BytesWarningFlag) {
663 if (PyErr_WarnEx(PyExc_BytesWarning,
664 "str() on a bytes instance", 1))
665 return NULL;
666 }
667 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000668}
669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670static Py_ssize_t
671string_length(PyBytesObject *a)
672{
673 return Py_SIZE(a);
674}
Neal Norwitz6968b052007-02-27 19:02:19 +0000675
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000676/* This is also used by PyBytes_Concat() */
677static PyObject *
678string_concat(PyObject *a, PyObject *b)
679{
680 Py_ssize_t size;
681 Py_buffer va, vb;
682 PyObject *result = NULL;
683
684 va.len = -1;
685 vb.len = -1;
686 if (_getbuffer(a, &va) < 0 ||
687 _getbuffer(b, &vb) < 0) {
688 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
689 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
690 goto done;
691 }
692
693 /* Optimize end cases */
694 if (va.len == 0 && PyBytes_CheckExact(b)) {
695 result = b;
696 Py_INCREF(result);
697 goto done;
698 }
699 if (vb.len == 0 && PyBytes_CheckExact(a)) {
700 result = a;
701 Py_INCREF(result);
702 goto done;
703 }
704
705 size = va.len + vb.len;
706 if (size < 0) {
707 PyErr_NoMemory();
708 goto done;
709 }
710
711 result = PyBytes_FromStringAndSize(NULL, size);
712 if (result != NULL) {
713 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
714 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
715 }
716
717 done:
718 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000719 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000721 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000722 return result;
723}
Neal Norwitz6968b052007-02-27 19:02:19 +0000724
725static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000726string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000727{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000728 register Py_ssize_t i;
729 register Py_ssize_t j;
730 register Py_ssize_t size;
731 register PyBytesObject *op;
732 size_t nbytes;
733 if (n < 0)
734 n = 0;
735 /* watch out for overflows: the size can overflow int,
736 * and the # of bytes needed can overflow size_t
737 */
738 size = Py_SIZE(a) * n;
739 if (n && size / n != Py_SIZE(a)) {
740 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000741 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000742 return NULL;
743 }
744 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
745 Py_INCREF(a);
746 return (PyObject *)a;
747 }
748 nbytes = (size_t)size;
749 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
750 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000751 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000752 return NULL;
753 }
754 op = (PyBytesObject *)
755 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
756 if (op == NULL)
757 return PyErr_NoMemory();
758 PyObject_INIT_VAR(op, &PyBytes_Type, size);
759 op->ob_shash = -1;
760 op->ob_sval[size] = '\0';
761 if (Py_SIZE(a) == 1 && n > 0) {
762 memset(op->ob_sval, a->ob_sval[0] , n);
763 return (PyObject *) op;
764 }
765 i = 0;
766 if (i < size) {
767 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
768 i = Py_SIZE(a);
769 }
770 while (i < size) {
771 j = (i <= size-i) ? i : size-i;
772 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
773 i += j;
774 }
775 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000776}
777
Guido van Rossum98297ee2007-11-06 21:34:58 +0000778static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000779string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000780{
781 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
782 if (ival == -1 && PyErr_Occurred()) {
783 Py_buffer varg;
784 int pos;
785 PyErr_Clear();
786 if (_getbuffer(arg, &varg) < 0)
787 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000788 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000790 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000791 return pos >= 0;
792 }
793 if (ival < 0 || ival >= 256) {
794 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
795 return -1;
796 }
797
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000798 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000799}
800
Neal Norwitz6968b052007-02-27 19:02:19 +0000801static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000802string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000803{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000804 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000805 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000806 return NULL;
807 }
808 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000809}
810
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000811static PyObject*
812string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000813{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000814 int c;
815 Py_ssize_t len_a, len_b;
816 Py_ssize_t min_len;
817 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000818
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000819 /* Make sure both arguments are strings. */
820 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
821 if (Py_BytesWarningFlag && (op == Py_EQ) &&
822 (PyObject_IsInstance((PyObject*)a,
823 (PyObject*)&PyUnicode_Type) ||
824 PyObject_IsInstance((PyObject*)b,
825 (PyObject*)&PyUnicode_Type))) {
826 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000827 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000828 return NULL;
829 }
830 result = Py_NotImplemented;
831 goto out;
832 }
833 if (a == b) {
834 switch (op) {
835 case Py_EQ:case Py_LE:case Py_GE:
836 result = Py_True;
837 goto out;
838 case Py_NE:case Py_LT:case Py_GT:
839 result = Py_False;
840 goto out;
841 }
842 }
843 if (op == Py_EQ) {
844 /* Supporting Py_NE here as well does not save
845 much time, since Py_NE is rarely used. */
846 if (Py_SIZE(a) == Py_SIZE(b)
847 && (a->ob_sval[0] == b->ob_sval[0]
848 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
849 result = Py_True;
850 } else {
851 result = Py_False;
852 }
853 goto out;
854 }
855 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
856 min_len = (len_a < len_b) ? len_a : len_b;
857 if (min_len > 0) {
858 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
859 if (c==0)
860 c = memcmp(a->ob_sval, b->ob_sval, min_len);
861 } else
862 c = 0;
863 if (c == 0)
864 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
865 switch (op) {
866 case Py_LT: c = c < 0; break;
867 case Py_LE: c = c <= 0; break;
868 case Py_EQ: assert(0); break; /* unreachable */
869 case Py_NE: c = c != 0; break;
870 case Py_GT: c = c > 0; break;
871 case Py_GE: c = c >= 0; break;
872 default:
873 result = Py_NotImplemented;
874 goto out;
875 }
876 result = c ? Py_True : Py_False;
877 out:
878 Py_INCREF(result);
879 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000880}
881
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000882static long
883string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000884{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000885 register Py_ssize_t len;
886 register unsigned char *p;
887 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000888
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000889 if (a->ob_shash != -1)
890 return a->ob_shash;
891 len = Py_SIZE(a);
892 p = (unsigned char *) a->ob_sval;
893 x = *p << 7;
894 while (--len >= 0)
895 x = (1000003*x) ^ *p++;
896 x ^= Py_SIZE(a);
897 if (x == -1)
898 x = -2;
899 a->ob_shash = x;
900 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000901}
902
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000903static PyObject*
904string_subscript(PyBytesObject* self, PyObject* item)
905{
906 if (PyIndex_Check(item)) {
907 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
908 if (i == -1 && PyErr_Occurred())
909 return NULL;
910 if (i < 0)
911 i += PyBytes_GET_SIZE(self);
912 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
913 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000914 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000915 return NULL;
916 }
917 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
918 }
919 else if (PySlice_Check(item)) {
920 Py_ssize_t start, stop, step, slicelength, cur, i;
921 char* source_buf;
922 char* result_buf;
923 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000924
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000925 if (PySlice_GetIndicesEx((PySliceObject*)item,
926 PyBytes_GET_SIZE(self),
927 &start, &stop, &step, &slicelength) < 0) {
928 return NULL;
929 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000930
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000931 if (slicelength <= 0) {
932 return PyBytes_FromStringAndSize("", 0);
933 }
934 else if (start == 0 && step == 1 &&
935 slicelength == PyBytes_GET_SIZE(self) &&
936 PyBytes_CheckExact(self)) {
937 Py_INCREF(self);
938 return (PyObject *)self;
939 }
940 else if (step == 1) {
941 return PyBytes_FromStringAndSize(
942 PyBytes_AS_STRING(self) + start,
943 slicelength);
944 }
945 else {
946 source_buf = PyBytes_AsString((PyObject*)self);
947 result_buf = (char *)PyMem_Malloc(slicelength);
948 if (result_buf == NULL)
949 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000950
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951 for (cur = start, i = 0; i < slicelength;
952 cur += step, i++) {
953 result_buf[i] = source_buf[cur];
954 }
955
956 result = PyBytes_FromStringAndSize(result_buf,
957 slicelength);
958 PyMem_Free(result_buf);
959 return result;
960 }
961 }
962 else {
963 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000964 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000965 Py_TYPE(item)->tp_name);
966 return NULL;
967 }
968}
969
970static int
971string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
972{
Martin v. Löwis423be952008-08-13 15:53:07 +0000973 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000974 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975}
976
977static PySequenceMethods string_as_sequence = {
978 (lenfunc)string_length, /*sq_length*/
979 (binaryfunc)string_concat, /*sq_concat*/
980 (ssizeargfunc)string_repeat, /*sq_repeat*/
981 (ssizeargfunc)string_item, /*sq_item*/
982 0, /*sq_slice*/
983 0, /*sq_ass_item*/
984 0, /*sq_ass_slice*/
985 (objobjproc)string_contains /*sq_contains*/
986};
987
988static PyMappingMethods string_as_mapping = {
989 (lenfunc)string_length,
990 (binaryfunc)string_subscript,
991 0,
992};
993
994static PyBufferProcs string_as_buffer = {
995 (getbufferproc)string_buffer_getbuffer,
996 NULL,
997};
998
999
/* Selectors for the three strip flavours.  They double as indices into
   stripformat[] below, so the numbering must stay in step with it. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple format strings, one per selector above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1008
Neal Norwitz6968b052007-02-27 19:02:19 +00001009
/* Test whether the `length` bytes of `pattern` occur in `target` starting
   at `offset`.  The first and last bytes are compared directly before
   falling back to memcmp() on the interior.

   Don't call if length < 2: the interior length (length - 2) would be
   negative.

   Every parameter is parenthesized so that expression arguments (for
   example a conditional expression) expand with the intended precedence.
   Arguments are still evaluated more than once, so they must be free of
   side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1015
1016
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits,
   capped at MAX_PREALLOC.  5 splits gives 6 elements.  The argument is
   parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append the slice data[left:right] to the result list as a new bytes
   object.

   Relies on these names in the calling function: `str` (scratch PyObject *),
   `list` (the result list), `count` (number of items stored so far) and an
   `onError` label that disposes of `list`.

   While count < MAX_PREALLOC the item is stored straight into the list
   slot, which takes over the new reference; after that PyList_Append()
   is used, which takes its own reference, so ours is dropped either way.

   Wrapped in do { } while (0) so the macro behaves as one statement. */
#define SPLIT_ADD(data, left, right) do {                       \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            else                                                \
                Py_DECREF(str);                                 \
        }                                                       \
        count++;                                                \
    } while (0)

/* Always force the list to the expected size (uses the caller's `count`). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Move index i forwards over whitespace / non-whitespace, stopping at len. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !ISSPACE((s)[(i)])) (i)++; }
/* Move index i backwards over whitespace / non-whitespace, stopping at -1. */
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !ISSPACE((s)[(i)])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001054
1055Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001056split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001057{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001058 const char *s = PyBytes_AS_STRING(self);
1059 Py_ssize_t i, j, count=0;
1060 PyObject *str;
1061 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001063 if (list == NULL)
1064 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001067
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001068 while (maxsplit-- > 0) {
1069 SKIP_SPACE(s, i, len);
1070 if (i==len) break;
1071 j = i; i++;
1072 SKIP_NONSPACE(s, i, len);
1073 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1074 /* No whitespace in self, so just use it as list[0] */
1075 Py_INCREF(self);
1076 PyList_SET_ITEM(list, 0, (PyObject *)self);
1077 count++;
1078 break;
1079 }
1080 SPLIT_ADD(s, j, i);
1081 }
1082
1083 if (i < len) {
1084 /* Only occurs when maxsplit was reached */
1085 /* Skip any remaining whitespace and copy to end of string */
1086 SKIP_SPACE(s, i, len);
1087 if (i != len)
1088 SPLIT_ADD(s, i, len);
1089 }
1090 FIX_PREALLOC_SIZE(list);
1091 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001092 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001093 Py_DECREF(list);
1094 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001095}
1096
Guido van Rossum8f950672007-09-10 16:53:45 +00001097Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001099{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001100 const char *s = PyBytes_AS_STRING(self);
1101 register Py_ssize_t i, j, count=0;
1102 PyObject *str;
1103 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001104
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105 if (list == NULL)
1106 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108 i = j = 0;
1109 while ((j < len) && (maxcount-- > 0)) {
1110 for(; j<len; j++) {
1111 /* I found that using memchr makes no difference */
1112 if (s[j] == ch) {
1113 SPLIT_ADD(s, i, j);
1114 i = j = j + 1;
1115 break;
1116 }
1117 }
1118 }
1119 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1120 /* ch not in self, so just use self as list[0] */
1121 Py_INCREF(self);
1122 PyList_SET_ITEM(list, 0, (PyObject *)self);
1123 count++;
1124 }
1125 else if (i <= len) {
1126 SPLIT_ADD(s, i, len);
1127 }
1128 FIX_PREALLOC_SIZE(list);
1129 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001130
1131 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001132 Py_DECREF(list);
1133 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001134}
1135
Neal Norwitz6968b052007-02-27 19:02:19 +00001136PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001137"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001138\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001139Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001140If sep is not specified or is None, B is split on ASCII whitespace\n\
1141characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001142If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001143
1144static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001145string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001146{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1148 Py_ssize_t maxsplit = -1, count=0;
1149 const char *s = PyBytes_AS_STRING(self), *sub;
1150 Py_buffer vsub;
1151 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001152#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001153 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001154#endif
1155
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001156 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1157 return NULL;
1158 if (maxsplit < 0)
1159 maxsplit = PY_SSIZE_T_MAX;
1160 if (subobj == Py_None)
1161 return split_whitespace(self, len, maxsplit);
1162 if (_getbuffer(subobj, &vsub) < 0)
1163 return NULL;
1164 sub = vsub.buf;
1165 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167 if (n == 0) {
1168 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001169 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001170 return NULL;
1171 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001172 else if (n == 1) {
1173 list = split_char(self, len, sub[0], maxsplit);
1174 PyBuffer_Release(&vsub);
1175 return list;
1176 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001177
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001178 list = PyList_New(PREALLOC_SIZE(maxsplit));
1179 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001180 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001181 return NULL;
1182 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001183
1184#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001185 i = j = 0;
1186 while (maxsplit-- > 0) {
1187 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1188 if (pos < 0)
1189 break;
1190 j = i+pos;
1191 SPLIT_ADD(s, i, j);
1192 i = j + n;
1193 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001194#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195 i = j = 0;
1196 while ((j+n <= len) && (maxsplit-- > 0)) {
1197 for (; j+n <= len; j++) {
1198 if (Py_STRING_MATCH(s, j, sub, n)) {
1199 SPLIT_ADD(s, i, j);
1200 i = j = j + n;
1201 break;
1202 }
1203 }
1204 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001205#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206 SPLIT_ADD(s, i, len);
1207 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001208 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001210
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001211 onError:
1212 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001213 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001215}
1216
Neal Norwitz6968b052007-02-27 19:02:19 +00001217PyDoc_STRVAR(partition__doc__,
1218"B.partition(sep) -> (head, sep, tail)\n\
1219\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001220Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001221the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001223
1224static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001226{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227 const char *sep;
1228 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001229
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230 if (PyBytes_Check(sep_obj)) {
1231 sep = PyBytes_AS_STRING(sep_obj);
1232 sep_len = PyBytes_GET_SIZE(sep_obj);
1233 }
1234 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1235 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001236
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237 return stringlib_partition(
1238 (PyObject*) self,
1239 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1240 sep_obj, sep, sep_len
1241 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001242}
1243
1244PyDoc_STRVAR(rpartition__doc__,
1245"B.rpartition(sep) -> (tail, sep, head)\n\
1246\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001247Search for the separator sep in B, starting at the end of B,\n\
1248and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001249part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001251
1252static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001254{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255 const char *sep;
1256 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258 if (PyBytes_Check(sep_obj)) {
1259 sep = PyBytes_AS_STRING(sep_obj);
1260 sep_len = PyBytes_GET_SIZE(sep_obj);
1261 }
1262 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1263 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001264
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265 return stringlib_rpartition(
1266 (PyObject*) self,
1267 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1268 sep_obj, sep, sep_len
1269 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001270}
1271
1272Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001274{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275 const char *s = PyBytes_AS_STRING(self);
1276 Py_ssize_t i, j, count=0;
1277 PyObject *str;
1278 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001279
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280 if (list == NULL)
1281 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001282
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001284
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001285 while (maxsplit-- > 0) {
1286 RSKIP_SPACE(s, i);
1287 if (i<0) break;
1288 j = i; i--;
1289 RSKIP_NONSPACE(s, i);
1290 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1291 /* No whitespace in self, so just use it as list[0] */
1292 Py_INCREF(self);
1293 PyList_SET_ITEM(list, 0, (PyObject *)self);
1294 count++;
1295 break;
1296 }
1297 SPLIT_ADD(s, i + 1, j + 1);
1298 }
1299 if (i >= 0) {
1300 /* Only occurs when maxsplit was reached. Skip any remaining
1301 whitespace and copy to beginning of string. */
1302 RSKIP_SPACE(s, i);
1303 if (i >= 0)
1304 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001305
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306 }
1307 FIX_PREALLOC_SIZE(list);
1308 if (PyList_Reverse(list) < 0)
1309 goto onError;
1310 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001311 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312 Py_DECREF(list);
1313 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001314}
1315
Guido van Rossum8f950672007-09-10 16:53:45 +00001316Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001317rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001318{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319 const char *s = PyBytes_AS_STRING(self);
1320 register Py_ssize_t i, j, count=0;
1321 PyObject *str;
1322 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001323
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324 if (list == NULL)
1325 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001326
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327 i = j = len - 1;
1328 while ((i >= 0) && (maxcount-- > 0)) {
1329 for (; i >= 0; i--) {
1330 if (s[i] == ch) {
1331 SPLIT_ADD(s, i + 1, j + 1);
1332 j = i = i - 1;
1333 break;
1334 }
1335 }
1336 }
1337 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1338 /* ch not in self, so just use self as list[0] */
1339 Py_INCREF(self);
1340 PyList_SET_ITEM(list, 0, (PyObject *)self);
1341 count++;
1342 }
1343 else if (j >= -1) {
1344 SPLIT_ADD(s, 0, j + 1);
1345 }
1346 FIX_PREALLOC_SIZE(list);
1347 if (PyList_Reverse(list) < 0)
1348 goto onError;
1349 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001350
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001351 onError:
1352 Py_DECREF(list);
1353 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001354}
1355
Neal Norwitz6968b052007-02-27 19:02:19 +00001356PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001357"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001358\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001359Return a list of the sections in B, using sep as the delimiter,\n\
1360starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001361If sep is not given, B is split on ASCII whitespace characters\n\
1362(space, tab, return, newline, formfeed, vertical tab).\n\
1363If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001364
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Neal Norwitz6968b052007-02-27 19:02:19 +00001366static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001368{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1370 Py_ssize_t maxsplit = -1, count=0;
1371 const char *s, *sub;
1372 Py_buffer vsub;
1373 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001374
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1376 return NULL;
1377 if (maxsplit < 0)
1378 maxsplit = PY_SSIZE_T_MAX;
1379 if (subobj == Py_None)
1380 return rsplit_whitespace(self, len, maxsplit);
1381 if (_getbuffer(subobj, &vsub) < 0)
1382 return NULL;
1383 sub = vsub.buf;
1384 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386 if (n == 0) {
1387 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001388 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389 return NULL;
1390 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001391 else if (n == 1) {
1392 list = rsplit_char(self, len, sub[0], maxsplit);
1393 PyBuffer_Release(&vsub);
1394 return list;
1395 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001396
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397 list = PyList_New(PREALLOC_SIZE(maxsplit));
1398 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001399 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400 return NULL;
1401 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001402
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403 j = len;
1404 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001405
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406 s = PyBytes_AS_STRING(self);
1407 while ( (i >= 0) && (maxsplit-- > 0) ) {
1408 for (; i>=0; i--) {
1409 if (Py_STRING_MATCH(s, i, sub, n)) {
1410 SPLIT_ADD(s, i + n, j);
1411 j = i;
1412 i -= n;
1413 break;
1414 }
1415 }
1416 }
1417 SPLIT_ADD(s, 0, j);
1418 FIX_PREALLOC_SIZE(list);
1419 if (PyList_Reverse(list) < 0)
1420 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001421 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001423
1424onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001426 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001428}
1429
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430#undef SPLIT_ADD
1431#undef MAX_PREALLOC
1432#undef PREALLOC_SIZE
1433
1434
1435PyDoc_STRVAR(join__doc__,
1436"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001437\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001438Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1440
Neal Norwitz6968b052007-02-27 19:02:19 +00001441static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001443{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444 char *sep = PyBytes_AS_STRING(self);
1445 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1446 PyObject *res = NULL;
1447 char *p;
1448 Py_ssize_t seqlen = 0;
1449 size_t sz = 0;
1450 Py_ssize_t i;
1451 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001452
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001453 seq = PySequence_Fast(orig, "");
1454 if (seq == NULL) {
1455 return NULL;
1456 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001457
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458 seqlen = PySequence_Size(seq);
1459 if (seqlen == 0) {
1460 Py_DECREF(seq);
1461 return PyBytes_FromString("");
1462 }
1463 if (seqlen == 1) {
1464 item = PySequence_Fast_GET_ITEM(seq, 0);
1465 if (PyBytes_CheckExact(item)) {
1466 Py_INCREF(item);
1467 Py_DECREF(seq);
1468 return item;
1469 }
1470 }
1471
1472 /* There are at least two things to join, or else we have a subclass
1473 * of the builtin types in the sequence.
1474 * Do a pre-pass to figure out the total amount of space we'll
1475 * need (sz), and see whether all argument are bytes.
1476 */
1477 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1478 for (i = 0; i < seqlen; i++) {
1479 const size_t old_sz = sz;
1480 item = PySequence_Fast_GET_ITEM(seq, i);
1481 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1482 PyErr_Format(PyExc_TypeError,
1483 "sequence item %zd: expected bytes,"
1484 " %.80s found",
1485 i, Py_TYPE(item)->tp_name);
1486 Py_DECREF(seq);
1487 return NULL;
1488 }
1489 sz += Py_SIZE(item);
1490 if (i != 0)
1491 sz += seplen;
1492 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1493 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001494 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001495 Py_DECREF(seq);
1496 return NULL;
1497 }
1498 }
1499
1500 /* Allocate result space. */
1501 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1502 if (res == NULL) {
1503 Py_DECREF(seq);
1504 return NULL;
1505 }
1506
1507 /* Catenate everything. */
1508 /* I'm not worried about a PyByteArray item growing because there's
1509 nowhere in this function where we release the GIL. */
1510 p = PyBytes_AS_STRING(res);
1511 for (i = 0; i < seqlen; ++i) {
1512 size_t n;
1513 char *q;
1514 if (i) {
1515 Py_MEMCPY(p, sep, seplen);
1516 p += seplen;
1517 }
1518 item = PySequence_Fast_GET_ITEM(seq, i);
1519 n = Py_SIZE(item);
1520 if (PyBytes_Check(item))
1521 q = PyBytes_AS_STRING(item);
1522 else
1523 q = PyByteArray_AS_STRING(item);
1524 Py_MEMCPY(p, q, n);
1525 p += n;
1526 }
1527
1528 Py_DECREF(seq);
1529 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001530}
1531
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001532PyObject *
1533_PyBytes_Join(PyObject *sep, PyObject *x)
1534{
1535 assert(sep != NULL && PyBytes_Check(sep));
1536 assert(x != NULL);
1537 return string_join(sep, x);
1538}
1539
1540Py_LOCAL_INLINE(void)
1541string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1542{
1543 if (*end > len)
1544 *end = len;
1545 else if (*end < 0)
1546 *end += len;
1547 if (*end < 0)
1548 *end = 0;
1549 if (*start < 0)
1550 *start += len;
1551 if (*start < 0)
1552 *start = 0;
1553}
1554
1555Py_LOCAL_INLINE(Py_ssize_t)
1556string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1557{
1558 PyObject *subobj;
1559 const char *sub;
1560 Py_ssize_t sub_len;
1561 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1562 PyObject *obj_start=Py_None, *obj_end=Py_None;
1563
1564 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1565 &obj_start, &obj_end))
1566 return -2;
1567 /* To support None in "start" and "end" arguments, meaning
1568 the same as if they were not passed.
1569 */
1570 if (obj_start != Py_None)
1571 if (!_PyEval_SliceIndex(obj_start, &start))
1572 return -2;
1573 if (obj_end != Py_None)
1574 if (!_PyEval_SliceIndex(obj_end, &end))
1575 return -2;
1576
1577 if (PyBytes_Check(subobj)) {
1578 sub = PyBytes_AS_STRING(subobj);
1579 sub_len = PyBytes_GET_SIZE(subobj);
1580 }
1581 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1582 /* XXX - the "expected a character buffer object" is pretty
1583 confusing for a non-expert. remap to something else ? */
1584 return -2;
1585
1586 if (dir > 0)
1587 return stringlib_find_slice(
1588 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1589 sub, sub_len, start, end);
1590 else
1591 return stringlib_rfind_slice(
1592 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1593 sub, sub_len, start, end);
1594}
1595
1596
1597PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001598"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001599\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600Return the lowest index in S where substring sub is found,\n\
1601such that sub is contained within s[start:end]. Optional\n\
1602arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001603\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001604Return -1 on failure.");
1605
Neal Norwitz6968b052007-02-27 19:02:19 +00001606static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001608{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609 Py_ssize_t result = string_find_internal(self, args, +1);
1610 if (result == -2)
1611 return NULL;
1612 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001613}
1614
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
1616PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001617"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001618\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619Like B.find() but raise ValueError when the substring is not found.");
1620
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001621static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001623{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624 Py_ssize_t result = string_find_internal(self, args, +1);
1625 if (result == -2)
1626 return NULL;
1627 if (result == -1) {
1628 PyErr_SetString(PyExc_ValueError,
1629 "substring not found");
1630 return NULL;
1631 }
1632 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001633}
1634
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635
1636PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001637"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001638\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001639Return the highest index in B where substring sub is found,\n\
1640such that sub is contained within s[start:end]. Optional\n\
1641arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001642\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643Return -1 on failure.");
1644
Neal Norwitz6968b052007-02-27 19:02:19 +00001645static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001647{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648 Py_ssize_t result = string_find_internal(self, args, -1);
1649 if (result == -2)
1650 return NULL;
1651 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001652}
1653
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001654
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001656"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657\n\
1658Like B.rfind() but raise ValueError when the substring is not found.");
1659
1660static PyObject *
1661string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001662{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663 Py_ssize_t result = string_find_internal(self, args, -1);
1664 if (result == -2)
1665 return NULL;
1666 if (result == -1) {
1667 PyErr_SetString(PyExc_ValueError,
1668 "substring not found");
1669 return NULL;
1670 }
1671 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001672}
1673
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
1675Py_LOCAL_INLINE(PyObject *)
1676do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001677{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678 Py_buffer vsep;
1679 char *s = PyBytes_AS_STRING(self);
1680 Py_ssize_t len = PyBytes_GET_SIZE(self);
1681 char *sep;
1682 Py_ssize_t seplen;
1683 Py_ssize_t i, j;
1684
1685 if (_getbuffer(sepobj, &vsep) < 0)
1686 return NULL;
1687 sep = vsep.buf;
1688 seplen = vsep.len;
1689
1690 i = 0;
1691 if (striptype != RIGHTSTRIP) {
1692 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1693 i++;
1694 }
1695 }
1696
1697 j = len;
1698 if (striptype != LEFTSTRIP) {
1699 do {
1700 j--;
1701 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1702 j++;
1703 }
1704
Martin v. Löwis423be952008-08-13 15:53:07 +00001705 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
1707 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1708 Py_INCREF(self);
1709 return (PyObject*)self;
1710 }
1711 else
1712 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001713}
1714
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
1716Py_LOCAL_INLINE(PyObject *)
1717do_strip(PyBytesObject *self, int striptype)
1718{
1719 char *s = PyBytes_AS_STRING(self);
1720 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1721
1722 i = 0;
1723 if (striptype != RIGHTSTRIP) {
1724 while (i < len && ISSPACE(s[i])) {
1725 i++;
1726 }
1727 }
1728
1729 j = len;
1730 if (striptype != LEFTSTRIP) {
1731 do {
1732 j--;
1733 } while (j >= i && ISSPACE(s[j]));
1734 j++;
1735 }
1736
1737 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1738 Py_INCREF(self);
1739 return (PyObject*)self;
1740 }
1741 else
1742 return PyBytes_FromStringAndSize(s+i, j-i);
1743}
1744
1745
1746Py_LOCAL_INLINE(PyObject *)
1747do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1748{
1749 PyObject *sep = NULL;
1750
1751 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1752 return NULL;
1753
1754 if (sep != NULL && sep != Py_None) {
1755 return do_xstrip(self, striptype, sep);
1756 }
1757 return do_strip(self, striptype);
1758}
1759
1760
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001761PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001763\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001764Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001766static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001768{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769 if (PyTuple_GET_SIZE(args) == 0)
1770 return do_strip(self, BOTHSTRIP); /* Common case */
1771 else
1772 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001773}
1774
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001778\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001779Strip leading bytes contained in the argument.\n\
1780If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001783{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784 if (PyTuple_GET_SIZE(args) == 0)
1785 return do_strip(self, LEFTSTRIP); /* Common case */
1786 else
1787 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001788}
1789
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001793\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001794Strip trailing bytes contained in the argument.\n\
1795If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001796static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001798{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799 if (PyTuple_GET_SIZE(args) == 0)
1800 return do_strip(self, RIGHTSTRIP); /* Common case */
1801 else
1802 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001803}
Neal Norwitz6968b052007-02-27 19:02:19 +00001804
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
1806PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001807"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001808\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809Return the number of non-overlapping occurrences of substring sub in\n\
1810string S[start:end]. Optional arguments start and end are interpreted\n\
1811as in slice notation.");
1812
1813static PyObject *
1814string_count(PyBytesObject *self, PyObject *args)
1815{
1816 PyObject *sub_obj;
1817 const char *str = PyBytes_AS_STRING(self), *sub;
1818 Py_ssize_t sub_len;
1819 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1820
1821 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1822 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1823 return NULL;
1824
1825 if (PyBytes_Check(sub_obj)) {
1826 sub = PyBytes_AS_STRING(sub_obj);
1827 sub_len = PyBytes_GET_SIZE(sub_obj);
1828 }
1829 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1830 return NULL;
1831
1832 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1833
1834 return PyLong_FromSsize_t(
1835 stringlib_count(str + start, end - start, sub, sub_len)
1836 );
1837}
1838
1839
1840PyDoc_STRVAR(translate__doc__,
1841"B.translate(table[, deletechars]) -> bytes\n\
1842\n\
1843Return a copy of B, where all characters occurring in the\n\
1844optional argument deletechars are removed, and the remaining\n\
1845characters have been mapped through the given translation\n\
1846table, which must be a bytes object of length 256.");
1847
1848static PyObject *
1849string_translate(PyBytesObject *self, PyObject *args)
1850{
1851 register char *input, *output;
1852 const char *table;
1853 register Py_ssize_t i, c, changed = 0;
1854 PyObject *input_obj = (PyObject*)self;
1855 const char *output_start, *del_table=NULL;
1856 Py_ssize_t inlen, tablen, dellen = 0;
1857 PyObject *result;
1858 int trans_table[256];
1859 PyObject *tableobj, *delobj = NULL;
1860
1861 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1862 &tableobj, &delobj))
1863 return NULL;
1864
1865 if (PyBytes_Check(tableobj)) {
1866 table = PyBytes_AS_STRING(tableobj);
1867 tablen = PyBytes_GET_SIZE(tableobj);
1868 }
1869 else if (tableobj == Py_None) {
1870 table = NULL;
1871 tablen = 256;
1872 }
1873 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1874 return NULL;
1875
1876 if (tablen != 256) {
1877 PyErr_SetString(PyExc_ValueError,
1878 "translation table must be 256 characters long");
1879 return NULL;
1880 }
1881
1882 if (delobj != NULL) {
1883 if (PyBytes_Check(delobj)) {
1884 del_table = PyBytes_AS_STRING(delobj);
1885 dellen = PyBytes_GET_SIZE(delobj);
1886 }
1887 else if (PyUnicode_Check(delobj)) {
1888 PyErr_SetString(PyExc_TypeError,
1889 "deletions are implemented differently for unicode");
1890 return NULL;
1891 }
1892 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1893 return NULL;
1894 }
1895 else {
1896 del_table = NULL;
1897 dellen = 0;
1898 }
1899
1900 inlen = PyBytes_GET_SIZE(input_obj);
1901 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1902 if (result == NULL)
1903 return NULL;
1904 output_start = output = PyBytes_AsString(result);
1905 input = PyBytes_AS_STRING(input_obj);
1906
1907 if (dellen == 0 && table != NULL) {
1908 /* If no deletions are required, use faster code */
1909 for (i = inlen; --i >= 0; ) {
1910 c = Py_CHARMASK(*input++);
1911 if (Py_CHARMASK((*output++ = table[c])) != c)
1912 changed = 1;
1913 }
1914 if (changed || !PyBytes_CheckExact(input_obj))
1915 return result;
1916 Py_DECREF(result);
1917 Py_INCREF(input_obj);
1918 return input_obj;
1919 }
1920
1921 if (table == NULL) {
1922 for (i = 0; i < 256; i++)
1923 trans_table[i] = Py_CHARMASK(i);
1924 } else {
1925 for (i = 0; i < 256; i++)
1926 trans_table[i] = Py_CHARMASK(table[i]);
1927 }
1928
1929 for (i = 0; i < dellen; i++)
1930 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1931
1932 for (i = inlen; --i >= 0; ) {
1933 c = Py_CHARMASK(*input++);
1934 if (trans_table[c] != -1)
1935 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1936 continue;
1937 changed = 1;
1938 }
1939 if (!changed && PyBytes_CheckExact(input_obj)) {
1940 Py_DECREF(result);
1941 Py_INCREF(input_obj);
1942 return input_obj;
1943 }
1944 /* Fix the size of the resulting string */
1945 if (inlen > 0)
1946 _PyBytes_Resize(&result, output - output_start);
1947 return result;
1948}
1949
1950
/* Scan directions passed as the `direction' argument of findstring()
   and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL if there is none (wraps memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1958
1959/* String ops must return a string. */
1960/* If the object is subclass of string, create a copy */
1961Py_LOCAL(PyBytesObject *)
1962return_self(PyBytesObject *self)
1963{
1964 if (PyBytes_CheckExact(self)) {
1965 Py_INCREF(self);
1966 return self;
1967 }
1968 return (PyBytesObject *)PyBytes_FromStringAndSize(
1969 PyBytes_AS_STRING(self),
1970 PyBytes_GET_SIZE(self));
1971}
1972
1973Py_LOCAL_INLINE(Py_ssize_t)
1974countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1975{
1976 Py_ssize_t count=0;
1977 const char *start=target;
1978 const char *end=target+target_len;
1979
1980 while ( (start=findchar(start, end-start, c)) != NULL ) {
1981 count++;
1982 if (count >= maxcount)
1983 break;
1984 start += 1;
1985 }
1986 return count;
1987}
1988
1989Py_LOCAL(Py_ssize_t)
1990findstring(const char *target, Py_ssize_t target_len,
1991 const char *pattern, Py_ssize_t pattern_len,
1992 Py_ssize_t start,
1993 Py_ssize_t end,
1994 int direction)
1995{
1996 if (start < 0) {
1997 start += target_len;
1998 if (start < 0)
1999 start = 0;
2000 }
2001 if (end > target_len) {
2002 end = target_len;
2003 } else if (end < 0) {
2004 end += target_len;
2005 if (end < 0)
2006 end = 0;
2007 }
2008
2009 /* zero-length substrings always match at the first attempt */
2010 if (pattern_len == 0)
2011 return (direction > 0) ? start : end;
2012
2013 end -= pattern_len;
2014
2015 if (direction < 0) {
2016 for (; end >= start; end--)
2017 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2018 return end;
2019 } else {
2020 for (; start <= end; start++)
2021 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2022 return start;
2023 }
2024 return -1;
2025}
2026
2027Py_LOCAL_INLINE(Py_ssize_t)
2028countstring(const char *target, Py_ssize_t target_len,
2029 const char *pattern, Py_ssize_t pattern_len,
2030 Py_ssize_t start,
2031 Py_ssize_t end,
2032 int direction, Py_ssize_t maxcount)
2033{
2034 Py_ssize_t count=0;
2035
2036 if (start < 0) {
2037 start += target_len;
2038 if (start < 0)
2039 start = 0;
2040 }
2041 if (end > target_len) {
2042 end = target_len;
2043 } else if (end < 0) {
2044 end += target_len;
2045 if (end < 0)
2046 end = 0;
2047 }
2048
2049 /* zero-length substrings match everywhere */
2050 if (pattern_len == 0 || maxcount == 0) {
2051 if (target_len+1 < maxcount)
2052 return target_len+1;
2053 return maxcount;
2054 }
2055
2056 end -= pattern_len;
2057 if (direction < 0) {
2058 for (; (end >= start); end--)
2059 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2060 count++;
2061 if (--maxcount <= 0) break;
2062 end -= pattern_len-1;
2063 }
2064 } else {
2065 for (; (start <= end); start++)
2066 if (Py_STRING_MATCH(target, start,
2067 pattern, pattern_len)) {
2068 count++;
2069 if (--maxcount <= 0)
2070 break;
2071 start += pattern_len-1;
2072 }
2073 }
2074 return count;
2075}
2076
2077
2078/* Algorithms for different cases of string replacement */
2079
2080/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2081Py_LOCAL(PyBytesObject *)
2082replace_interleave(PyBytesObject *self,
2083 const char *to_s, Py_ssize_t to_len,
2084 Py_ssize_t maxcount)
2085{
2086 char *self_s, *result_s;
2087 Py_ssize_t self_len, result_len;
2088 Py_ssize_t count, i, product;
2089 PyBytesObject *result;
2090
2091 self_len = PyBytes_GET_SIZE(self);
2092
2093 /* 1 at the end plus 1 after every character */
2094 count = self_len+1;
2095 if (maxcount < count)
2096 count = maxcount;
2097
2098 /* Check for overflow */
2099 /* result_len = count * to_len + self_len; */
2100 product = count * to_len;
2101 if (product / to_len != count) {
2102 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002103 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104 return NULL;
2105 }
2106 result_len = product + self_len;
2107 if (result_len < 0) {
2108 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002109 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110 return NULL;
2111 }
2112
2113 if (! (result = (PyBytesObject *)
2114 PyBytes_FromStringAndSize(NULL, result_len)) )
2115 return NULL;
2116
2117 self_s = PyBytes_AS_STRING(self);
2118 result_s = PyBytes_AS_STRING(result);
2119
2120 /* TODO: special case single character, which doesn't need memcpy */
2121
2122 /* Lay the first one down (guaranteed this will occur) */
2123 Py_MEMCPY(result_s, to_s, to_len);
2124 result_s += to_len;
2125 count -= 1;
2126
2127 for (i=0; i<count; i++) {
2128 *result_s++ = *self_s++;
2129 Py_MEMCPY(result_s, to_s, to_len);
2130 result_s += to_len;
2131 }
2132
2133 /* Copy the rest of the original string */
2134 Py_MEMCPY(result_s, self_s, self_len-i);
2135
2136 return result;
2137}
2138
2139/* Special case for deleting a single character */
2140/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2141Py_LOCAL(PyBytesObject *)
2142replace_delete_single_character(PyBytesObject *self,
2143 char from_c, Py_ssize_t maxcount)
2144{
2145 char *self_s, *result_s;
2146 char *start, *next, *end;
2147 Py_ssize_t self_len, result_len;
2148 Py_ssize_t count;
2149 PyBytesObject *result;
2150
2151 self_len = PyBytes_GET_SIZE(self);
2152 self_s = PyBytes_AS_STRING(self);
2153
2154 count = countchar(self_s, self_len, from_c, maxcount);
2155 if (count == 0) {
2156 return return_self(self);
2157 }
2158
2159 result_len = self_len - count; /* from_len == 1 */
2160 assert(result_len>=0);
2161
2162 if ( (result = (PyBytesObject *)
2163 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2164 return NULL;
2165 result_s = PyBytes_AS_STRING(result);
2166
2167 start = self_s;
2168 end = self_s + self_len;
2169 while (count-- > 0) {
2170 next = findchar(start, end-start, from_c);
2171 if (next == NULL)
2172 break;
2173 Py_MEMCPY(result_s, start, next-start);
2174 result_s += (next-start);
2175 start = next+1;
2176 }
2177 Py_MEMCPY(result_s, start, end-start);
2178
2179 return result;
2180}
2181
2182/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2183
2184Py_LOCAL(PyBytesObject *)
2185replace_delete_substring(PyBytesObject *self,
2186 const char *from_s, Py_ssize_t from_len,
2187 Py_ssize_t maxcount) {
2188 char *self_s, *result_s;
2189 char *start, *next, *end;
2190 Py_ssize_t self_len, result_len;
2191 Py_ssize_t count, offset;
2192 PyBytesObject *result;
2193
2194 self_len = PyBytes_GET_SIZE(self);
2195 self_s = PyBytes_AS_STRING(self);
2196
2197 count = countstring(self_s, self_len,
2198 from_s, from_len,
2199 0, self_len, 1,
2200 maxcount);
2201
2202 if (count == 0) {
2203 /* no matches */
2204 return return_self(self);
2205 }
2206
2207 result_len = self_len - (count * from_len);
2208 assert (result_len>=0);
2209
2210 if ( (result = (PyBytesObject *)
2211 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2212 return NULL;
2213
2214 result_s = PyBytes_AS_STRING(result);
2215
2216 start = self_s;
2217 end = self_s + self_len;
2218 while (count-- > 0) {
2219 offset = findstring(start, end-start,
2220 from_s, from_len,
2221 0, end-start, FORWARD);
2222 if (offset == -1)
2223 break;
2224 next = start + offset;
2225
2226 Py_MEMCPY(result_s, start, next-start);
2227
2228 result_s += (next-start);
2229 start = next+from_len;
2230 }
2231 Py_MEMCPY(result_s, start, end-start);
2232 return result;
2233}
2234
2235/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2236Py_LOCAL(PyBytesObject *)
2237replace_single_character_in_place(PyBytesObject *self,
2238 char from_c, char to_c,
2239 Py_ssize_t maxcount)
2240{
2241 char *self_s, *result_s, *start, *end, *next;
2242 Py_ssize_t self_len;
2243 PyBytesObject *result;
2244
2245 /* The result string will be the same size */
2246 self_s = PyBytes_AS_STRING(self);
2247 self_len = PyBytes_GET_SIZE(self);
2248
2249 next = findchar(self_s, self_len, from_c);
2250
2251 if (next == NULL) {
2252 /* No matches; return the original string */
2253 return return_self(self);
2254 }
2255
2256 /* Need to make a new string */
2257 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2258 if (result == NULL)
2259 return NULL;
2260 result_s = PyBytes_AS_STRING(result);
2261 Py_MEMCPY(result_s, self_s, self_len);
2262
2263 /* change everything in-place, starting with this one */
2264 start = result_s + (next-self_s);
2265 *start = to_c;
2266 start++;
2267 end = result_s + self_len;
2268
2269 while (--maxcount > 0) {
2270 next = findchar(start, end-start, from_c);
2271 if (next == NULL)
2272 break;
2273 *next = to_c;
2274 start = next+1;
2275 }
2276
2277 return result;
2278}
2279
2280/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2281Py_LOCAL(PyBytesObject *)
2282replace_substring_in_place(PyBytesObject *self,
2283 const char *from_s, Py_ssize_t from_len,
2284 const char *to_s, Py_ssize_t to_len,
2285 Py_ssize_t maxcount)
2286{
2287 char *result_s, *start, *end;
2288 char *self_s;
2289 Py_ssize_t self_len, offset;
2290 PyBytesObject *result;
2291
2292 /* The result string will be the same size */
2293
2294 self_s = PyBytes_AS_STRING(self);
2295 self_len = PyBytes_GET_SIZE(self);
2296
2297 offset = findstring(self_s, self_len,
2298 from_s, from_len,
2299 0, self_len, FORWARD);
2300 if (offset == -1) {
2301 /* No matches; return the original string */
2302 return return_self(self);
2303 }
2304
2305 /* Need to make a new string */
2306 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2307 if (result == NULL)
2308 return NULL;
2309 result_s = PyBytes_AS_STRING(result);
2310 Py_MEMCPY(result_s, self_s, self_len);
2311
2312 /* change everything in-place, starting with this one */
2313 start = result_s + offset;
2314 Py_MEMCPY(start, to_s, from_len);
2315 start += from_len;
2316 end = result_s + self_len;
2317
2318 while ( --maxcount > 0) {
2319 offset = findstring(start, end-start,
2320 from_s, from_len,
2321 0, end-start, FORWARD);
2322 if (offset==-1)
2323 break;
2324 Py_MEMCPY(start+offset, to_s, from_len);
2325 start += offset+from_len;
2326 }
2327
2328 return result;
2329}
2330
2331/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2332Py_LOCAL(PyBytesObject *)
2333replace_single_character(PyBytesObject *self,
2334 char from_c,
2335 const char *to_s, Py_ssize_t to_len,
2336 Py_ssize_t maxcount)
2337{
2338 char *self_s, *result_s;
2339 char *start, *next, *end;
2340 Py_ssize_t self_len, result_len;
2341 Py_ssize_t count, product;
2342 PyBytesObject *result;
2343
2344 self_s = PyBytes_AS_STRING(self);
2345 self_len = PyBytes_GET_SIZE(self);
2346
2347 count = countchar(self_s, self_len, from_c, maxcount);
2348 if (count == 0) {
2349 /* no matches, return unchanged */
2350 return return_self(self);
2351 }
2352
2353 /* use the difference between current and new, hence the "-1" */
2354 /* result_len = self_len + count * (to_len-1) */
2355 product = count * (to_len-1);
2356 if (product / (to_len-1) != count) {
2357 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002358 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002359 return NULL;
2360 }
2361 result_len = self_len + product;
2362 if (result_len < 0) {
2363 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002364 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002365 return NULL;
2366 }
2367
2368 if ( (result = (PyBytesObject *)
2369 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2370 return NULL;
2371 result_s = PyBytes_AS_STRING(result);
2372
2373 start = self_s;
2374 end = self_s + self_len;
2375 while (count-- > 0) {
2376 next = findchar(start, end-start, from_c);
2377 if (next == NULL)
2378 break;
2379
2380 if (next == start) {
2381 /* replace with the 'to' */
2382 Py_MEMCPY(result_s, to_s, to_len);
2383 result_s += to_len;
2384 start += 1;
2385 } else {
2386 /* copy the unchanged old then the 'to' */
2387 Py_MEMCPY(result_s, start, next-start);
2388 result_s += (next-start);
2389 Py_MEMCPY(result_s, to_s, to_len);
2390 result_s += to_len;
2391 start = next+1;
2392 }
2393 }
2394 /* Copy the remainder of the remaining string */
2395 Py_MEMCPY(result_s, start, end-start);
2396
2397 return result;
2398}
2399
2400/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2401Py_LOCAL(PyBytesObject *)
2402replace_substring(PyBytesObject *self,
2403 const char *from_s, Py_ssize_t from_len,
2404 const char *to_s, Py_ssize_t to_len,
2405 Py_ssize_t maxcount) {
2406 char *self_s, *result_s;
2407 char *start, *next, *end;
2408 Py_ssize_t self_len, result_len;
2409 Py_ssize_t count, offset, product;
2410 PyBytesObject *result;
2411
2412 self_s = PyBytes_AS_STRING(self);
2413 self_len = PyBytes_GET_SIZE(self);
2414
2415 count = countstring(self_s, self_len,
2416 from_s, from_len,
2417 0, self_len, FORWARD, maxcount);
2418 if (count == 0) {
2419 /* no matches, return unchanged */
2420 return return_self(self);
2421 }
2422
2423 /* Check for overflow */
2424 /* result_len = self_len + count * (to_len-from_len) */
2425 product = count * (to_len-from_len);
2426 if (product / (to_len-from_len) != count) {
2427 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002428 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002429 return NULL;
2430 }
2431 result_len = self_len + product;
2432 if (result_len < 0) {
2433 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002434 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435 return NULL;
2436 }
2437
2438 if ( (result = (PyBytesObject *)
2439 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2440 return NULL;
2441 result_s = PyBytes_AS_STRING(result);
2442
2443 start = self_s;
2444 end = self_s + self_len;
2445 while (count-- > 0) {
2446 offset = findstring(start, end-start,
2447 from_s, from_len,
2448 0, end-start, FORWARD);
2449 if (offset == -1)
2450 break;
2451 next = start+offset;
2452 if (next == start) {
2453 /* replace with the 'to' */
2454 Py_MEMCPY(result_s, to_s, to_len);
2455 result_s += to_len;
2456 start += from_len;
2457 } else {
2458 /* copy the unchanged old then the 'to' */
2459 Py_MEMCPY(result_s, start, next-start);
2460 result_s += (next-start);
2461 Py_MEMCPY(result_s, to_s, to_len);
2462 result_s += to_len;
2463 start = next+from_len;
2464 }
2465 }
2466 /* Copy the remainder of the remaining string */
2467 Py_MEMCPY(result_s, start, end-start);
2468
2469 return result;
2470}
2471
2472
2473Py_LOCAL(PyBytesObject *)
2474replace(PyBytesObject *self,
2475 const char *from_s, Py_ssize_t from_len,
2476 const char *to_s, Py_ssize_t to_len,
2477 Py_ssize_t maxcount)
2478{
2479 if (maxcount < 0) {
2480 maxcount = PY_SSIZE_T_MAX;
2481 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2482 /* nothing to do; return the original string */
2483 return return_self(self);
2484 }
2485
2486 if (maxcount == 0 ||
2487 (from_len == 0 && to_len == 0)) {
2488 /* nothing to do; return the original string */
2489 return return_self(self);
2490 }
2491
2492 /* Handle zero-length special cases */
2493
2494 if (from_len == 0) {
2495 /* insert the 'to' string everywhere. */
2496 /* >>> "Python".replace("", ".") */
2497 /* '.P.y.t.h.o.n.' */
2498 return replace_interleave(self, to_s, to_len, maxcount);
2499 }
2500
2501 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2502 /* point for an empty self string to generate a non-empty string */
2503 /* Special case so the remaining code always gets a non-empty string */
2504 if (PyBytes_GET_SIZE(self) == 0) {
2505 return return_self(self);
2506 }
2507
2508 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002509 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510 if (from_len == 1) {
2511 return replace_delete_single_character(
2512 self, from_s[0], maxcount);
2513 } else {
2514 return replace_delete_substring(self, from_s,
2515 from_len, maxcount);
2516 }
2517 }
2518
2519 /* Handle special case where both strings have the same length */
2520
2521 if (from_len == to_len) {
2522 if (from_len == 1) {
2523 return replace_single_character_in_place(
2524 self,
2525 from_s[0],
2526 to_s[0],
2527 maxcount);
2528 } else {
2529 return replace_substring_in_place(
2530 self, from_s, from_len, to_s, to_len,
2531 maxcount);
2532 }
2533 }
2534
2535 /* Otherwise use the more generic algorithms */
2536 if (from_len == 1) {
2537 return replace_single_character(self, from_s[0],
2538 to_s, to_len, maxcount);
2539 } else {
2540 /* len('from')>=2, len('to')>=1 */
2541 return replace_substring(self, from_s, from_len, to_s, to_len,
2542 maxcount);
2543 }
2544}
2545
2546PyDoc_STRVAR(replace__doc__,
2547"B.replace(old, new[, count]) -> bytes\n\
2548\n\
2549Return a copy of B with all occurrences of subsection\n\
2550old replaced by new. If the optional argument count is\n\
2551given, only the first count occurrences are replaced.");
2552
2553static PyObject *
2554string_replace(PyBytesObject *self, PyObject *args)
2555{
2556 Py_ssize_t count = -1;
2557 PyObject *from, *to;
2558 const char *from_s, *to_s;
2559 Py_ssize_t from_len, to_len;
2560
2561 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2562 return NULL;
2563
2564 if (PyBytes_Check(from)) {
2565 from_s = PyBytes_AS_STRING(from);
2566 from_len = PyBytes_GET_SIZE(from);
2567 }
2568 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2569 return NULL;
2570
2571 if (PyBytes_Check(to)) {
2572 to_s = PyBytes_AS_STRING(to);
2573 to_len = PyBytes_GET_SIZE(to);
2574 }
2575 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2576 return NULL;
2577
2578 return (PyObject *)replace((PyBytesObject *) self,
2579 from_s, from_len,
2580 to_s, to_len, count);
2581}
2582
2583/** End DALKE **/
2584
2585/* Matches the end (direction >= 0) or start (direction < 0) of self
2586 * against substr, using the start and end arguments. Returns
2587 * -1 on error, 0 if not found and 1 if found.
2588 */
2589Py_LOCAL(int)
2590_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2591 Py_ssize_t end, int direction)
2592{
2593 Py_ssize_t len = PyBytes_GET_SIZE(self);
2594 Py_ssize_t slen;
2595 const char* sub;
2596 const char* str;
2597
2598 if (PyBytes_Check(substr)) {
2599 sub = PyBytes_AS_STRING(substr);
2600 slen = PyBytes_GET_SIZE(substr);
2601 }
2602 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2603 return -1;
2604 str = PyBytes_AS_STRING(self);
2605
2606 string_adjust_indices(&start, &end, len);
2607
2608 if (direction < 0) {
2609 /* startswith */
2610 if (start+slen > len)
2611 return 0;
2612 } else {
2613 /* endswith */
2614 if (end-start < slen || start > len)
2615 return 0;
2616
2617 if (end-slen > start)
2618 start = end - slen;
2619 }
2620 if (end-start >= slen)
2621 return ! memcmp(str+start, sub, slen);
2622 return 0;
2623}
2624
2625
2626PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002627"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628\n\
2629Return True if B starts with the specified prefix, False otherwise.\n\
2630With optional start, test B beginning at that position.\n\
2631With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002632prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633
2634static PyObject *
2635string_startswith(PyBytesObject *self, PyObject *args)
2636{
2637 Py_ssize_t start = 0;
2638 Py_ssize_t end = PY_SSIZE_T_MAX;
2639 PyObject *subobj;
2640 int result;
2641
2642 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2643 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2644 return NULL;
2645 if (PyTuple_Check(subobj)) {
2646 Py_ssize_t i;
2647 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2648 result = _string_tailmatch(self,
2649 PyTuple_GET_ITEM(subobj, i),
2650 start, end, -1);
2651 if (result == -1)
2652 return NULL;
2653 else if (result) {
2654 Py_RETURN_TRUE;
2655 }
2656 }
2657 Py_RETURN_FALSE;
2658 }
2659 result = _string_tailmatch(self, subobj, start, end, -1);
2660 if (result == -1)
2661 return NULL;
2662 else
2663 return PyBool_FromLong(result);
2664}
2665
2666
2667PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002668"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669\n\
2670Return True if B ends with the specified suffix, False otherwise.\n\
2671With optional start, test B beginning at that position.\n\
2672With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002673suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
2675static PyObject *
2676string_endswith(PyBytesObject *self, PyObject *args)
2677{
2678 Py_ssize_t start = 0;
2679 Py_ssize_t end = PY_SSIZE_T_MAX;
2680 PyObject *subobj;
2681 int result;
2682
2683 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2684 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2685 return NULL;
2686 if (PyTuple_Check(subobj)) {
2687 Py_ssize_t i;
2688 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2689 result = _string_tailmatch(self,
2690 PyTuple_GET_ITEM(subobj, i),
2691 start, end, +1);
2692 if (result == -1)
2693 return NULL;
2694 else if (result) {
2695 Py_RETURN_TRUE;
2696 }
2697 }
2698 Py_RETURN_FALSE;
2699 }
2700 result = _string_tailmatch(self, subobj, start, end, +1);
2701 if (result == -1)
2702 return NULL;
2703 else
2704 return PyBool_FromLong(result);
2705}
2706
2707
2708PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002709"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002711Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002712to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002713handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2714a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002716able to handle UnicodeDecodeErrors.");
2717
2718static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002720{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721 const char *encoding = NULL;
2722 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002723
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2725 return NULL;
2726 if (encoding == NULL)
2727 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002728 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002729}
2730
Guido van Rossum20188312006-05-05 15:15:40 +00002731
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002732PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002734\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002736Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002738
2739static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002740hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002741{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002742 if (c >= 128)
2743 return -1;
2744 if (ISDIGIT(c))
2745 return c - '0';
2746 else {
2747 if (ISUPPER(c))
2748 c = TOLOWER(c);
2749 if (c >= 'a' && c <= 'f')
2750 return c - 'a' + 10;
2751 }
2752 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002753}
2754
2755static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002757{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002758 PyObject *newstring, *hexobj;
2759 char *buf;
2760 Py_UNICODE *hex;
2761 Py_ssize_t hexlen, byteslen, i, j;
2762 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002763
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2765 return NULL;
2766 assert(PyUnicode_Check(hexobj));
2767 hexlen = PyUnicode_GET_SIZE(hexobj);
2768 hex = PyUnicode_AS_UNICODE(hexobj);
2769 byteslen = hexlen/2; /* This overestimates if there are spaces */
2770 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2771 if (!newstring)
2772 return NULL;
2773 buf = PyBytes_AS_STRING(newstring);
2774 for (i = j = 0; i < hexlen; i += 2) {
2775 /* skip over spaces in the input */
2776 while (hex[i] == ' ')
2777 i++;
2778 if (i >= hexlen)
2779 break;
2780 top = hex_digit_to_int(hex[i]);
2781 bot = hex_digit_to_int(hex[i+1]);
2782 if (top == -1 || bot == -1) {
2783 PyErr_Format(PyExc_ValueError,
2784 "non-hexadecimal number found in "
2785 "fromhex() arg at position %zd", i);
2786 goto error;
2787 }
2788 buf[j++] = (top << 4) + bot;
2789 }
2790 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2791 goto error;
2792 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002793
2794 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795 Py_XDECREF(newstring);
2796 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002797}
2798
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002799PyDoc_STRVAR(sizeof__doc__,
2800"S.__sizeof__() -> size of S in memory, in bytes");
2801
2802static PyObject *
2803string_sizeof(PyBytesObject *v)
2804{
2805 Py_ssize_t res;
2806 res = sizeof(PyBytesObject) + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2807 return PyLong_FromSsize_t(res);
2808}
2809
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002810
2811static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002813{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002815}
2816
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002817
2818static PyMethodDef
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002819string_methods[] = {
2820 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
2821 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2822 _Py_capitalize__doc__},
2823 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2824 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2825 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2826 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2827 endswith__doc__},
2828 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2829 expandtabs__doc__},
2830 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2831 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2832 fromhex_doc},
2833 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2834 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2835 _Py_isalnum__doc__},
2836 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2837 _Py_isalpha__doc__},
2838 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2839 _Py_isdigit__doc__},
2840 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2841 _Py_islower__doc__},
2842 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2843 _Py_isspace__doc__},
2844 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2845 _Py_istitle__doc__},
2846 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2847 _Py_isupper__doc__},
2848 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2849 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2850 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2851 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2852 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
2853 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2854 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2855 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2856 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2857 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2858 rpartition__doc__},
2859 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2860 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2861 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2862 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2863 splitlines__doc__},
2864 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2865 startswith__doc__},
2866 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2867 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2868 _Py_swapcase__doc__},
2869 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2870 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2871 translate__doc__},
2872 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2873 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002874 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
2875 sizeof__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002877};
2878
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879static PyObject *
2880str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2881
2882static PyObject *
2883string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2884{
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002885 PyObject *x = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886 const char *encoding = NULL;
2887 const char *errors = NULL;
2888 PyObject *new = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889 static char *kwlist[] = {"source", "encoding", "errors", 0};
2890
2891 if (type != &PyBytes_Type)
2892 return str_subtype_new(type, args, kwds);
2893 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2894 &encoding, &errors))
2895 return NULL;
2896 if (x == NULL) {
2897 if (encoding != NULL || errors != NULL) {
2898 PyErr_SetString(PyExc_TypeError,
2899 "encoding or errors without sequence "
2900 "argument");
2901 return NULL;
2902 }
2903 return PyBytes_FromString("");
2904 }
2905
2906 if (PyUnicode_Check(x)) {
2907 /* Encode via the codec registry */
2908 if (encoding == NULL) {
2909 PyErr_SetString(PyExc_TypeError,
2910 "string argument without an encoding");
2911 return NULL;
2912 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002913 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002914 if (new == NULL)
2915 return NULL;
2916 assert(PyBytes_Check(new));
2917 return new;
2918 }
2919
2920 /* If it's not unicode, there can't be encoding or errors */
2921 if (encoding != NULL || errors != NULL) {
2922 PyErr_SetString(PyExc_TypeError,
2923 "encoding or errors without a string argument");
2924 return NULL;
2925 }
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002926 return PyObject_Bytes(x);
2927}
2928
2929PyObject *
2930PyBytes_FromObject(PyObject *x)
2931{
2932 PyObject *new, *it;
2933 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934
2935 /* Is it an int? */
2936 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2937 if (size == -1 && PyErr_Occurred()) {
2938 PyErr_Clear();
2939 }
2940 else {
2941 if (size < 0) {
2942 PyErr_SetString(PyExc_ValueError, "negative count");
2943 return NULL;
2944 }
2945 new = PyBytes_FromStringAndSize(NULL, size);
2946 if (new == NULL) {
2947 return NULL;
2948 }
2949 if (size > 0) {
2950 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2951 }
2952 return new;
2953 }
2954
2955 /* Use the modern buffer interface */
2956 if (PyObject_CheckBuffer(x)) {
2957 Py_buffer view;
2958 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2959 return NULL;
2960 new = PyBytes_FromStringAndSize(NULL, view.len);
2961 if (!new)
2962 goto fail;
2963 // XXX(brett.cannon): Better way to get to internal buffer?
2964 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2965 &view, view.len, 'C') < 0)
2966 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002967 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968 return new;
2969 fail:
2970 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002971 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002972 return NULL;
2973 }
2974
2975 /* For iterator version, create a string object and resize as needed */
2976 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2977 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2978 input being a truly long iterator. */
2979 size = 64;
2980 new = PyBytes_FromStringAndSize(NULL, size);
2981 if (new == NULL)
2982 return NULL;
2983
2984 /* XXX Optimize this if the arguments is a list, tuple */
2985
2986 /* Get the iterator */
2987 it = PyObject_GetIter(x);
2988 if (it == NULL)
2989 goto error;
2990
2991 /* Run the iterator to exhaustion */
2992 for (i = 0; ; i++) {
2993 PyObject *item;
2994 Py_ssize_t value;
2995
2996 /* Get the next item */
2997 item = PyIter_Next(it);
2998 if (item == NULL) {
2999 if (PyErr_Occurred())
3000 goto error;
3001 break;
3002 }
3003
3004 /* Interpret it as an int (__index__) */
3005 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3006 Py_DECREF(item);
3007 if (value == -1 && PyErr_Occurred())
3008 goto error;
3009
3010 /* Range check */
3011 if (value < 0 || value >= 256) {
3012 PyErr_SetString(PyExc_ValueError,
3013 "bytes must be in range(0, 256)");
3014 goto error;
3015 }
3016
3017 /* Append the byte */
3018 if (i >= size) {
3019 size *= 2;
3020 if (_PyBytes_Resize(&new, size) < 0)
3021 goto error;
3022 }
3023 ((PyBytesObject *)new)->ob_sval[i] = value;
3024 }
3025 _PyBytes_Resize(&new, i);
3026
3027 /* Clean up and return success */
3028 Py_DECREF(it);
3029 return new;
3030
3031 error:
3032 /* Error handling when new != NULL */
3033 Py_XDECREF(it);
3034 Py_DECREF(new);
3035 return NULL;
3036}
3037
3038static PyObject *
3039str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3040{
3041 PyObject *tmp, *pnew;
3042 Py_ssize_t n;
3043
3044 assert(PyType_IsSubtype(type, &PyBytes_Type));
3045 tmp = string_new(&PyBytes_Type, args, kwds);
3046 if (tmp == NULL)
3047 return NULL;
3048 assert(PyBytes_CheckExact(tmp));
3049 n = PyBytes_GET_SIZE(tmp);
3050 pnew = type->tp_alloc(type, n);
3051 if (pnew != NULL) {
3052 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3053 PyBytes_AS_STRING(tmp), n+1);
3054 ((PyBytesObject *)pnew)->ob_shash =
3055 ((PyBytesObject *)tmp)->ob_shash;
3056 }
3057 Py_DECREF(tmp);
3058 return pnew;
3059}
3060
3061PyDoc_STRVAR(string_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003062"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003063bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003064bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3065bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003066\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003067Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003068 - an iterable yielding integers in range(256)\n\
3069 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003070 - a bytes or a buffer object\n\
3071 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003072
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003073static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075PyTypeObject PyBytes_Type = {
3076 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3077 "bytes",
3078 sizeof(PyBytesObject),
3079 sizeof(char),
3080 string_dealloc, /* tp_dealloc */
3081 0, /* tp_print */
3082 0, /* tp_getattr */
3083 0, /* tp_setattr */
3084 0, /* tp_compare */
3085 (reprfunc)string_repr, /* tp_repr */
3086 0, /* tp_as_number */
3087 &string_as_sequence, /* tp_as_sequence */
3088 &string_as_mapping, /* tp_as_mapping */
3089 (hashfunc)string_hash, /* tp_hash */
3090 0, /* tp_call */
3091 string_str, /* tp_str */
3092 PyObject_GenericGetAttr, /* tp_getattro */
3093 0, /* tp_setattro */
3094 &string_as_buffer, /* tp_as_buffer */
3095 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3096 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3097 string_doc, /* tp_doc */
3098 0, /* tp_traverse */
3099 0, /* tp_clear */
3100 (richcmpfunc)string_richcompare, /* tp_richcompare */
3101 0, /* tp_weaklistoffset */
3102 str_iter, /* tp_iter */
3103 0, /* tp_iternext */
3104 string_methods, /* tp_methods */
3105 0, /* tp_members */
3106 0, /* tp_getset */
3107 &PyBaseObject_Type, /* tp_base */
3108 0, /* tp_dict */
3109 0, /* tp_descr_get */
3110 0, /* tp_descr_set */
3111 0, /* tp_dictoffset */
3112 0, /* tp_init */
3113 0, /* tp_alloc */
3114 string_new, /* tp_new */
3115 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003116};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003117
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003118void
3119PyBytes_Concat(register PyObject **pv, register PyObject *w)
3120{
3121 register PyObject *v;
3122 assert(pv != NULL);
3123 if (*pv == NULL)
3124 return;
3125 if (w == NULL) {
3126 Py_DECREF(*pv);
3127 *pv = NULL;
3128 return;
3129 }
3130 v = string_concat(*pv, w);
3131 Py_DECREF(*pv);
3132 *pv = v;
3133}
3134
3135void
3136PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3137{
3138 PyBytes_Concat(pv, w);
3139 Py_XDECREF(w);
3140}
3141
3142
3143/* The following function breaks the notion that strings are immutable:
3144 it changes the size of a string. We get away with this only if there
3145 is only one module referencing the object. You can also think of it
3146 as creating a new string object and destroying the old one, only
3147 more efficiently. In any case, don't use this if the string may
3148 already be known to some other part of the code...
3149 Note that if there's not enough memory to resize the string, the original
3150 string object at *pv is deallocated, *pv is set to NULL, an "out of
3151 memory" exception is set, and -1 is returned. Else (on success) 0 is
3152 returned, and the value in *pv may or may not be the same as on input.
3153 As always, an extra byte is allocated for a trailing \0 byte (newsize
3154 does *not* include that), and a trailing \0 byte is stored.
3155*/
3156
3157int
3158_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3159{
3160 register PyObject *v;
3161 register PyBytesObject *sv;
3162 v = *pv;
3163 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3164 *pv = 0;
3165 Py_DECREF(v);
3166 PyErr_BadInternalCall();
3167 return -1;
3168 }
3169 /* XXX UNREF/NEWREF interface should be more symmetrical */
3170 _Py_DEC_REFTOTAL;
3171 _Py_ForgetReference(v);
3172 *pv = (PyObject *)
3173 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
3174 if (*pv == NULL) {
3175 PyObject_Del(v);
3176 PyErr_NoMemory();
3177 return -1;
3178 }
3179 _Py_NewReference(*pv);
3180 sv = (PyBytesObject *) *pv;
3181 Py_SIZE(sv) = newsize;
3182 sv->ob_sval[newsize] = '\0';
3183 sv->ob_shash = -1; /* invalidate cached hash value */
3184 return 0;
3185}
3186
3187/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3188 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3189 * Python's regular ints.
3190 * Return value: a new PyString*, or NULL if error.
3191 * . *pbuf is set to point into it,
3192 * *plen set to the # of chars following that.
3193 * Caller must decref it when done using pbuf.
3194 * The string starting at *pbuf is of the form
3195 * "-"? ("0x" | "0X")? digit+
3196 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3197 * set in flags. The case of hex digits will be correct,
3198 * There will be at least prec digits, zero-filled on the left if
3199 * necessary to get that many.
3200 * val object to be converted
3201 * flags bitmask of format flags; only F_ALT is looked at
3202 * prec minimum number of digits; 0-fill on left if needed
3203 * type a character in [duoxX]; u acts the same as d
3204 *
3205 * CAUTION: o, x and X conversions on regular ints can never
3206 * produce a '-' sign, but can for Python's unbounded ints.
3207 */
3208PyObject*
3209_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3210 char **pbuf, int *plen)
3211{
3212 PyObject *result = NULL;
3213 char *buf;
3214 Py_ssize_t i;
3215 int sign; /* 1 if '-', else 0 */
3216 int len; /* number of characters */
3217 Py_ssize_t llen;
3218 int numdigits; /* len == numnondigits + numdigits */
3219 int numnondigits = 0;
3220
3221 /* Avoid exceeding SSIZE_T_MAX */
Christian Heimesce694b72008-08-24 16:15:19 +00003222 if (prec > INT_MAX-3) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003223 PyErr_SetString(PyExc_OverflowError,
3224 "precision too large");
3225 return NULL;
3226 }
3227
3228 switch (type) {
3229 case 'd':
3230 case 'u':
3231 /* Special-case boolean: we want 0/1 */
3232 if (PyBool_Check(val))
3233 result = PyNumber_ToBase(val, 10);
3234 else
3235 result = Py_TYPE(val)->tp_str(val);
3236 break;
3237 case 'o':
3238 numnondigits = 2;
3239 result = PyNumber_ToBase(val, 8);
3240 break;
3241 case 'x':
3242 case 'X':
3243 numnondigits = 2;
3244 result = PyNumber_ToBase(val, 16);
3245 break;
3246 default:
3247 assert(!"'type' not in [duoxX]");
3248 }
3249 if (!result)
3250 return NULL;
3251
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003252 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003253 if (!buf) {
3254 Py_DECREF(result);
3255 return NULL;
3256 }
3257
3258 /* To modify the string in-place, there can only be one reference. */
3259 if (Py_REFCNT(result) != 1) {
3260 PyErr_BadInternalCall();
3261 return NULL;
3262 }
3263 llen = PyUnicode_GetSize(result);
3264 if (llen > INT_MAX) {
3265 PyErr_SetString(PyExc_ValueError,
3266 "string too large in _PyBytes_FormatLong");
3267 return NULL;
3268 }
3269 len = (int)llen;
3270 if (buf[len-1] == 'L') {
3271 --len;
3272 buf[len] = '\0';
3273 }
3274 sign = buf[0] == '-';
3275 numnondigits += sign;
3276 numdigits = len - numnondigits;
3277 assert(numdigits > 0);
3278
3279 /* Get rid of base marker unless F_ALT */
3280 if (((flags & F_ALT) == 0 &&
3281 (type == 'o' || type == 'x' || type == 'X'))) {
3282 assert(buf[sign] == '0');
3283 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3284 buf[sign+1] == 'o');
3285 numnondigits -= 2;
3286 buf += 2;
3287 len -= 2;
3288 if (sign)
3289 buf[0] = '-';
3290 assert(len == numnondigits + numdigits);
3291 assert(numdigits > 0);
3292 }
3293
3294 /* Fill with leading zeroes to meet minimum width. */
3295 if (prec > numdigits) {
3296 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3297 numnondigits + prec);
3298 char *b1;
3299 if (!r1) {
3300 Py_DECREF(result);
3301 return NULL;
3302 }
3303 b1 = PyBytes_AS_STRING(r1);
3304 for (i = 0; i < numnondigits; ++i)
3305 *b1++ = *buf++;
3306 for (i = 0; i < prec - numdigits; i++)
3307 *b1++ = '0';
3308 for (i = 0; i < numdigits; i++)
3309 *b1++ = *buf++;
3310 *b1 = '\0';
3311 Py_DECREF(result);
3312 result = r1;
3313 buf = PyBytes_AS_STRING(result);
3314 len = numnondigits + prec;
3315 }
3316
3317 /* Fix up case for hex conversions. */
3318 if (type == 'X') {
3319 /* Need to convert all lower case letters to upper case.
3320 and need to convert 0x to 0X (and -0x to -0X). */
3321 for (i = 0; i < len; i++)
3322 if (buf[i] >= 'a' && buf[i] <= 'x')
3323 buf[i] -= 'a'-'A';
3324 }
3325 *pbuf = buf;
3326 *plen = len;
3327 return result;
3328}
3329
3330void
3331PyBytes_Fini(void)
3332{
3333 int i;
3334 for (i = 0; i < UCHAR_MAX + 1; i++) {
3335 Py_XDECREF(characters[i]);
3336 characters[i] = NULL;
3337 }
3338 Py_XDECREF(nullstring);
3339 nullstring = NULL;
3340}
3341
Benjamin Peterson4116f362008-05-27 00:36:20 +00003342/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003343
3344typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003345 PyObject_HEAD
3346 Py_ssize_t it_index;
3347 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3348} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003349
3350static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003351striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003352{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353 _PyObject_GC_UNTRACK(it);
3354 Py_XDECREF(it->it_seq);
3355 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003356}
3357
3358static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003359striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003360{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003361 Py_VISIT(it->it_seq);
3362 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003363}
3364
3365static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003366striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003367{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003368 PyBytesObject *seq;
3369 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003370
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003371 assert(it != NULL);
3372 seq = it->it_seq;
3373 if (seq == NULL)
3374 return NULL;
3375 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003376
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003377 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3378 item = PyLong_FromLong(
3379 (unsigned char)seq->ob_sval[it->it_index]);
3380 if (item != NULL)
3381 ++it->it_index;
3382 return item;
3383 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003384
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003385 Py_DECREF(seq);
3386 it->it_seq = NULL;
3387 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003388}
3389
3390static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003391striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003392{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003393 Py_ssize_t len = 0;
3394 if (it->it_seq)
3395 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3396 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003397}
3398
3399PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003400 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003401
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003402static PyMethodDef striter_methods[] = {
3403 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3404 length_hint_doc},
3405 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003406};
3407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003408PyTypeObject PyBytesIter_Type = {
3409 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3410 "bytes_iterator", /* tp_name */
3411 sizeof(striterobject), /* tp_basicsize */
3412 0, /* tp_itemsize */
3413 /* methods */
3414 (destructor)striter_dealloc, /* tp_dealloc */
3415 0, /* tp_print */
3416 0, /* tp_getattr */
3417 0, /* tp_setattr */
3418 0, /* tp_compare */
3419 0, /* tp_repr */
3420 0, /* tp_as_number */
3421 0, /* tp_as_sequence */
3422 0, /* tp_as_mapping */
3423 0, /* tp_hash */
3424 0, /* tp_call */
3425 0, /* tp_str */
3426 PyObject_GenericGetAttr, /* tp_getattro */
3427 0, /* tp_setattro */
3428 0, /* tp_as_buffer */
3429 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3430 0, /* tp_doc */
3431 (traverseproc)striter_traverse, /* tp_traverse */
3432 0, /* tp_clear */
3433 0, /* tp_richcompare */
3434 0, /* tp_weaklistoffset */
3435 PyObject_SelfIter, /* tp_iter */
3436 (iternextfunc)striter_next, /* tp_iternext */
3437 striter_methods, /* tp_methods */
3438 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003439};
3440
3441static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003442str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003443{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003444 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003445
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446 if (!PyBytes_Check(seq)) {
3447 PyErr_BadInternalCall();
3448 return NULL;
3449 }
3450 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3451 if (it == NULL)
3452 return NULL;
3453 it->it_index = 0;
3454 Py_INCREF(seq);
3455 it->it_seq = (PyBytesObject *)seq;
3456 _PyObject_GC_TRACK(it);
3457 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003458}