blob: d5c2bea35e5f9d931f750baf41d749047fd7bc5e [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
17 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000019 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000020 return -1;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
24 return -1;
25 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   bytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
88#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
91 Py_INCREF(op);
92 return (PyObject *)op;
93 }
94
Mark Dickinsonfd24b322008-12-06 15:33:31 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
100
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000101 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127 assert(str != NULL);
128 size = strlen(str);
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000130 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000131 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
135#ifdef COUNT_ALLOCS
136 null_strings++;
137#endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
142#ifdef COUNT_ALLOCS
143 one_strings++;
144#endif
145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178#else
179#ifdef __va_copy
180 __va_copy(count, vargs);
181#else
182 count = vargs;
183#endif
184#endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
239 expand:
240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
345
346 end:
347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
354 PyObject* ret;
355 va_list vargs;
356
357#ifdef HAVE_STDARG_PROTOTYPES
358 va_start(vargs, format);
359#else
360 va_start(vargs);
361#endif
362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
383{
384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
407
408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
413
414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
505 failed:
506 Py_DECREF(v);
507 return NULL;
508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
537 register char **s,
538 register Py_ssize_t *len)
539{
540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
544
545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
550
551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
565#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566
Neal Norwitz6968b052007-02-27 19:02:19 +0000567#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568#define STRINGLIB_LEN PyBytes_GET_SIZE
569#define STRINGLIB_NEW PyBytes_FromStringAndSize
570#define STRINGLIB_STR PyBytes_AS_STRING
571/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
572
573#define STRINGLIB_EMPTY nullstring
574#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
575#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
577#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000578
Neal Norwitz6968b052007-02-27 19:02:19 +0000579#include "stringlib/count.h"
580#include "stringlib/find.h"
581#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000582#include "stringlib/ctype.h"
583#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000584
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
Eric Smitha3b1ac82009-04-03 14:45:06 +0000586#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000587#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000588
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000589PyObject *
590PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000591{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000592 static const char *hexdigits = "0123456789abcdef";
593 register PyBytesObject* op = (PyBytesObject*) obj;
594 Py_ssize_t length = Py_SIZE(op);
595 size_t newsize = 3 + 4 * length;
596 PyObject *v;
597 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
598 PyErr_SetString(PyExc_OverflowError,
599 "bytes object is too large to make repr");
600 return NULL;
601 }
602 v = PyUnicode_FromUnicode(NULL, newsize);
603 if (v == NULL) {
604 return NULL;
605 }
606 else {
607 register Py_ssize_t i;
608 register Py_UNICODE c;
609 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
610 int quote;
611
612 /* Figure out which quote to use; single is preferred */
613 quote = '\'';
614 if (smartquotes) {
615 char *test, *start;
616 start = PyBytes_AS_STRING(op);
617 for (test = start; test < start+length; ++test) {
618 if (*test == '"') {
619 quote = '\''; /* back to single */
620 goto decided;
621 }
622 else if (*test == '\'')
623 quote = '"';
624 }
625 decided:
626 ;
627 }
628
629 *p++ = 'b', *p++ = quote;
630 for (i = 0; i < length; i++) {
631 /* There's at least enough room for a hex escape
632 and a closing quote. */
633 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
634 c = op->ob_sval[i];
635 if (c == quote || c == '\\')
636 *p++ = '\\', *p++ = c;
637 else if (c == '\t')
638 *p++ = '\\', *p++ = 't';
639 else if (c == '\n')
640 *p++ = '\\', *p++ = 'n';
641 else if (c == '\r')
642 *p++ = '\\', *p++ = 'r';
643 else if (c < ' ' || c >= 0x7f) {
644 *p++ = '\\';
645 *p++ = 'x';
646 *p++ = hexdigits[(c & 0xf0) >> 4];
647 *p++ = hexdigits[c & 0xf];
648 }
649 else
650 *p++ = c;
651 }
652 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
653 *p++ = quote;
654 *p = '\0';
655 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
656 Py_DECREF(v);
657 return NULL;
658 }
659 return v;
660 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000661}
662
Neal Norwitz6968b052007-02-27 19:02:19 +0000663static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000665{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000667}
668
Neal Norwitz6968b052007-02-27 19:02:19 +0000669static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000671{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672 if (Py_BytesWarningFlag) {
673 if (PyErr_WarnEx(PyExc_BytesWarning,
674 "str() on a bytes instance", 1))
675 return NULL;
676 }
677 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000678}
679
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000680static Py_ssize_t
681string_length(PyBytesObject *a)
682{
683 return Py_SIZE(a);
684}
Neal Norwitz6968b052007-02-27 19:02:19 +0000685
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000686/* This is also used by PyBytes_Concat() */
687static PyObject *
688string_concat(PyObject *a, PyObject *b)
689{
690 Py_ssize_t size;
691 Py_buffer va, vb;
692 PyObject *result = NULL;
693
694 va.len = -1;
695 vb.len = -1;
696 if (_getbuffer(a, &va) < 0 ||
697 _getbuffer(b, &vb) < 0) {
698 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
699 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
700 goto done;
701 }
702
703 /* Optimize end cases */
704 if (va.len == 0 && PyBytes_CheckExact(b)) {
705 result = b;
706 Py_INCREF(result);
707 goto done;
708 }
709 if (vb.len == 0 && PyBytes_CheckExact(a)) {
710 result = a;
711 Py_INCREF(result);
712 goto done;
713 }
714
715 size = va.len + vb.len;
716 if (size < 0) {
717 PyErr_NoMemory();
718 goto done;
719 }
720
721 result = PyBytes_FromStringAndSize(NULL, size);
722 if (result != NULL) {
723 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
724 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
725 }
726
727 done:
728 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000729 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000730 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000731 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000732 return result;
733}
Neal Norwitz6968b052007-02-27 19:02:19 +0000734
735static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000736string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000737{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000738 register Py_ssize_t i;
739 register Py_ssize_t j;
740 register Py_ssize_t size;
741 register PyBytesObject *op;
742 size_t nbytes;
743 if (n < 0)
744 n = 0;
745 /* watch out for overflows: the size can overflow int,
746 * and the # of bytes needed can overflow size_t
747 */
748 size = Py_SIZE(a) * n;
749 if (n && size / n != Py_SIZE(a)) {
750 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000751 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000752 return NULL;
753 }
754 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
755 Py_INCREF(a);
756 return (PyObject *)a;
757 }
758 nbytes = (size_t)size;
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000759 if (nbytes + PyBytesObject_SIZE <= nbytes) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000760 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000761 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000762 return NULL;
763 }
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000764 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000765 if (op == NULL)
766 return PyErr_NoMemory();
767 PyObject_INIT_VAR(op, &PyBytes_Type, size);
768 op->ob_shash = -1;
769 op->ob_sval[size] = '\0';
770 if (Py_SIZE(a) == 1 && n > 0) {
771 memset(op->ob_sval, a->ob_sval[0] , n);
772 return (PyObject *) op;
773 }
774 i = 0;
775 if (i < size) {
776 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
777 i = Py_SIZE(a);
778 }
779 while (i < size) {
780 j = (i <= size-i) ? i : size-i;
781 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
782 i += j;
783 }
784 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000785}
786
Guido van Rossum98297ee2007-11-06 21:34:58 +0000787static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000788string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789{
790 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
791 if (ival == -1 && PyErr_Occurred()) {
792 Py_buffer varg;
793 int pos;
794 PyErr_Clear();
795 if (_getbuffer(arg, &varg) < 0)
796 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000797 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000798 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000799 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000800 return pos >= 0;
801 }
802 if (ival < 0 || ival >= 256) {
803 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
804 return -1;
805 }
806
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000807 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000808}
809
Neal Norwitz6968b052007-02-27 19:02:19 +0000810static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000811string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000812{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000813 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000814 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000815 return NULL;
816 }
817 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000818}
819
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000820static PyObject*
821string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000822{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000823 int c;
824 Py_ssize_t len_a, len_b;
825 Py_ssize_t min_len;
826 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000827
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000828 /* Make sure both arguments are strings. */
829 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Barry Warsaw9e9dcd62008-10-17 01:50:37 +0000830 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000831 (PyObject_IsInstance((PyObject*)a,
832 (PyObject*)&PyUnicode_Type) ||
833 PyObject_IsInstance((PyObject*)b,
834 (PyObject*)&PyUnicode_Type))) {
835 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000836 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000837 return NULL;
838 }
839 result = Py_NotImplemented;
840 goto out;
841 }
842 if (a == b) {
843 switch (op) {
844 case Py_EQ:case Py_LE:case Py_GE:
845 result = Py_True;
846 goto out;
847 case Py_NE:case Py_LT:case Py_GT:
848 result = Py_False;
849 goto out;
850 }
851 }
852 if (op == Py_EQ) {
853 /* Supporting Py_NE here as well does not save
854 much time, since Py_NE is rarely used. */
855 if (Py_SIZE(a) == Py_SIZE(b)
856 && (a->ob_sval[0] == b->ob_sval[0]
857 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
858 result = Py_True;
859 } else {
860 result = Py_False;
861 }
862 goto out;
863 }
864 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
865 min_len = (len_a < len_b) ? len_a : len_b;
866 if (min_len > 0) {
867 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
868 if (c==0)
869 c = memcmp(a->ob_sval, b->ob_sval, min_len);
870 } else
871 c = 0;
872 if (c == 0)
873 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
874 switch (op) {
875 case Py_LT: c = c < 0; break;
876 case Py_LE: c = c <= 0; break;
877 case Py_EQ: assert(0); break; /* unreachable */
878 case Py_NE: c = c != 0; break;
879 case Py_GT: c = c > 0; break;
880 case Py_GE: c = c >= 0; break;
881 default:
882 result = Py_NotImplemented;
883 goto out;
884 }
885 result = c ? Py_True : Py_False;
886 out:
887 Py_INCREF(result);
888 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000889}
890
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000891static long
892string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000893{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000894 register Py_ssize_t len;
895 register unsigned char *p;
896 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000897
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000898 if (a->ob_shash != -1)
899 return a->ob_shash;
900 len = Py_SIZE(a);
901 p = (unsigned char *) a->ob_sval;
902 x = *p << 7;
903 while (--len >= 0)
904 x = (1000003*x) ^ *p++;
905 x ^= Py_SIZE(a);
906 if (x == -1)
907 x = -2;
908 a->ob_shash = x;
909 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000910}
911
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000912static PyObject*
913string_subscript(PyBytesObject* self, PyObject* item)
914{
915 if (PyIndex_Check(item)) {
916 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
917 if (i == -1 && PyErr_Occurred())
918 return NULL;
919 if (i < 0)
920 i += PyBytes_GET_SIZE(self);
921 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
922 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000923 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000924 return NULL;
925 }
926 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
927 }
928 else if (PySlice_Check(item)) {
929 Py_ssize_t start, stop, step, slicelength, cur, i;
930 char* source_buf;
931 char* result_buf;
932 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000933
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000934 if (PySlice_GetIndicesEx((PySliceObject*)item,
935 PyBytes_GET_SIZE(self),
936 &start, &stop, &step, &slicelength) < 0) {
937 return NULL;
938 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000939
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000940 if (slicelength <= 0) {
941 return PyBytes_FromStringAndSize("", 0);
942 }
943 else if (start == 0 && step == 1 &&
944 slicelength == PyBytes_GET_SIZE(self) &&
945 PyBytes_CheckExact(self)) {
946 Py_INCREF(self);
947 return (PyObject *)self;
948 }
949 else if (step == 1) {
950 return PyBytes_FromStringAndSize(
951 PyBytes_AS_STRING(self) + start,
952 slicelength);
953 }
954 else {
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000955 source_buf = PyBytes_AS_STRING(self);
956 result = PyBytes_FromStringAndSize(NULL, slicelength);
957 if (result == NULL)
958 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000959
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000960 result_buf = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000961 for (cur = start, i = 0; i < slicelength;
962 cur += step, i++) {
963 result_buf[i] = source_buf[cur];
964 }
965
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000966 return result;
967 }
968 }
969 else {
970 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000971 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972 Py_TYPE(item)->tp_name);
973 return NULL;
974 }
975}
976
977static int
978string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
979{
Martin v. Löwis423be952008-08-13 15:53:07 +0000980 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000981 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000982}
983
984static PySequenceMethods string_as_sequence = {
985 (lenfunc)string_length, /*sq_length*/
986 (binaryfunc)string_concat, /*sq_concat*/
987 (ssizeargfunc)string_repeat, /*sq_repeat*/
988 (ssizeargfunc)string_item, /*sq_item*/
989 0, /*sq_slice*/
990 0, /*sq_ass_item*/
991 0, /*sq_ass_slice*/
992 (objobjproc)string_contains /*sq_contains*/
993};
994
995static PyMappingMethods string_as_mapping = {
996 (lenfunc)string_length,
997 (binaryfunc)string_subscript,
998 0,
999};
1000
1001static PyBufferProcs string_as_buffer = {
1002 (getbufferproc)string_buffer_getbuffer,
1003 NULL,
1004};
1005
1006
/* Selectors for the strip family.  The values double as indexes into
   stripformat below, so keep the two in step. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (3 + stripformat[i])
1015
Neal Norwitz6968b052007-02-27 19:02:19 +00001016
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before falling back to memcmp() for the middle.

   Don't call if length < 2: the memcmp() size is length-2.

   Every argument is parenthesized so that compound argument expressions
   expand with the intended precedence.  Arguments are evaluated more
   than once, so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1022
1023
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a given split limit, capped at MAX_PREALLOC.
   5 splits gives 6 elements.  The argument is parenthesized so that
   compound expressions expand with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append the bytes data[left:right] to the result list.

   Relies on names in the calling function: `list` (the result list),
   `count` (items stored so far), `str` (scratch PyObject *) and an
   `onError` label that is jumped to on failure.

   The first MAX_PREALLOC items are stored straight into the list's
   slots (PyList_SET_ITEM takes over the reference); later items go
   through PyList_Append, after which our own reference is dropped.

   Wrapped in do { } while (0) so that an invocation followed by ';'
   is exactly one statement, including inside an unbraced if/else. */
#define SPLIT_ADD(data, left, right) do {                           \
        str = PyBytes_FromStringAndSize((data) + (left),            \
                                        (right) - (left));          \
        if (str == NULL)                                            \
            goto onError;                                           \
        if (count < MAX_PREALLOC) {                                 \
            PyList_SET_ITEM(list, count, str);                      \
        } else {                                                    \
            if (PyList_Append(list, str)) {                         \
                Py_DECREF(str);                                     \
                goto onError;                                       \
            }                                                       \
            else                                                    \
                Py_DECREF(str);                                     \
        }                                                           \
        count++;                                                    \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers for whitespace splitting.  `i` must be a modifiable
   lvalue; the forward forms stop at `len`, the reverse forms stop
   below index 0. */
#define SKIP_SPACE(s, i, len) \
    do { while ((i) < (len) && ISSPACE((s)[(i)])) (i)++; } while (0)
#define SKIP_NONSPACE(s, i, len) \
    do { while ((i) < (len) && !ISSPACE((s)[(i)])) (i)++; } while (0)
#define RSKIP_SPACE(s, i) \
    do { while ((i) >= 0 && ISSPACE((s)[(i)])) (i)--; } while (0)
#define RSKIP_NONSPACE(s, i) \
    do { while ((i) >= 0 && !ISSPACE((s)[(i)])) (i)--; } while (0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
1062Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001063split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001064{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001065 const char *s = PyBytes_AS_STRING(self);
1066 Py_ssize_t i, j, count=0;
1067 PyObject *str;
1068 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001069
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001070 if (list == NULL)
1071 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001073 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001075 while (maxsplit-- > 0) {
1076 SKIP_SPACE(s, i, len);
1077 if (i==len) break;
1078 j = i; i++;
1079 SKIP_NONSPACE(s, i, len);
1080 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1081 /* No whitespace in self, so just use it as list[0] */
1082 Py_INCREF(self);
1083 PyList_SET_ITEM(list, 0, (PyObject *)self);
1084 count++;
1085 break;
1086 }
1087 SPLIT_ADD(s, j, i);
1088 }
1089
1090 if (i < len) {
1091 /* Only occurs when maxsplit was reached */
1092 /* Skip any remaining whitespace and copy to end of string */
1093 SKIP_SPACE(s, i, len);
1094 if (i != len)
1095 SPLIT_ADD(s, i, len);
1096 }
1097 FIX_PREALLOC_SIZE(list);
1098 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001099 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001100 Py_DECREF(list);
1101 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001102}
1103
Guido van Rossum8f950672007-09-10 16:53:45 +00001104Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001106{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107 const char *s = PyBytes_AS_STRING(self);
1108 register Py_ssize_t i, j, count=0;
1109 PyObject *str;
1110 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001111
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112 if (list == NULL)
1113 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001114
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001115 i = j = 0;
1116 while ((j < len) && (maxcount-- > 0)) {
1117 for(; j<len; j++) {
1118 /* I found that using memchr makes no difference */
1119 if (s[j] == ch) {
1120 SPLIT_ADD(s, i, j);
1121 i = j = j + 1;
1122 break;
1123 }
1124 }
1125 }
1126 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1127 /* ch not in self, so just use self as list[0] */
1128 Py_INCREF(self);
1129 PyList_SET_ITEM(list, 0, (PyObject *)self);
1130 count++;
1131 }
1132 else if (i <= len) {
1133 SPLIT_ADD(s, i, len);
1134 }
1135 FIX_PREALLOC_SIZE(list);
1136 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001137
1138 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001139 Py_DECREF(list);
1140 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001141}
1142
Neal Norwitz6968b052007-02-27 19:02:19 +00001143PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001144"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001145\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001146Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147If sep is not specified or is None, B is split on ASCII whitespace\n\
1148characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001149If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001150
1151static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001152string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001153{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001154 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1155 Py_ssize_t maxsplit = -1, count=0;
1156 const char *s = PyBytes_AS_STRING(self), *sub;
1157 Py_buffer vsub;
1158 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001159#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001160 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001161#endif
1162
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001163 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1164 return NULL;
1165 if (maxsplit < 0)
1166 maxsplit = PY_SSIZE_T_MAX;
1167 if (subobj == Py_None)
1168 return split_whitespace(self, len, maxsplit);
1169 if (_getbuffer(subobj, &vsub) < 0)
1170 return NULL;
1171 sub = vsub.buf;
1172 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001173
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174 if (n == 0) {
1175 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001176 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001177 return NULL;
1178 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001179 else if (n == 1) {
1180 list = split_char(self, len, sub[0], maxsplit);
1181 PyBuffer_Release(&vsub);
1182 return list;
1183 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001185 list = PyList_New(PREALLOC_SIZE(maxsplit));
1186 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001187 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001188 return NULL;
1189 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001190
1191#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001192 i = j = 0;
1193 while (maxsplit-- > 0) {
1194 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1195 if (pos < 0)
1196 break;
1197 j = i+pos;
1198 SPLIT_ADD(s, i, j);
1199 i = j + n;
1200 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001201#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202 i = j = 0;
1203 while ((j+n <= len) && (maxsplit-- > 0)) {
1204 for (; j+n <= len; j++) {
1205 if (Py_STRING_MATCH(s, j, sub, n)) {
1206 SPLIT_ADD(s, i, j);
1207 i = j = j + n;
1208 break;
1209 }
1210 }
1211 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001212#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213 SPLIT_ADD(s, i, len);
1214 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001215 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 onError:
1219 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001220 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001222}
1223
Neal Norwitz6968b052007-02-27 19:02:19 +00001224PyDoc_STRVAR(partition__doc__,
1225"B.partition(sep) -> (head, sep, tail)\n\
1226\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001227Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001228the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001230
1231static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001233{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234 const char *sep;
1235 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001236
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237 if (PyBytes_Check(sep_obj)) {
1238 sep = PyBytes_AS_STRING(sep_obj);
1239 sep_len = PyBytes_GET_SIZE(sep_obj);
1240 }
1241 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1242 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001243
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244 return stringlib_partition(
1245 (PyObject*) self,
1246 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1247 sep_obj, sep, sep_len
1248 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001249}
1250
1251PyDoc_STRVAR(rpartition__doc__,
1252"B.rpartition(sep) -> (tail, sep, head)\n\
1253\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001254Search for the separator sep in B, starting at the end of B,\n\
1255and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001256part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
1259static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001261{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262 const char *sep;
1263 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001264
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265 if (PyBytes_Check(sep_obj)) {
1266 sep = PyBytes_AS_STRING(sep_obj);
1267 sep_len = PyBytes_GET_SIZE(sep_obj);
1268 }
1269 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1270 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001271
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272 return stringlib_rpartition(
1273 (PyObject*) self,
1274 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1275 sep_obj, sep, sep_len
1276 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001277}
1278
1279Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001281{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282 const char *s = PyBytes_AS_STRING(self);
1283 Py_ssize_t i, j, count=0;
1284 PyObject *str;
1285 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001286
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287 if (list == NULL)
1288 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001291
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292 while (maxsplit-- > 0) {
1293 RSKIP_SPACE(s, i);
1294 if (i<0) break;
1295 j = i; i--;
1296 RSKIP_NONSPACE(s, i);
1297 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1298 /* No whitespace in self, so just use it as list[0] */
1299 Py_INCREF(self);
1300 PyList_SET_ITEM(list, 0, (PyObject *)self);
1301 count++;
1302 break;
1303 }
1304 SPLIT_ADD(s, i + 1, j + 1);
1305 }
1306 if (i >= 0) {
1307 /* Only occurs when maxsplit was reached. Skip any remaining
1308 whitespace and copy to beginning of string. */
1309 RSKIP_SPACE(s, i);
1310 if (i >= 0)
1311 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001312
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313 }
1314 FIX_PREALLOC_SIZE(list);
1315 if (PyList_Reverse(list) < 0)
1316 goto onError;
1317 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001318 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319 Py_DECREF(list);
1320 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001321}
1322
Guido van Rossum8f950672007-09-10 16:53:45 +00001323Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001325{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326 const char *s = PyBytes_AS_STRING(self);
1327 register Py_ssize_t i, j, count=0;
1328 PyObject *str;
1329 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001330
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331 if (list == NULL)
1332 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001333
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334 i = j = len - 1;
1335 while ((i >= 0) && (maxcount-- > 0)) {
1336 for (; i >= 0; i--) {
1337 if (s[i] == ch) {
1338 SPLIT_ADD(s, i + 1, j + 1);
1339 j = i = i - 1;
1340 break;
1341 }
1342 }
1343 }
1344 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1345 /* ch not in self, so just use self as list[0] */
1346 Py_INCREF(self);
1347 PyList_SET_ITEM(list, 0, (PyObject *)self);
1348 count++;
1349 }
1350 else if (j >= -1) {
1351 SPLIT_ADD(s, 0, j + 1);
1352 }
1353 FIX_PREALLOC_SIZE(list);
1354 if (PyList_Reverse(list) < 0)
1355 goto onError;
1356 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001357
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358 onError:
1359 Py_DECREF(list);
1360 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001361}
1362
Neal Norwitz6968b052007-02-27 19:02:19 +00001363PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001364"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001365\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366Return a list of the sections in B, using sep as the delimiter,\n\
1367starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001368If sep is not given, B is split on ASCII whitespace characters\n\
1369(space, tab, return, newline, formfeed, vertical tab).\n\
1370If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001371
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Neal Norwitz6968b052007-02-27 19:02:19 +00001373static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001375{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1377 Py_ssize_t maxsplit = -1, count=0;
1378 const char *s, *sub;
1379 Py_buffer vsub;
1380 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001381
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1383 return NULL;
1384 if (maxsplit < 0)
1385 maxsplit = PY_SSIZE_T_MAX;
1386 if (subobj == Py_None)
1387 return rsplit_whitespace(self, len, maxsplit);
1388 if (_getbuffer(subobj, &vsub) < 0)
1389 return NULL;
1390 sub = vsub.buf;
1391 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001392
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393 if (n == 0) {
1394 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001395 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396 return NULL;
1397 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001398 else if (n == 1) {
1399 list = rsplit_char(self, len, sub[0], maxsplit);
1400 PyBuffer_Release(&vsub);
1401 return list;
1402 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001403
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404 list = PyList_New(PREALLOC_SIZE(maxsplit));
1405 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001406 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407 return NULL;
1408 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001409
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410 j = len;
1411 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001412
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413 s = PyBytes_AS_STRING(self);
1414 while ( (i >= 0) && (maxsplit-- > 0) ) {
1415 for (; i>=0; i--) {
1416 if (Py_STRING_MATCH(s, i, sub, n)) {
1417 SPLIT_ADD(s, i + n, j);
1418 j = i;
1419 i -= n;
1420 break;
1421 }
1422 }
1423 }
1424 SPLIT_ADD(s, 0, j);
1425 FIX_PREALLOC_SIZE(list);
1426 if (PyList_Reverse(list) < 0)
1427 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001428 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001430
1431onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001432 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001433 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001435}
1436
/* The split/rsplit helpers above are the only users of these macros. */
#undef PREALLOC_SIZE
#undef MAX_PREALLOC
#undef SPLIT_ADD
1440
1441
1442PyDoc_STRVAR(join__doc__,
1443"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001444\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001445Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1447
Neal Norwitz6968b052007-02-27 19:02:19 +00001448static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001450{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451 char *sep = PyBytes_AS_STRING(self);
1452 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1453 PyObject *res = NULL;
1454 char *p;
1455 Py_ssize_t seqlen = 0;
1456 size_t sz = 0;
1457 Py_ssize_t i;
1458 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001459
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460 seq = PySequence_Fast(orig, "");
1461 if (seq == NULL) {
1462 return NULL;
1463 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001464
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465 seqlen = PySequence_Size(seq);
1466 if (seqlen == 0) {
1467 Py_DECREF(seq);
1468 return PyBytes_FromString("");
1469 }
1470 if (seqlen == 1) {
1471 item = PySequence_Fast_GET_ITEM(seq, 0);
1472 if (PyBytes_CheckExact(item)) {
1473 Py_INCREF(item);
1474 Py_DECREF(seq);
1475 return item;
1476 }
1477 }
1478
1479 /* There are at least two things to join, or else we have a subclass
1480 * of the builtin types in the sequence.
1481 * Do a pre-pass to figure out the total amount of space we'll
1482 * need (sz), and see whether all argument are bytes.
1483 */
1484 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1485 for (i = 0; i < seqlen; i++) {
1486 const size_t old_sz = sz;
1487 item = PySequence_Fast_GET_ITEM(seq, i);
1488 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1489 PyErr_Format(PyExc_TypeError,
1490 "sequence item %zd: expected bytes,"
1491 " %.80s found",
1492 i, Py_TYPE(item)->tp_name);
1493 Py_DECREF(seq);
1494 return NULL;
1495 }
1496 sz += Py_SIZE(item);
1497 if (i != 0)
1498 sz += seplen;
1499 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1500 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001501 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502 Py_DECREF(seq);
1503 return NULL;
1504 }
1505 }
1506
1507 /* Allocate result space. */
1508 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1509 if (res == NULL) {
1510 Py_DECREF(seq);
1511 return NULL;
1512 }
1513
1514 /* Catenate everything. */
1515 /* I'm not worried about a PyByteArray item growing because there's
1516 nowhere in this function where we release the GIL. */
1517 p = PyBytes_AS_STRING(res);
1518 for (i = 0; i < seqlen; ++i) {
1519 size_t n;
1520 char *q;
1521 if (i) {
1522 Py_MEMCPY(p, sep, seplen);
1523 p += seplen;
1524 }
1525 item = PySequence_Fast_GET_ITEM(seq, i);
1526 n = Py_SIZE(item);
1527 if (PyBytes_Check(item))
1528 q = PyBytes_AS_STRING(item);
1529 else
1530 q = PyByteArray_AS_STRING(item);
1531 Py_MEMCPY(p, q, n);
1532 p += n;
1533 }
1534
1535 Py_DECREF(seq);
1536 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001537}
1538
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539PyObject *
1540_PyBytes_Join(PyObject *sep, PyObject *x)
1541{
1542 assert(sep != NULL && PyBytes_Check(sep));
1543 assert(x != NULL);
1544 return string_join(sep, x);
1545}
1546
1547Py_LOCAL_INLINE(void)
1548string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1549{
1550 if (*end > len)
1551 *end = len;
1552 else if (*end < 0)
1553 *end += len;
1554 if (*end < 0)
1555 *end = 0;
1556 if (*start < 0)
1557 *start += len;
1558 if (*start < 0)
1559 *start = 0;
1560}
1561
1562Py_LOCAL_INLINE(Py_ssize_t)
1563string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1564{
1565 PyObject *subobj;
1566 const char *sub;
1567 Py_ssize_t sub_len;
1568 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1569 PyObject *obj_start=Py_None, *obj_end=Py_None;
1570
1571 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1572 &obj_start, &obj_end))
1573 return -2;
1574 /* To support None in "start" and "end" arguments, meaning
1575 the same as if they were not passed.
1576 */
1577 if (obj_start != Py_None)
1578 if (!_PyEval_SliceIndex(obj_start, &start))
1579 return -2;
1580 if (obj_end != Py_None)
1581 if (!_PyEval_SliceIndex(obj_end, &end))
1582 return -2;
1583
1584 if (PyBytes_Check(subobj)) {
1585 sub = PyBytes_AS_STRING(subobj);
1586 sub_len = PyBytes_GET_SIZE(subobj);
1587 }
1588 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1589 /* XXX - the "expected a character buffer object" is pretty
1590 confusing for a non-expert. remap to something else ? */
1591 return -2;
1592
1593 if (dir > 0)
1594 return stringlib_find_slice(
1595 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1596 sub, sub_len, start, end);
1597 else
1598 return stringlib_rfind_slice(
1599 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1600 sub, sub_len, start, end);
1601}
1602
1603
1604PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001605"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001606\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607Return the lowest index in S where substring sub is found,\n\
1608such that sub is contained within s[start:end]. Optional\n\
1609arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001610\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001611Return -1 on failure.");
1612
Neal Norwitz6968b052007-02-27 19:02:19 +00001613static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001615{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616 Py_ssize_t result = string_find_internal(self, args, +1);
1617 if (result == -2)
1618 return NULL;
1619 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001620}
1621
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622
1623PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001624"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001625\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626Like B.find() but raise ValueError when the substring is not found.");
1627
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001628static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001629string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001630{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001631 Py_ssize_t result = string_find_internal(self, args, +1);
1632 if (result == -2)
1633 return NULL;
1634 if (result == -1) {
1635 PyErr_SetString(PyExc_ValueError,
1636 "substring not found");
1637 return NULL;
1638 }
1639 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001640}
1641
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642
1643PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001644"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001645\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646Return the highest index in B where substring sub is found,\n\
1647such that sub is contained within s[start:end]. Optional\n\
1648arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001649\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650Return -1 on failure.");
1651
Neal Norwitz6968b052007-02-27 19:02:19 +00001652static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001654{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655 Py_ssize_t result = string_find_internal(self, args, -1);
1656 if (result == -2)
1657 return NULL;
1658 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001659}
1660
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001661
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001663"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664\n\
1665Like B.rfind() but raise ValueError when the substring is not found.");
1666
1667static PyObject *
1668string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001669{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670 Py_ssize_t result = string_find_internal(self, args, -1);
1671 if (result == -2)
1672 return NULL;
1673 if (result == -1) {
1674 PyErr_SetString(PyExc_ValueError,
1675 "substring not found");
1676 return NULL;
1677 }
1678 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001679}
1680
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
1682Py_LOCAL_INLINE(PyObject *)
1683do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001684{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685 Py_buffer vsep;
1686 char *s = PyBytes_AS_STRING(self);
1687 Py_ssize_t len = PyBytes_GET_SIZE(self);
1688 char *sep;
1689 Py_ssize_t seplen;
1690 Py_ssize_t i, j;
1691
1692 if (_getbuffer(sepobj, &vsep) < 0)
1693 return NULL;
1694 sep = vsep.buf;
1695 seplen = vsep.len;
1696
1697 i = 0;
1698 if (striptype != RIGHTSTRIP) {
1699 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1700 i++;
1701 }
1702 }
1703
1704 j = len;
1705 if (striptype != LEFTSTRIP) {
1706 do {
1707 j--;
1708 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1709 j++;
1710 }
1711
Martin v. Löwis423be952008-08-13 15:53:07 +00001712 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
1714 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1715 Py_INCREF(self);
1716 return (PyObject*)self;
1717 }
1718 else
1719 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001720}
1721
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
1723Py_LOCAL_INLINE(PyObject *)
1724do_strip(PyBytesObject *self, int striptype)
1725{
1726 char *s = PyBytes_AS_STRING(self);
1727 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1728
1729 i = 0;
1730 if (striptype != RIGHTSTRIP) {
1731 while (i < len && ISSPACE(s[i])) {
1732 i++;
1733 }
1734 }
1735
1736 j = len;
1737 if (striptype != LEFTSTRIP) {
1738 do {
1739 j--;
1740 } while (j >= i && ISSPACE(s[j]));
1741 j++;
1742 }
1743
1744 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1745 Py_INCREF(self);
1746 return (PyObject*)self;
1747 }
1748 else
1749 return PyBytes_FromStringAndSize(s+i, j-i);
1750}
1751
1752
1753Py_LOCAL_INLINE(PyObject *)
1754do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1755{
1756 PyObject *sep = NULL;
1757
1758 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1759 return NULL;
1760
1761 if (sep != NULL && sep != Py_None) {
1762 return do_xstrip(self, striptype, sep);
1763 }
1764 return do_strip(self, striptype);
1765}
1766
1767
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001768PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001770\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001771Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001773static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001775{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001776 if (PyTuple_GET_SIZE(args) == 0)
1777 return do_strip(self, BOTHSTRIP); /* Common case */
1778 else
1779 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001780}
1781
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001783PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001785\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001786Strip leading bytes contained in the argument.\n\
1787If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001788static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001790{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791 if (PyTuple_GET_SIZE(args) == 0)
1792 return do_strip(self, LEFTSTRIP); /* Common case */
1793 else
1794 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001795}
1796
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001798PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001800\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001801Strip trailing bytes contained in the argument.\n\
1802If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001803static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001805{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806 if (PyTuple_GET_SIZE(args) == 0)
1807 return do_strip(self, RIGHTSTRIP); /* Common case */
1808 else
1809 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001810}
Neal Norwitz6968b052007-02-27 19:02:19 +00001811
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812
1813PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001814"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001815\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816Return the number of non-overlapping occurrences of substring sub in\n\
1817string S[start:end]. Optional arguments start and end are interpreted\n\
1818as in slice notation.");
1819
1820static PyObject *
1821string_count(PyBytesObject *self, PyObject *args)
1822{
1823 PyObject *sub_obj;
1824 const char *str = PyBytes_AS_STRING(self), *sub;
1825 Py_ssize_t sub_len;
1826 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1827
1828 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1829 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1830 return NULL;
1831
1832 if (PyBytes_Check(sub_obj)) {
1833 sub = PyBytes_AS_STRING(sub_obj);
1834 sub_len = PyBytes_GET_SIZE(sub_obj);
1835 }
1836 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1837 return NULL;
1838
1839 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1840
1841 return PyLong_FromSsize_t(
1842 stringlib_count(str + start, end - start, sub, sub_len)
1843 );
1844}
1845
1846
1847PyDoc_STRVAR(translate__doc__,
1848"B.translate(table[, deletechars]) -> bytes\n\
1849\n\
1850Return a copy of B, where all characters occurring in the\n\
1851optional argument deletechars are removed, and the remaining\n\
1852characters have been mapped through the given translation\n\
1853table, which must be a bytes object of length 256.");
1854
1855static PyObject *
1856string_translate(PyBytesObject *self, PyObject *args)
1857{
1858 register char *input, *output;
1859 const char *table;
1860 register Py_ssize_t i, c, changed = 0;
1861 PyObject *input_obj = (PyObject*)self;
1862 const char *output_start, *del_table=NULL;
1863 Py_ssize_t inlen, tablen, dellen = 0;
1864 PyObject *result;
1865 int trans_table[256];
1866 PyObject *tableobj, *delobj = NULL;
1867
1868 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1869 &tableobj, &delobj))
1870 return NULL;
1871
1872 if (PyBytes_Check(tableobj)) {
1873 table = PyBytes_AS_STRING(tableobj);
1874 tablen = PyBytes_GET_SIZE(tableobj);
1875 }
1876 else if (tableobj == Py_None) {
1877 table = NULL;
1878 tablen = 256;
1879 }
1880 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1881 return NULL;
1882
1883 if (tablen != 256) {
1884 PyErr_SetString(PyExc_ValueError,
1885 "translation table must be 256 characters long");
1886 return NULL;
1887 }
1888
1889 if (delobj != NULL) {
1890 if (PyBytes_Check(delobj)) {
1891 del_table = PyBytes_AS_STRING(delobj);
1892 dellen = PyBytes_GET_SIZE(delobj);
1893 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1895 return NULL;
1896 }
1897 else {
1898 del_table = NULL;
1899 dellen = 0;
1900 }
1901
1902 inlen = PyBytes_GET_SIZE(input_obj);
1903 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1904 if (result == NULL)
1905 return NULL;
1906 output_start = output = PyBytes_AsString(result);
1907 input = PyBytes_AS_STRING(input_obj);
1908
1909 if (dellen == 0 && table != NULL) {
1910 /* If no deletions are required, use faster code */
1911 for (i = inlen; --i >= 0; ) {
1912 c = Py_CHARMASK(*input++);
1913 if (Py_CHARMASK((*output++ = table[c])) != c)
1914 changed = 1;
1915 }
1916 if (changed || !PyBytes_CheckExact(input_obj))
1917 return result;
1918 Py_DECREF(result);
1919 Py_INCREF(input_obj);
1920 return input_obj;
1921 }
1922
1923 if (table == NULL) {
1924 for (i = 0; i < 256; i++)
1925 trans_table[i] = Py_CHARMASK(i);
1926 } else {
1927 for (i = 0; i < 256; i++)
1928 trans_table[i] = Py_CHARMASK(table[i]);
1929 }
1930
1931 for (i = 0; i < dellen; i++)
1932 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1933
1934 for (i = inlen; --i >= 0; ) {
1935 c = Py_CHARMASK(*input++);
1936 if (trans_table[c] != -1)
1937 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1938 continue;
1939 changed = 1;
1940 }
1941 if (!changed && PyBytes_CheckExact(input_obj)) {
1942 Py_DECREF(result);
1943 Py_INCREF(input_obj);
1944 return input_obj;
1945 }
1946 /* Fix the size of the resulting string */
1947 if (inlen > 0)
1948 _PyBytes_Resize(&result, output - output_start);
1949 return result;
1950}
1951
1952
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char * to the first occurrence of byte c within the first
   target_len bytes of target, or NULL when there is none.  Thin wrapper
   around memchr(); every argument is parenthesized so that arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1960
1961/* String ops must return a string. */
1962/* If the object is subclass of string, create a copy */
1963Py_LOCAL(PyBytesObject *)
1964return_self(PyBytesObject *self)
1965{
1966 if (PyBytes_CheckExact(self)) {
1967 Py_INCREF(self);
1968 return self;
1969 }
1970 return (PyBytesObject *)PyBytes_FromStringAndSize(
1971 PyBytes_AS_STRING(self),
1972 PyBytes_GET_SIZE(self));
1973}
1974
1975Py_LOCAL_INLINE(Py_ssize_t)
1976countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1977{
1978 Py_ssize_t count=0;
1979 const char *start=target;
1980 const char *end=target+target_len;
1981
1982 while ( (start=findchar(start, end-start, c)) != NULL ) {
1983 count++;
1984 if (count >= maxcount)
1985 break;
1986 start += 1;
1987 }
1988 return count;
1989}
1990
1991Py_LOCAL(Py_ssize_t)
1992findstring(const char *target, Py_ssize_t target_len,
1993 const char *pattern, Py_ssize_t pattern_len,
1994 Py_ssize_t start,
1995 Py_ssize_t end,
1996 int direction)
1997{
1998 if (start < 0) {
1999 start += target_len;
2000 if (start < 0)
2001 start = 0;
2002 }
2003 if (end > target_len) {
2004 end = target_len;
2005 } else if (end < 0) {
2006 end += target_len;
2007 if (end < 0)
2008 end = 0;
2009 }
2010
2011 /* zero-length substrings always match at the first attempt */
2012 if (pattern_len == 0)
2013 return (direction > 0) ? start : end;
2014
2015 end -= pattern_len;
2016
2017 if (direction < 0) {
2018 for (; end >= start; end--)
2019 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2020 return end;
2021 } else {
2022 for (; start <= end; start++)
2023 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2024 return start;
2025 }
2026 return -1;
2027}
2028
2029Py_LOCAL_INLINE(Py_ssize_t)
2030countstring(const char *target, Py_ssize_t target_len,
2031 const char *pattern, Py_ssize_t pattern_len,
2032 Py_ssize_t start,
2033 Py_ssize_t end,
2034 int direction, Py_ssize_t maxcount)
2035{
2036 Py_ssize_t count=0;
2037
2038 if (start < 0) {
2039 start += target_len;
2040 if (start < 0)
2041 start = 0;
2042 }
2043 if (end > target_len) {
2044 end = target_len;
2045 } else if (end < 0) {
2046 end += target_len;
2047 if (end < 0)
2048 end = 0;
2049 }
2050
2051 /* zero-length substrings match everywhere */
2052 if (pattern_len == 0 || maxcount == 0) {
2053 if (target_len+1 < maxcount)
2054 return target_len+1;
2055 return maxcount;
2056 }
2057
2058 end -= pattern_len;
2059 if (direction < 0) {
2060 for (; (end >= start); end--)
2061 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2062 count++;
2063 if (--maxcount <= 0) break;
2064 end -= pattern_len-1;
2065 }
2066 } else {
2067 for (; (start <= end); start++)
2068 if (Py_STRING_MATCH(target, start,
2069 pattern, pattern_len)) {
2070 count++;
2071 if (--maxcount <= 0)
2072 break;
2073 start += pattern_len-1;
2074 }
2075 }
2076 return count;
2077}
2078
2079
2080/* Algorithms for different cases of string replacement */
2081
2082/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2083Py_LOCAL(PyBytesObject *)
2084replace_interleave(PyBytesObject *self,
2085 const char *to_s, Py_ssize_t to_len,
2086 Py_ssize_t maxcount)
2087{
2088 char *self_s, *result_s;
2089 Py_ssize_t self_len, result_len;
2090 Py_ssize_t count, i, product;
2091 PyBytesObject *result;
2092
2093 self_len = PyBytes_GET_SIZE(self);
2094
2095 /* 1 at the end plus 1 after every character */
2096 count = self_len+1;
2097 if (maxcount < count)
2098 count = maxcount;
2099
2100 /* Check for overflow */
2101 /* result_len = count * to_len + self_len; */
2102 product = count * to_len;
2103 if (product / to_len != count) {
2104 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002105 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106 return NULL;
2107 }
2108 result_len = product + self_len;
2109 if (result_len < 0) {
2110 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002111 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112 return NULL;
2113 }
2114
2115 if (! (result = (PyBytesObject *)
2116 PyBytes_FromStringAndSize(NULL, result_len)) )
2117 return NULL;
2118
2119 self_s = PyBytes_AS_STRING(self);
2120 result_s = PyBytes_AS_STRING(result);
2121
2122 /* TODO: special case single character, which doesn't need memcpy */
2123
2124 /* Lay the first one down (guaranteed this will occur) */
2125 Py_MEMCPY(result_s, to_s, to_len);
2126 result_s += to_len;
2127 count -= 1;
2128
2129 for (i=0; i<count; i++) {
2130 *result_s++ = *self_s++;
2131 Py_MEMCPY(result_s, to_s, to_len);
2132 result_s += to_len;
2133 }
2134
2135 /* Copy the rest of the original string */
2136 Py_MEMCPY(result_s, self_s, self_len-i);
2137
2138 return result;
2139}
2140
2141/* Special case for deleting a single character */
2142/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2143Py_LOCAL(PyBytesObject *)
2144replace_delete_single_character(PyBytesObject *self,
2145 char from_c, Py_ssize_t maxcount)
2146{
2147 char *self_s, *result_s;
2148 char *start, *next, *end;
2149 Py_ssize_t self_len, result_len;
2150 Py_ssize_t count;
2151 PyBytesObject *result;
2152
2153 self_len = PyBytes_GET_SIZE(self);
2154 self_s = PyBytes_AS_STRING(self);
2155
2156 count = countchar(self_s, self_len, from_c, maxcount);
2157 if (count == 0) {
2158 return return_self(self);
2159 }
2160
2161 result_len = self_len - count; /* from_len == 1 */
2162 assert(result_len>=0);
2163
2164 if ( (result = (PyBytesObject *)
2165 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2166 return NULL;
2167 result_s = PyBytes_AS_STRING(result);
2168
2169 start = self_s;
2170 end = self_s + self_len;
2171 while (count-- > 0) {
2172 next = findchar(start, end-start, from_c);
2173 if (next == NULL)
2174 break;
2175 Py_MEMCPY(result_s, start, next-start);
2176 result_s += (next-start);
2177 start = next+1;
2178 }
2179 Py_MEMCPY(result_s, start, end-start);
2180
2181 return result;
2182}
2183
2184/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2185
2186Py_LOCAL(PyBytesObject *)
2187replace_delete_substring(PyBytesObject *self,
2188 const char *from_s, Py_ssize_t from_len,
2189 Py_ssize_t maxcount) {
2190 char *self_s, *result_s;
2191 char *start, *next, *end;
2192 Py_ssize_t self_len, result_len;
2193 Py_ssize_t count, offset;
2194 PyBytesObject *result;
2195
2196 self_len = PyBytes_GET_SIZE(self);
2197 self_s = PyBytes_AS_STRING(self);
2198
2199 count = countstring(self_s, self_len,
2200 from_s, from_len,
2201 0, self_len, 1,
2202 maxcount);
2203
2204 if (count == 0) {
2205 /* no matches */
2206 return return_self(self);
2207 }
2208
2209 result_len = self_len - (count * from_len);
2210 assert (result_len>=0);
2211
2212 if ( (result = (PyBytesObject *)
2213 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2214 return NULL;
2215
2216 result_s = PyBytes_AS_STRING(result);
2217
2218 start = self_s;
2219 end = self_s + self_len;
2220 while (count-- > 0) {
2221 offset = findstring(start, end-start,
2222 from_s, from_len,
2223 0, end-start, FORWARD);
2224 if (offset == -1)
2225 break;
2226 next = start + offset;
2227
2228 Py_MEMCPY(result_s, start, next-start);
2229
2230 result_s += (next-start);
2231 start = next+from_len;
2232 }
2233 Py_MEMCPY(result_s, start, end-start);
2234 return result;
2235}
2236
2237/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2238Py_LOCAL(PyBytesObject *)
2239replace_single_character_in_place(PyBytesObject *self,
2240 char from_c, char to_c,
2241 Py_ssize_t maxcount)
2242{
2243 char *self_s, *result_s, *start, *end, *next;
2244 Py_ssize_t self_len;
2245 PyBytesObject *result;
2246
2247 /* The result string will be the same size */
2248 self_s = PyBytes_AS_STRING(self);
2249 self_len = PyBytes_GET_SIZE(self);
2250
2251 next = findchar(self_s, self_len, from_c);
2252
2253 if (next == NULL) {
2254 /* No matches; return the original string */
2255 return return_self(self);
2256 }
2257
2258 /* Need to make a new string */
2259 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2260 if (result == NULL)
2261 return NULL;
2262 result_s = PyBytes_AS_STRING(result);
2263 Py_MEMCPY(result_s, self_s, self_len);
2264
2265 /* change everything in-place, starting with this one */
2266 start = result_s + (next-self_s);
2267 *start = to_c;
2268 start++;
2269 end = result_s + self_len;
2270
2271 while (--maxcount > 0) {
2272 next = findchar(start, end-start, from_c);
2273 if (next == NULL)
2274 break;
2275 *next = to_c;
2276 start = next+1;
2277 }
2278
2279 return result;
2280}
2281
2282/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2283Py_LOCAL(PyBytesObject *)
2284replace_substring_in_place(PyBytesObject *self,
2285 const char *from_s, Py_ssize_t from_len,
2286 const char *to_s, Py_ssize_t to_len,
2287 Py_ssize_t maxcount)
2288{
2289 char *result_s, *start, *end;
2290 char *self_s;
2291 Py_ssize_t self_len, offset;
2292 PyBytesObject *result;
2293
2294 /* The result string will be the same size */
2295
2296 self_s = PyBytes_AS_STRING(self);
2297 self_len = PyBytes_GET_SIZE(self);
2298
2299 offset = findstring(self_s, self_len,
2300 from_s, from_len,
2301 0, self_len, FORWARD);
2302 if (offset == -1) {
2303 /* No matches; return the original string */
2304 return return_self(self);
2305 }
2306
2307 /* Need to make a new string */
2308 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2309 if (result == NULL)
2310 return NULL;
2311 result_s = PyBytes_AS_STRING(result);
2312 Py_MEMCPY(result_s, self_s, self_len);
2313
2314 /* change everything in-place, starting with this one */
2315 start = result_s + offset;
2316 Py_MEMCPY(start, to_s, from_len);
2317 start += from_len;
2318 end = result_s + self_len;
2319
2320 while ( --maxcount > 0) {
2321 offset = findstring(start, end-start,
2322 from_s, from_len,
2323 0, end-start, FORWARD);
2324 if (offset==-1)
2325 break;
2326 Py_MEMCPY(start+offset, to_s, from_len);
2327 start += offset+from_len;
2328 }
2329
2330 return result;
2331}
2332
2333/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2334Py_LOCAL(PyBytesObject *)
2335replace_single_character(PyBytesObject *self,
2336 char from_c,
2337 const char *to_s, Py_ssize_t to_len,
2338 Py_ssize_t maxcount)
2339{
2340 char *self_s, *result_s;
2341 char *start, *next, *end;
2342 Py_ssize_t self_len, result_len;
2343 Py_ssize_t count, product;
2344 PyBytesObject *result;
2345
2346 self_s = PyBytes_AS_STRING(self);
2347 self_len = PyBytes_GET_SIZE(self);
2348
2349 count = countchar(self_s, self_len, from_c, maxcount);
2350 if (count == 0) {
2351 /* no matches, return unchanged */
2352 return return_self(self);
2353 }
2354
2355 /* use the difference between current and new, hence the "-1" */
2356 /* result_len = self_len + count * (to_len-1) */
2357 product = count * (to_len-1);
2358 if (product / (to_len-1) != count) {
2359 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002360 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361 return NULL;
2362 }
2363 result_len = self_len + product;
2364 if (result_len < 0) {
2365 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002366 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002367 return NULL;
2368 }
2369
2370 if ( (result = (PyBytesObject *)
2371 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2372 return NULL;
2373 result_s = PyBytes_AS_STRING(result);
2374
2375 start = self_s;
2376 end = self_s + self_len;
2377 while (count-- > 0) {
2378 next = findchar(start, end-start, from_c);
2379 if (next == NULL)
2380 break;
2381
2382 if (next == start) {
2383 /* replace with the 'to' */
2384 Py_MEMCPY(result_s, to_s, to_len);
2385 result_s += to_len;
2386 start += 1;
2387 } else {
2388 /* copy the unchanged old then the 'to' */
2389 Py_MEMCPY(result_s, start, next-start);
2390 result_s += (next-start);
2391 Py_MEMCPY(result_s, to_s, to_len);
2392 result_s += to_len;
2393 start = next+1;
2394 }
2395 }
2396 /* Copy the remainder of the remaining string */
2397 Py_MEMCPY(result_s, start, end-start);
2398
2399 return result;
2400}
2401
2402/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2403Py_LOCAL(PyBytesObject *)
2404replace_substring(PyBytesObject *self,
2405 const char *from_s, Py_ssize_t from_len,
2406 const char *to_s, Py_ssize_t to_len,
2407 Py_ssize_t maxcount) {
2408 char *self_s, *result_s;
2409 char *start, *next, *end;
2410 Py_ssize_t self_len, result_len;
2411 Py_ssize_t count, offset, product;
2412 PyBytesObject *result;
2413
2414 self_s = PyBytes_AS_STRING(self);
2415 self_len = PyBytes_GET_SIZE(self);
2416
2417 count = countstring(self_s, self_len,
2418 from_s, from_len,
2419 0, self_len, FORWARD, maxcount);
2420 if (count == 0) {
2421 /* no matches, return unchanged */
2422 return return_self(self);
2423 }
2424
2425 /* Check for overflow */
2426 /* result_len = self_len + count * (to_len-from_len) */
2427 product = count * (to_len-from_len);
2428 if (product / (to_len-from_len) != count) {
2429 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002430 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431 return NULL;
2432 }
2433 result_len = self_len + product;
2434 if (result_len < 0) {
2435 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002436 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002437 return NULL;
2438 }
2439
2440 if ( (result = (PyBytesObject *)
2441 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2442 return NULL;
2443 result_s = PyBytes_AS_STRING(result);
2444
2445 start = self_s;
2446 end = self_s + self_len;
2447 while (count-- > 0) {
2448 offset = findstring(start, end-start,
2449 from_s, from_len,
2450 0, end-start, FORWARD);
2451 if (offset == -1)
2452 break;
2453 next = start+offset;
2454 if (next == start) {
2455 /* replace with the 'to' */
2456 Py_MEMCPY(result_s, to_s, to_len);
2457 result_s += to_len;
2458 start += from_len;
2459 } else {
2460 /* copy the unchanged old then the 'to' */
2461 Py_MEMCPY(result_s, start, next-start);
2462 result_s += (next-start);
2463 Py_MEMCPY(result_s, to_s, to_len);
2464 result_s += to_len;
2465 start = next+from_len;
2466 }
2467 }
2468 /* Copy the remainder of the remaining string */
2469 Py_MEMCPY(result_s, start, end-start);
2470
2471 return result;
2472}
2473
2474
2475Py_LOCAL(PyBytesObject *)
2476replace(PyBytesObject *self,
2477 const char *from_s, Py_ssize_t from_len,
2478 const char *to_s, Py_ssize_t to_len,
2479 Py_ssize_t maxcount)
2480{
2481 if (maxcount < 0) {
2482 maxcount = PY_SSIZE_T_MAX;
2483 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2484 /* nothing to do; return the original string */
2485 return return_self(self);
2486 }
2487
2488 if (maxcount == 0 ||
2489 (from_len == 0 && to_len == 0)) {
2490 /* nothing to do; return the original string */
2491 return return_self(self);
2492 }
2493
2494 /* Handle zero-length special cases */
2495
2496 if (from_len == 0) {
2497 /* insert the 'to' string everywhere. */
2498 /* >>> "Python".replace("", ".") */
2499 /* '.P.y.t.h.o.n.' */
2500 return replace_interleave(self, to_s, to_len, maxcount);
2501 }
2502
2503 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2504 /* point for an empty self string to generate a non-empty string */
2505 /* Special case so the remaining code always gets a non-empty string */
2506 if (PyBytes_GET_SIZE(self) == 0) {
2507 return return_self(self);
2508 }
2509
2510 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002511 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512 if (from_len == 1) {
2513 return replace_delete_single_character(
2514 self, from_s[0], maxcount);
2515 } else {
2516 return replace_delete_substring(self, from_s,
2517 from_len, maxcount);
2518 }
2519 }
2520
2521 /* Handle special case where both strings have the same length */
2522
2523 if (from_len == to_len) {
2524 if (from_len == 1) {
2525 return replace_single_character_in_place(
2526 self,
2527 from_s[0],
2528 to_s[0],
2529 maxcount);
2530 } else {
2531 return replace_substring_in_place(
2532 self, from_s, from_len, to_s, to_len,
2533 maxcount);
2534 }
2535 }
2536
2537 /* Otherwise use the more generic algorithms */
2538 if (from_len == 1) {
2539 return replace_single_character(self, from_s[0],
2540 to_s, to_len, maxcount);
2541 } else {
2542 /* len('from')>=2, len('to')>=1 */
2543 return replace_substring(self, from_s, from_len, to_s, to_len,
2544 maxcount);
2545 }
2546}
2547
2548PyDoc_STRVAR(replace__doc__,
2549"B.replace(old, new[, count]) -> bytes\n\
2550\n\
2551Return a copy of B with all occurrences of subsection\n\
2552old replaced by new. If the optional argument count is\n\
2553given, only the first count occurrences are replaced.");
2554
2555static PyObject *
2556string_replace(PyBytesObject *self, PyObject *args)
2557{
2558 Py_ssize_t count = -1;
2559 PyObject *from, *to;
2560 const char *from_s, *to_s;
2561 Py_ssize_t from_len, to_len;
2562
2563 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2564 return NULL;
2565
2566 if (PyBytes_Check(from)) {
2567 from_s = PyBytes_AS_STRING(from);
2568 from_len = PyBytes_GET_SIZE(from);
2569 }
2570 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2571 return NULL;
2572
2573 if (PyBytes_Check(to)) {
2574 to_s = PyBytes_AS_STRING(to);
2575 to_len = PyBytes_GET_SIZE(to);
2576 }
2577 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2578 return NULL;
2579
2580 return (PyObject *)replace((PyBytesObject *) self,
2581 from_s, from_len,
2582 to_s, to_len, count);
2583}
2584
2585/** End DALKE **/
2586
2587/* Matches the end (direction >= 0) or start (direction < 0) of self
2588 * against substr, using the start and end arguments. Returns
2589 * -1 on error, 0 if not found and 1 if found.
2590 */
2591Py_LOCAL(int)
2592_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2593 Py_ssize_t end, int direction)
2594{
2595 Py_ssize_t len = PyBytes_GET_SIZE(self);
2596 Py_ssize_t slen;
2597 const char* sub;
2598 const char* str;
2599
2600 if (PyBytes_Check(substr)) {
2601 sub = PyBytes_AS_STRING(substr);
2602 slen = PyBytes_GET_SIZE(substr);
2603 }
2604 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2605 return -1;
2606 str = PyBytes_AS_STRING(self);
2607
2608 string_adjust_indices(&start, &end, len);
2609
2610 if (direction < 0) {
2611 /* startswith */
2612 if (start+slen > len)
2613 return 0;
2614 } else {
2615 /* endswith */
2616 if (end-start < slen || start > len)
2617 return 0;
2618
2619 if (end-slen > start)
2620 start = end - slen;
2621 }
2622 if (end-start >= slen)
2623 return ! memcmp(str+start, sub, slen);
2624 return 0;
2625}
2626
2627
2628PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002629"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002630\n\
2631Return True if B starts with the specified prefix, False otherwise.\n\
2632With optional start, test B beginning at that position.\n\
2633With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002634prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
2636static PyObject *
2637string_startswith(PyBytesObject *self, PyObject *args)
2638{
2639 Py_ssize_t start = 0;
2640 Py_ssize_t end = PY_SSIZE_T_MAX;
2641 PyObject *subobj;
2642 int result;
2643
2644 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2645 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2646 return NULL;
2647 if (PyTuple_Check(subobj)) {
2648 Py_ssize_t i;
2649 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2650 result = _string_tailmatch(self,
2651 PyTuple_GET_ITEM(subobj, i),
2652 start, end, -1);
2653 if (result == -1)
2654 return NULL;
2655 else if (result) {
2656 Py_RETURN_TRUE;
2657 }
2658 }
2659 Py_RETURN_FALSE;
2660 }
2661 result = _string_tailmatch(self, subobj, start, end, -1);
2662 if (result == -1)
2663 return NULL;
2664 else
2665 return PyBool_FromLong(result);
2666}
2667
2668
2669PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002670"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671\n\
2672Return True if B ends with the specified suffix, False otherwise.\n\
2673With optional start, test B beginning at that position.\n\
2674With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002675suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
2677static PyObject *
2678string_endswith(PyBytesObject *self, PyObject *args)
2679{
2680 Py_ssize_t start = 0;
2681 Py_ssize_t end = PY_SSIZE_T_MAX;
2682 PyObject *subobj;
2683 int result;
2684
2685 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2686 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2687 return NULL;
2688 if (PyTuple_Check(subobj)) {
2689 Py_ssize_t i;
2690 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2691 result = _string_tailmatch(self,
2692 PyTuple_GET_ITEM(subobj, i),
2693 start, end, +1);
2694 if (result == -1)
2695 return NULL;
2696 else if (result) {
2697 Py_RETURN_TRUE;
2698 }
2699 }
2700 Py_RETURN_FALSE;
2701 }
2702 result = _string_tailmatch(self, subobj, start, end, +1);
2703 if (result == -1)
2704 return NULL;
2705 else
2706 return PyBool_FromLong(result);
2707}
2708
2709
2710PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002711"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002713Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002714to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002715handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2716a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002718able to handle UnicodeDecodeErrors.");
2719
2720static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002722{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723 const char *encoding = NULL;
2724 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002725
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002726 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2727 return NULL;
2728 if (encoding == NULL)
2729 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002730 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002731}
2732
Guido van Rossum20188312006-05-05 15:15:40 +00002733
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002734PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002736\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002738Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002740
2741static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002742hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002743{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744 if (c >= 128)
2745 return -1;
2746 if (ISDIGIT(c))
2747 return c - '0';
2748 else {
2749 if (ISUPPER(c))
2750 c = TOLOWER(c);
2751 if (c >= 'a' && c <= 'f')
2752 return c - 'a' + 10;
2753 }
2754 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002755}
2756
2757static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002758string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002759{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760 PyObject *newstring, *hexobj;
2761 char *buf;
2762 Py_UNICODE *hex;
2763 Py_ssize_t hexlen, byteslen, i, j;
2764 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002765
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002766 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2767 return NULL;
2768 assert(PyUnicode_Check(hexobj));
2769 hexlen = PyUnicode_GET_SIZE(hexobj);
2770 hex = PyUnicode_AS_UNICODE(hexobj);
2771 byteslen = hexlen/2; /* This overestimates if there are spaces */
2772 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2773 if (!newstring)
2774 return NULL;
2775 buf = PyBytes_AS_STRING(newstring);
2776 for (i = j = 0; i < hexlen; i += 2) {
2777 /* skip over spaces in the input */
2778 while (hex[i] == ' ')
2779 i++;
2780 if (i >= hexlen)
2781 break;
2782 top = hex_digit_to_int(hex[i]);
2783 bot = hex_digit_to_int(hex[i+1]);
2784 if (top == -1 || bot == -1) {
2785 PyErr_Format(PyExc_ValueError,
2786 "non-hexadecimal number found in "
2787 "fromhex() arg at position %zd", i);
2788 goto error;
2789 }
2790 buf[j++] = (top << 4) + bot;
2791 }
2792 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2793 goto error;
2794 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002795
2796 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797 Py_XDECREF(newstring);
2798 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002799}
2800
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002801PyDoc_STRVAR(sizeof__doc__,
2802"S.__sizeof__() -> size of S in memory, in bytes");
2803
2804static PyObject *
2805string_sizeof(PyBytesObject *v)
2806{
2807 Py_ssize_t res;
Mark Dickinsonfd24b322008-12-06 15:33:31 +00002808 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002809 return PyLong_FromSsize_t(res);
2810}
2811
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002812
2813static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002815{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002817}
2818
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002819
/* Method table for the bytes type (installed as tp_methods of
   PyBytes_Type).  Each entry is {name, C implementation, calling
   convention flags, docstring}; the list is terminated by a
   {NULL, NULL} sentinel.  "fromhex" is the only class method
   (METH_CLASS). */
static PyMethodDef
string_methods[] = {
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {NULL, NULL} /* sentinel */
};
2880
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881static PyObject *
2882str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2883
2884static PyObject *
2885string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2886{
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002887 PyObject *x = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002888 const char *encoding = NULL;
2889 const char *errors = NULL;
2890 PyObject *new = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891 static char *kwlist[] = {"source", "encoding", "errors", 0};
2892
2893 if (type != &PyBytes_Type)
2894 return str_subtype_new(type, args, kwds);
2895 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2896 &encoding, &errors))
2897 return NULL;
2898 if (x == NULL) {
2899 if (encoding != NULL || errors != NULL) {
2900 PyErr_SetString(PyExc_TypeError,
2901 "encoding or errors without sequence "
2902 "argument");
2903 return NULL;
2904 }
2905 return PyBytes_FromString("");
2906 }
2907
2908 if (PyUnicode_Check(x)) {
2909 /* Encode via the codec registry */
2910 if (encoding == NULL) {
2911 PyErr_SetString(PyExc_TypeError,
2912 "string argument without an encoding");
2913 return NULL;
2914 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002915 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916 if (new == NULL)
2917 return NULL;
2918 assert(PyBytes_Check(new));
2919 return new;
2920 }
2921
2922 /* If it's not unicode, there can't be encoding or errors */
2923 if (encoding != NULL || errors != NULL) {
2924 PyErr_SetString(PyExc_TypeError,
2925 "encoding or errors without a string argument");
2926 return NULL;
2927 }
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002928 return PyObject_Bytes(x);
2929}
2930
2931PyObject *
2932PyBytes_FromObject(PyObject *x)
2933{
2934 PyObject *new, *it;
2935 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002936
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002937 if (x == NULL) {
2938 PyErr_BadInternalCall();
2939 return NULL;
2940 }
2941
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002942 /* Is it an int? */
2943 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2944 if (size == -1 && PyErr_Occurred()) {
2945 PyErr_Clear();
2946 }
2947 else {
2948 if (size < 0) {
2949 PyErr_SetString(PyExc_ValueError, "negative count");
2950 return NULL;
2951 }
2952 new = PyBytes_FromStringAndSize(NULL, size);
2953 if (new == NULL) {
2954 return NULL;
2955 }
2956 if (size > 0) {
2957 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2958 }
2959 return new;
2960 }
2961
2962 /* Use the modern buffer interface */
2963 if (PyObject_CheckBuffer(x)) {
2964 Py_buffer view;
2965 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2966 return NULL;
2967 new = PyBytes_FromStringAndSize(NULL, view.len);
2968 if (!new)
2969 goto fail;
Christian Heimes1a8501c2008-10-02 19:56:01 +00002970 /* XXX(brett.cannon): Better way to get to internal buffer? */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2972 &view, view.len, 'C') < 0)
2973 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002974 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975 return new;
2976 fail:
2977 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002978 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002979 return NULL;
2980 }
2981
2982 /* For iterator version, create a string object and resize as needed */
2983 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2984 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2985 input being a truly long iterator. */
2986 size = 64;
2987 new = PyBytes_FromStringAndSize(NULL, size);
2988 if (new == NULL)
2989 return NULL;
2990
2991 /* XXX Optimize this if the arguments is a list, tuple */
2992
2993 /* Get the iterator */
2994 it = PyObject_GetIter(x);
2995 if (it == NULL)
2996 goto error;
2997
2998 /* Run the iterator to exhaustion */
2999 for (i = 0; ; i++) {
3000 PyObject *item;
3001 Py_ssize_t value;
3002
3003 /* Get the next item */
3004 item = PyIter_Next(it);
3005 if (item == NULL) {
3006 if (PyErr_Occurred())
3007 goto error;
3008 break;
3009 }
3010
3011 /* Interpret it as an int (__index__) */
3012 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3013 Py_DECREF(item);
3014 if (value == -1 && PyErr_Occurred())
3015 goto error;
3016
3017 /* Range check */
3018 if (value < 0 || value >= 256) {
3019 PyErr_SetString(PyExc_ValueError,
3020 "bytes must be in range(0, 256)");
3021 goto error;
3022 }
3023
3024 /* Append the byte */
3025 if (i >= size) {
3026 size *= 2;
3027 if (_PyBytes_Resize(&new, size) < 0)
3028 goto error;
3029 }
3030 ((PyBytesObject *)new)->ob_sval[i] = value;
3031 }
3032 _PyBytes_Resize(&new, i);
3033
3034 /* Clean up and return success */
3035 Py_DECREF(it);
3036 return new;
3037
3038 error:
3039 /* Error handling when new != NULL */
3040 Py_XDECREF(it);
3041 Py_DECREF(new);
3042 return NULL;
3043}
3044
3045static PyObject *
3046str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3047{
3048 PyObject *tmp, *pnew;
3049 Py_ssize_t n;
3050
3051 assert(PyType_IsSubtype(type, &PyBytes_Type));
3052 tmp = string_new(&PyBytes_Type, args, kwds);
3053 if (tmp == NULL)
3054 return NULL;
3055 assert(PyBytes_CheckExact(tmp));
3056 n = PyBytes_GET_SIZE(tmp);
3057 pnew = type->tp_alloc(type, n);
3058 if (pnew != NULL) {
3059 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3060 PyBytes_AS_STRING(tmp), n+1);
3061 ((PyBytesObject *)pnew)->ob_shash =
3062 ((PyBytesObject *)tmp)->ob_shash;
3063 }
3064 Py_DECREF(tmp);
3065 return pnew;
3066}
3067
/* Docstring of the bytes type itself (used as tp_doc of PyBytes_Type). */
PyDoc_STRVAR(string_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(memory_view) -> bytes\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - a bytes or a buffer object\n\
 - any object implementing the buffer API.");
/* Forward declaration: str_iter is needed for the tp_iter slot below but
   is defined after the iterator type, near the end of the file. */
static PyObject *str_iter(PyObject *seq);

/* Type object for bytes.  It is a variable-size type: each instance
   occupies PyBytesObject_SIZE bytes plus sizeof(char) per item.  Slots
   set to 0 are left unset in this initializer. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    str_iter,                                   /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003124
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003125void
3126PyBytes_Concat(register PyObject **pv, register PyObject *w)
3127{
3128 register PyObject *v;
3129 assert(pv != NULL);
3130 if (*pv == NULL)
3131 return;
3132 if (w == NULL) {
3133 Py_DECREF(*pv);
3134 *pv = NULL;
3135 return;
3136 }
3137 v = string_concat(*pv, w);
3138 Py_DECREF(*pv);
3139 *pv = v;
3140}
3141
3142void
3143PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3144{
3145 PyBytes_Concat(pv, w);
3146 Py_XDECREF(w);
3147}
3148
3149
3150/* The following function breaks the notion that strings are immutable:
3151 it changes the size of a string. We get away with this only if there
3152 is only one module referencing the object. You can also think of it
3153 as creating a new string object and destroying the old one, only
3154 more efficiently. In any case, don't use this if the string may
3155 already be known to some other part of the code...
3156 Note that if there's not enough memory to resize the string, the original
3157 string object at *pv is deallocated, *pv is set to NULL, an "out of
3158 memory" exception is set, and -1 is returned. Else (on success) 0 is
3159 returned, and the value in *pv may or may not be the same as on input.
3160 As always, an extra byte is allocated for a trailing \0 byte (newsize
3161 does *not* include that), and a trailing \0 byte is stored.
3162*/
3163
3164int
3165_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3166{
3167 register PyObject *v;
3168 register PyBytesObject *sv;
3169 v = *pv;
3170 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3171 *pv = 0;
3172 Py_DECREF(v);
3173 PyErr_BadInternalCall();
3174 return -1;
3175 }
3176 /* XXX UNREF/NEWREF interface should be more symmetrical */
3177 _Py_DEC_REFTOTAL;
3178 _Py_ForgetReference(v);
3179 *pv = (PyObject *)
Mark Dickinsonfd24b322008-12-06 15:33:31 +00003180 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003181 if (*pv == NULL) {
3182 PyObject_Del(v);
3183 PyErr_NoMemory();
3184 return -1;
3185 }
3186 _Py_NewReference(*pv);
3187 sv = (PyBytesObject *) *pv;
3188 Py_SIZE(sv) = newsize;
3189 sv->ob_sval[newsize] = '\0';
3190 sv->ob_shash = -1; /* invalidate cached hash value */
3191 return 0;
3192}
3193
3194/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3195 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3196 * Python's regular ints.
3197 * Return value: a new PyString*, or NULL if error.
3198 * . *pbuf is set to point into it,
3199 * *plen set to the # of chars following that.
3200 * Caller must decref it when done using pbuf.
3201 * The string starting at *pbuf is of the form
3202 * "-"? ("0x" | "0X")? digit+
3203 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3204 * set in flags. The case of hex digits will be correct,
3205 * There will be at least prec digits, zero-filled on the left if
3206 * necessary to get that many.
3207 * val object to be converted
3208 * flags bitmask of format flags; only F_ALT is looked at
3209 * prec minimum number of digits; 0-fill on left if needed
3210 * type a character in [duoxX]; u acts the same as d
3211 *
3212 * CAUTION: o, x and X conversions on regular ints can never
3213 * produce a '-' sign, but can for Python's unbounded ints.
3214 */
3215PyObject*
3216_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3217 char **pbuf, int *plen)
3218{
3219 PyObject *result = NULL;
3220 char *buf;
3221 Py_ssize_t i;
3222 int sign; /* 1 if '-', else 0 */
3223 int len; /* number of characters */
3224 Py_ssize_t llen;
3225 int numdigits; /* len == numnondigits + numdigits */
3226 int numnondigits = 0;
3227
3228 /* Avoid exceeding SSIZE_T_MAX */
Christian Heimesce694b72008-08-24 16:15:19 +00003229 if (prec > INT_MAX-3) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003230 PyErr_SetString(PyExc_OverflowError,
3231 "precision too large");
3232 return NULL;
3233 }
3234
3235 switch (type) {
3236 case 'd':
3237 case 'u':
3238 /* Special-case boolean: we want 0/1 */
3239 if (PyBool_Check(val))
3240 result = PyNumber_ToBase(val, 10);
3241 else
3242 result = Py_TYPE(val)->tp_str(val);
3243 break;
3244 case 'o':
3245 numnondigits = 2;
3246 result = PyNumber_ToBase(val, 8);
3247 break;
3248 case 'x':
3249 case 'X':
3250 numnondigits = 2;
3251 result = PyNumber_ToBase(val, 16);
3252 break;
3253 default:
3254 assert(!"'type' not in [duoxX]");
3255 }
3256 if (!result)
3257 return NULL;
3258
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003259 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003260 if (!buf) {
3261 Py_DECREF(result);
3262 return NULL;
3263 }
3264
3265 /* To modify the string in-place, there can only be one reference. */
3266 if (Py_REFCNT(result) != 1) {
3267 PyErr_BadInternalCall();
3268 return NULL;
3269 }
3270 llen = PyUnicode_GetSize(result);
3271 if (llen > INT_MAX) {
3272 PyErr_SetString(PyExc_ValueError,
3273 "string too large in _PyBytes_FormatLong");
3274 return NULL;
3275 }
3276 len = (int)llen;
3277 if (buf[len-1] == 'L') {
3278 --len;
3279 buf[len] = '\0';
3280 }
3281 sign = buf[0] == '-';
3282 numnondigits += sign;
3283 numdigits = len - numnondigits;
3284 assert(numdigits > 0);
3285
3286 /* Get rid of base marker unless F_ALT */
3287 if (((flags & F_ALT) == 0 &&
3288 (type == 'o' || type == 'x' || type == 'X'))) {
3289 assert(buf[sign] == '0');
3290 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3291 buf[sign+1] == 'o');
3292 numnondigits -= 2;
3293 buf += 2;
3294 len -= 2;
3295 if (sign)
3296 buf[0] = '-';
3297 assert(len == numnondigits + numdigits);
3298 assert(numdigits > 0);
3299 }
3300
3301 /* Fill with leading zeroes to meet minimum width. */
3302 if (prec > numdigits) {
3303 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3304 numnondigits + prec);
3305 char *b1;
3306 if (!r1) {
3307 Py_DECREF(result);
3308 return NULL;
3309 }
3310 b1 = PyBytes_AS_STRING(r1);
3311 for (i = 0; i < numnondigits; ++i)
3312 *b1++ = *buf++;
3313 for (i = 0; i < prec - numdigits; i++)
3314 *b1++ = '0';
3315 for (i = 0; i < numdigits; i++)
3316 *b1++ = *buf++;
3317 *b1 = '\0';
3318 Py_DECREF(result);
3319 result = r1;
3320 buf = PyBytes_AS_STRING(result);
3321 len = numnondigits + prec;
3322 }
3323
3324 /* Fix up case for hex conversions. */
3325 if (type == 'X') {
3326 /* Need to convert all lower case letters to upper case.
3327 and need to convert 0x to 0X (and -0x to -0X). */
3328 for (i = 0; i < len; i++)
3329 if (buf[i] >= 'a' && buf[i] <= 'x')
3330 buf[i] -= 'a'-'A';
3331 }
3332 *pbuf = buf;
3333 *plen = len;
3334 return result;
3335}
3336
3337void
3338PyBytes_Fini(void)
3339{
3340 int i;
3341 for (i = 0; i < UCHAR_MAX + 1; i++) {
3342 Py_XDECREF(characters[i]);
3343 characters[i] = NULL;
3344 }
3345 Py_XDECREF(nullstring);
3346 nullstring = NULL;
3347}
3348
Benjamin Peterson4116f362008-05-27 00:36:20 +00003349/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003350
/* Instance layout of the bytes iterator (PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003356
/* Destructor for bytes iterators: removes the iterator from GC
   tracking, drops its reference to the underlying bytes object (if it
   still holds one) and then frees the iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3364
/* GC traversal hook: visits the bytes object the iterator refers to.
   Returns 0 unless the Py_VISIT macro returns early. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3371
3372static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003373striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003374{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003375 PyBytesObject *seq;
3376 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003377
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003378 assert(it != NULL);
3379 seq = it->it_seq;
3380 if (seq == NULL)
3381 return NULL;
3382 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003384 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3385 item = PyLong_FromLong(
3386 (unsigned char)seq->ob_sval[it->it_index]);
3387 if (item != NULL)
3388 ++it->it_index;
3389 return item;
3390 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003391
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003392 Py_DECREF(seq);
3393 it->it_seq = NULL;
3394 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003395}
3396
3397static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003398striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003399{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003400 Py_ssize_t len = 0;
3401 if (it->it_seq)
3402 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3403 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003404}
3405
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");

/* Method table of the bytes iterator type; its only method is
   __length_hint__, implemented by striter_len. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3414
/* Type object for the iterator created by str_iter().  Instances are
   fixed-size (tp_itemsize is 0) and take part in cyclic garbage
   collection (Py_TPFLAGS_HAVE_GC with a tp_traverse slot). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,             /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3447
3448static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003449str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003450{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003451 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003452
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003453 if (!PyBytes_Check(seq)) {
3454 PyErr_BadInternalCall();
3455 return NULL;
3456 }
3457 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3458 if (it == NULL)
3459 return NULL;
3460 it->it_index = 0;
3461 Py_INCREF(seq);
3462 it->it_seq = (PyBytesObject *)seq;
3463 _PyObject_GC_TRACK(it);
3464 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003465}