blob: d3b598e0e8b6029265396058b4875a6365165282 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
17 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000019 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000020 return -1;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
24 return -1;
25 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* Basic size of a bytes object: the offset of the data array plus one byte
   for the terminating null.  An allocation for n data bytes should request
   PyBytesObject_SIZE + n bytes.

   Compared with sizeof(PyBytesObject) this saves 3 bytes per allocation on
   a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
59 PyString object must be treated as immutable and you must not fill in nor
60 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
88#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
91 Py_INCREF(op);
92 return (PyObject *)op;
93 }
94
Mark Dickinsonfd24b322008-12-06 15:33:31 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
100
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000101 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127 assert(str != NULL);
128 size = strlen(str);
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000130 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000131 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
135#ifdef COUNT_ALLOCS
136 null_strings++;
137#endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
142#ifdef COUNT_ALLOCS
143 one_strings++;
144#endif
145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178#else
179#ifdef __va_copy
180 __va_copy(count, vargs);
181#else
182 count = vargs;
183#endif
184#endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
239 expand:
240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
345
346 end:
347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
354 PyObject* ret;
355 va_list vargs;
356
357#ifdef HAVE_STDARG_PROTOTYPES
358 va_start(vargs, format);
359#else
360 va_start(vargs);
361#endif
362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
383{
384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
407
408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
413
414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
505 failed:
506 Py_DECREF(v);
507 return NULL;
508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
537 register char **s,
538 register Py_ssize_t *len)
539{
540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
544
545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
550
551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
565#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566
Neal Norwitz6968b052007-02-27 19:02:19 +0000567#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568#define STRINGLIB_LEN PyBytes_GET_SIZE
569#define STRINGLIB_NEW PyBytes_FromStringAndSize
570#define STRINGLIB_STR PyBytes_AS_STRING
571/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
572
573#define STRINGLIB_EMPTY nullstring
574#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
575#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
577#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000578
Neal Norwitz6968b052007-02-27 19:02:19 +0000579#include "stringlib/count.h"
580#include "stringlib/find.h"
581#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000582#include "stringlib/ctype.h"
583#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000584
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
586#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000587
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588PyObject *
589PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000590{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591 static const char *hexdigits = "0123456789abcdef";
592 register PyBytesObject* op = (PyBytesObject*) obj;
593 Py_ssize_t length = Py_SIZE(op);
594 size_t newsize = 3 + 4 * length;
595 PyObject *v;
596 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
601 v = PyUnicode_FromUnicode(NULL, newsize);
602 if (v == NULL) {
603 return NULL;
604 }
605 else {
606 register Py_ssize_t i;
607 register Py_UNICODE c;
608 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
609 int quote;
610
611 /* Figure out which quote to use; single is preferred */
612 quote = '\'';
613 if (smartquotes) {
614 char *test, *start;
615 start = PyBytes_AS_STRING(op);
616 for (test = start; test < start+length; ++test) {
617 if (*test == '"') {
618 quote = '\''; /* back to single */
619 goto decided;
620 }
621 else if (*test == '\'')
622 quote = '"';
623 }
624 decided:
625 ;
626 }
627
628 *p++ = 'b', *p++ = quote;
629 for (i = 0; i < length; i++) {
630 /* There's at least enough room for a hex escape
631 and a closing quote. */
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
633 c = op->ob_sval[i];
634 if (c == quote || c == '\\')
635 *p++ = '\\', *p++ = c;
636 else if (c == '\t')
637 *p++ = '\\', *p++ = 't';
638 else if (c == '\n')
639 *p++ = '\\', *p++ = 'n';
640 else if (c == '\r')
641 *p++ = '\\', *p++ = 'r';
642 else if (c < ' ' || c >= 0x7f) {
643 *p++ = '\\';
644 *p++ = 'x';
645 *p++ = hexdigits[(c & 0xf0) >> 4];
646 *p++ = hexdigits[c & 0xf];
647 }
648 else
649 *p++ = c;
650 }
651 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
652 *p++ = quote;
653 *p = '\0';
654 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
655 Py_DECREF(v);
656 return NULL;
657 }
658 return v;
659 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000660}
661
Neal Norwitz6968b052007-02-27 19:02:19 +0000662static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000663string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000664{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000666}
667
Neal Norwitz6968b052007-02-27 19:02:19 +0000668static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000670{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671 if (Py_BytesWarningFlag) {
672 if (PyErr_WarnEx(PyExc_BytesWarning,
673 "str() on a bytes instance", 1))
674 return NULL;
675 }
676 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000677}
678
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000679static Py_ssize_t
680string_length(PyBytesObject *a)
681{
682 return Py_SIZE(a);
683}
Neal Norwitz6968b052007-02-27 19:02:19 +0000684
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685/* This is also used by PyBytes_Concat() */
686static PyObject *
687string_concat(PyObject *a, PyObject *b)
688{
689 Py_ssize_t size;
690 Py_buffer va, vb;
691 PyObject *result = NULL;
692
693 va.len = -1;
694 vb.len = -1;
695 if (_getbuffer(a, &va) < 0 ||
696 _getbuffer(b, &vb) < 0) {
697 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
698 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
699 goto done;
700 }
701
702 /* Optimize end cases */
703 if (va.len == 0 && PyBytes_CheckExact(b)) {
704 result = b;
705 Py_INCREF(result);
706 goto done;
707 }
708 if (vb.len == 0 && PyBytes_CheckExact(a)) {
709 result = a;
710 Py_INCREF(result);
711 goto done;
712 }
713
714 size = va.len + vb.len;
715 if (size < 0) {
716 PyErr_NoMemory();
717 goto done;
718 }
719
720 result = PyBytes_FromStringAndSize(NULL, size);
721 if (result != NULL) {
722 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
723 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
724 }
725
726 done:
727 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000728 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000729 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000730 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000731 return result;
732}
Neal Norwitz6968b052007-02-27 19:02:19 +0000733
734static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000735string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000736{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000737 register Py_ssize_t i;
738 register Py_ssize_t j;
739 register Py_ssize_t size;
740 register PyBytesObject *op;
741 size_t nbytes;
742 if (n < 0)
743 n = 0;
744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
747 size = Py_SIZE(a) * n;
748 if (n && size / n != Py_SIZE(a)) {
749 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000750 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000751 return NULL;
752 }
753 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
754 Py_INCREF(a);
755 return (PyObject *)a;
756 }
757 nbytes = (size_t)size;
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000758 if (nbytes + PyBytesObject_SIZE <= nbytes) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000759 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000760 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000761 return NULL;
762 }
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000763 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000764 if (op == NULL)
765 return PyErr_NoMemory();
766 PyObject_INIT_VAR(op, &PyBytes_Type, size);
767 op->ob_shash = -1;
768 op->ob_sval[size] = '\0';
769 if (Py_SIZE(a) == 1 && n > 0) {
770 memset(op->ob_sval, a->ob_sval[0] , n);
771 return (PyObject *) op;
772 }
773 i = 0;
774 if (i < size) {
775 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
776 i = Py_SIZE(a);
777 }
778 while (i < size) {
779 j = (i <= size-i) ? i : size-i;
780 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
781 i += j;
782 }
783 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000784}
785
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000787string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788{
789 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
790 if (ival == -1 && PyErr_Occurred()) {
791 Py_buffer varg;
792 int pos;
793 PyErr_Clear();
794 if (_getbuffer(arg, &varg) < 0)
795 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000797 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000798 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000799 return pos >= 0;
800 }
801 if (ival < 0 || ival >= 256) {
802 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
803 return -1;
804 }
805
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000806 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000807}
808
Neal Norwitz6968b052007-02-27 19:02:19 +0000809static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000810string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000811{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000812 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000813 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000814 return NULL;
815 }
816 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000817}
818
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000819static PyObject*
820string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000821{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000822 int c;
823 Py_ssize_t len_a, len_b;
824 Py_ssize_t min_len;
825 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000826
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000827 /* Make sure both arguments are strings. */
828 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Barry Warsaw9e9dcd62008-10-17 01:50:37 +0000829 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000830 (PyObject_IsInstance((PyObject*)a,
831 (PyObject*)&PyUnicode_Type) ||
832 PyObject_IsInstance((PyObject*)b,
833 (PyObject*)&PyUnicode_Type))) {
834 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000835 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000836 return NULL;
837 }
838 result = Py_NotImplemented;
839 goto out;
840 }
841 if (a == b) {
842 switch (op) {
843 case Py_EQ:case Py_LE:case Py_GE:
844 result = Py_True;
845 goto out;
846 case Py_NE:case Py_LT:case Py_GT:
847 result = Py_False;
848 goto out;
849 }
850 }
851 if (op == Py_EQ) {
852 /* Supporting Py_NE here as well does not save
853 much time, since Py_NE is rarely used. */
854 if (Py_SIZE(a) == Py_SIZE(b)
855 && (a->ob_sval[0] == b->ob_sval[0]
856 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
857 result = Py_True;
858 } else {
859 result = Py_False;
860 }
861 goto out;
862 }
863 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
864 min_len = (len_a < len_b) ? len_a : len_b;
865 if (min_len > 0) {
866 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
867 if (c==0)
868 c = memcmp(a->ob_sval, b->ob_sval, min_len);
869 } else
870 c = 0;
871 if (c == 0)
872 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
873 switch (op) {
874 case Py_LT: c = c < 0; break;
875 case Py_LE: c = c <= 0; break;
876 case Py_EQ: assert(0); break; /* unreachable */
877 case Py_NE: c = c != 0; break;
878 case Py_GT: c = c > 0; break;
879 case Py_GE: c = c >= 0; break;
880 default:
881 result = Py_NotImplemented;
882 goto out;
883 }
884 result = c ? Py_True : Py_False;
885 out:
886 Py_INCREF(result);
887 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000888}
889
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000890static long
891string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000892{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000893 register Py_ssize_t len;
894 register unsigned char *p;
895 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000896
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000897 if (a->ob_shash != -1)
898 return a->ob_shash;
899 len = Py_SIZE(a);
900 p = (unsigned char *) a->ob_sval;
901 x = *p << 7;
902 while (--len >= 0)
903 x = (1000003*x) ^ *p++;
904 x ^= Py_SIZE(a);
905 if (x == -1)
906 x = -2;
907 a->ob_shash = x;
908 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000909}
910
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000911static PyObject*
912string_subscript(PyBytesObject* self, PyObject* item)
913{
914 if (PyIndex_Check(item)) {
915 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
916 if (i == -1 && PyErr_Occurred())
917 return NULL;
918 if (i < 0)
919 i += PyBytes_GET_SIZE(self);
920 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
921 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000922 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000923 return NULL;
924 }
925 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
926 }
927 else if (PySlice_Check(item)) {
928 Py_ssize_t start, stop, step, slicelength, cur, i;
929 char* source_buf;
930 char* result_buf;
931 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000932
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000933 if (PySlice_GetIndicesEx((PySliceObject*)item,
934 PyBytes_GET_SIZE(self),
935 &start, &stop, &step, &slicelength) < 0) {
936 return NULL;
937 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000938
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000939 if (slicelength <= 0) {
940 return PyBytes_FromStringAndSize("", 0);
941 }
942 else if (start == 0 && step == 1 &&
943 slicelength == PyBytes_GET_SIZE(self) &&
944 PyBytes_CheckExact(self)) {
945 Py_INCREF(self);
946 return (PyObject *)self;
947 }
948 else if (step == 1) {
949 return PyBytes_FromStringAndSize(
950 PyBytes_AS_STRING(self) + start,
951 slicelength);
952 }
953 else {
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000954 source_buf = PyBytes_AS_STRING(self);
955 result = PyBytes_FromStringAndSize(NULL, slicelength);
956 if (result == NULL)
957 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000958
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000959 result_buf = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000960 for (cur = start, i = 0; i < slicelength;
961 cur += step, i++) {
962 result_buf[i] = source_buf[cur];
963 }
964
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000965 return result;
966 }
967 }
968 else {
969 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000970 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000971 Py_TYPE(item)->tp_name);
972 return NULL;
973 }
974}
975
976static int
977string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
978{
Martin v. Löwis423be952008-08-13 15:53:07 +0000979 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000980 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000981}
982
983static PySequenceMethods string_as_sequence = {
984 (lenfunc)string_length, /*sq_length*/
985 (binaryfunc)string_concat, /*sq_concat*/
986 (ssizeargfunc)string_repeat, /*sq_repeat*/
987 (ssizeargfunc)string_item, /*sq_item*/
988 0, /*sq_slice*/
989 0, /*sq_ass_item*/
990 0, /*sq_ass_slice*/
991 (objobjproc)string_contains /*sq_contains*/
992};
993
994static PyMappingMethods string_as_mapping = {
995 (lenfunc)string_length,
996 (binaryfunc)string_subscript,
997 0,
998};
999
1000static PyBufferProcs string_as_buffer = {
1001 (getbufferproc)string_buffer_getbuffer,
1002 NULL,
1003};
1004
1005
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, one per strip mode (indexed as above). */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1014
Neal Norwitz6968b052007-02-27 19:02:19 +00001015
/* Test whether the `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared before the interior is
   handed to memcmp().

   Don't call if length < 2: the interior length (length-2) would then be
   negative, and memcmp() takes an unsigned size. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits.
   5 splits gives 6 elements; the result is capped at MAX_PREALLOC. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the bytes data[left:right] to the result list.

   Relies on the caller's locals `str`, `list` and `count` and on an
   `onError` label.  The first MAX_PREALLOC items are stored directly into
   the slots of the preallocated list; later items are appended (the append
   takes its own reference, so ours is dropped either way).

   Wrapped in do/while(0) so that the macro behaves as a single statement,
   including in an unbraced if/else.  Must be followed by a semicolon. */
#define SPLIT_ADD(data, left, right) do {                               \
        str = PyBytes_FromStringAndSize((data) + (left),                \
                                        (right) - (left));              \
        if (str == NULL)                                                \
            goto onError;                                               \
        if (count < MAX_PREALLOC) {                                     \
            PyList_SET_ITEM(list, count, str);                          \
        } else {                                                        \
            if (PyList_Append(list, str)) {                             \
                Py_DECREF(str);                                         \
                goto onError;                                           \
            }                                                           \
            else                                                        \
                Py_DECREF(str);                                         \
        }                                                               \
        count++;                                                        \
    } while (0)

/* Always force the list to the expected size (uses the caller's `count`). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Advance (or, for the R* variants, retreat) index `i` over bytes of `s`
   that are / are not whitespace according to ISSPACE.  `i` must be an
   lvalue.  These keep their plain-block form so that existing uses, with or
   without a trailing semicolon, continue to compile. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[(i)])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001060
1061Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001063{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001064 const char *s = PyBytes_AS_STRING(self);
1065 Py_ssize_t i, j, count=0;
1066 PyObject *str;
1067 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001068
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001069 if (list == NULL)
1070 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001071
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001072 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001074 while (maxsplit-- > 0) {
1075 SKIP_SPACE(s, i, len);
1076 if (i==len) break;
1077 j = i; i++;
1078 SKIP_NONSPACE(s, i, len);
1079 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1080 /* No whitespace in self, so just use it as list[0] */
1081 Py_INCREF(self);
1082 PyList_SET_ITEM(list, 0, (PyObject *)self);
1083 count++;
1084 break;
1085 }
1086 SPLIT_ADD(s, j, i);
1087 }
1088
1089 if (i < len) {
1090 /* Only occurs when maxsplit was reached */
1091 /* Skip any remaining whitespace and copy to end of string */
1092 SKIP_SPACE(s, i, len);
1093 if (i != len)
1094 SPLIT_ADD(s, i, len);
1095 }
1096 FIX_PREALLOC_SIZE(list);
1097 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001098 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099 Py_DECREF(list);
1100 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001101}
1102
Guido van Rossum8f950672007-09-10 16:53:45 +00001103Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001104split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001105{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106 const char *s = PyBytes_AS_STRING(self);
1107 register Py_ssize_t i, j, count=0;
1108 PyObject *str;
1109 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001110
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001111 if (list == NULL)
1112 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001113
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001114 i = j = 0;
1115 while ((j < len) && (maxcount-- > 0)) {
1116 for(; j<len; j++) {
1117 /* I found that using memchr makes no difference */
1118 if (s[j] == ch) {
1119 SPLIT_ADD(s, i, j);
1120 i = j = j + 1;
1121 break;
1122 }
1123 }
1124 }
1125 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1126 /* ch not in self, so just use self as list[0] */
1127 Py_INCREF(self);
1128 PyList_SET_ITEM(list, 0, (PyObject *)self);
1129 count++;
1130 }
1131 else if (i <= len) {
1132 SPLIT_ADD(s, i, len);
1133 }
1134 FIX_PREALLOC_SIZE(list);
1135 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001136
1137 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001138 Py_DECREF(list);
1139 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001140}
1141
Neal Norwitz6968b052007-02-27 19:02:19 +00001142PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001143"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001144\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001145Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001146If sep is not specified or is None, B is split on ASCII whitespace\n\
1147characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001148If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001149
1150static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001152{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001153 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1154 Py_ssize_t maxsplit = -1, count=0;
1155 const char *s = PyBytes_AS_STRING(self), *sub;
1156 Py_buffer vsub;
1157 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001158#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001159 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001160#endif
1161
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001162 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1163 return NULL;
1164 if (maxsplit < 0)
1165 maxsplit = PY_SSIZE_T_MAX;
1166 if (subobj == Py_None)
1167 return split_whitespace(self, len, maxsplit);
1168 if (_getbuffer(subobj, &vsub) < 0)
1169 return NULL;
1170 sub = vsub.buf;
1171 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001172
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173 if (n == 0) {
1174 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001175 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001176 return NULL;
1177 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001178 else if (n == 1) {
1179 list = split_char(self, len, sub[0], maxsplit);
1180 PyBuffer_Release(&vsub);
1181 return list;
1182 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001183
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184 list = PyList_New(PREALLOC_SIZE(maxsplit));
1185 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001186 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187 return NULL;
1188 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001189
1190#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191 i = j = 0;
1192 while (maxsplit-- > 0) {
1193 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1194 if (pos < 0)
1195 break;
1196 j = i+pos;
1197 SPLIT_ADD(s, i, j);
1198 i = j + n;
1199 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001200#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201 i = j = 0;
1202 while ((j+n <= len) && (maxsplit-- > 0)) {
1203 for (; j+n <= len; j++) {
1204 if (Py_STRING_MATCH(s, j, sub, n)) {
1205 SPLIT_ADD(s, i, j);
1206 i = j = j + n;
1207 break;
1208 }
1209 }
1210 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001211#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212 SPLIT_ADD(s, i, len);
1213 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001214 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001215 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001216
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217 onError:
1218 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001219 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001221}
1222
Neal Norwitz6968b052007-02-27 19:02:19 +00001223PyDoc_STRVAR(partition__doc__,
1224"B.partition(sep) -> (head, sep, tail)\n\
1225\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001226Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001227the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001229
1230static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001232{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233 const char *sep;
1234 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236 if (PyBytes_Check(sep_obj)) {
1237 sep = PyBytes_AS_STRING(sep_obj);
1238 sep_len = PyBytes_GET_SIZE(sep_obj);
1239 }
1240 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1241 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001242
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243 return stringlib_partition(
1244 (PyObject*) self,
1245 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1246 sep_obj, sep, sep_len
1247 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001248}
1249
1250PyDoc_STRVAR(rpartition__doc__,
1251"B.rpartition(sep) -> (tail, sep, head)\n\
1252\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001253Search for the separator sep in B, starting at the end of B,\n\
1254and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001255part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
1258static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001260{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261 const char *sep;
1262 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264 if (PyBytes_Check(sep_obj)) {
1265 sep = PyBytes_AS_STRING(sep_obj);
1266 sep_len = PyBytes_GET_SIZE(sep_obj);
1267 }
1268 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1269 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271 return stringlib_rpartition(
1272 (PyObject*) self,
1273 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1274 sep_obj, sep, sep_len
1275 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001276}
1277
1278Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001280{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001281 const char *s = PyBytes_AS_STRING(self);
1282 Py_ssize_t i, j, count=0;
1283 PyObject *str;
1284 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001285
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286 if (list == NULL)
1287 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001288
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291 while (maxsplit-- > 0) {
1292 RSKIP_SPACE(s, i);
1293 if (i<0) break;
1294 j = i; i--;
1295 RSKIP_NONSPACE(s, i);
1296 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1297 /* No whitespace in self, so just use it as list[0] */
1298 Py_INCREF(self);
1299 PyList_SET_ITEM(list, 0, (PyObject *)self);
1300 count++;
1301 break;
1302 }
1303 SPLIT_ADD(s, i + 1, j + 1);
1304 }
1305 if (i >= 0) {
1306 /* Only occurs when maxsplit was reached. Skip any remaining
1307 whitespace and copy to beginning of string. */
1308 RSKIP_SPACE(s, i);
1309 if (i >= 0)
1310 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001311
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312 }
1313 FIX_PREALLOC_SIZE(list);
1314 if (PyList_Reverse(list) < 0)
1315 goto onError;
1316 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001317 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 Py_DECREF(list);
1319 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001320}
1321
Guido van Rossum8f950672007-09-10 16:53:45 +00001322Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001323rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001324{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325 const char *s = PyBytes_AS_STRING(self);
1326 register Py_ssize_t i, j, count=0;
1327 PyObject *str;
1328 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001329
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330 if (list == NULL)
1331 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001332
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333 i = j = len - 1;
1334 while ((i >= 0) && (maxcount-- > 0)) {
1335 for (; i >= 0; i--) {
1336 if (s[i] == ch) {
1337 SPLIT_ADD(s, i + 1, j + 1);
1338 j = i = i - 1;
1339 break;
1340 }
1341 }
1342 }
1343 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1344 /* ch not in self, so just use self as list[0] */
1345 Py_INCREF(self);
1346 PyList_SET_ITEM(list, 0, (PyObject *)self);
1347 count++;
1348 }
1349 else if (j >= -1) {
1350 SPLIT_ADD(s, 0, j + 1);
1351 }
1352 FIX_PREALLOC_SIZE(list);
1353 if (PyList_Reverse(list) < 0)
1354 goto onError;
1355 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001356
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357 onError:
1358 Py_DECREF(list);
1359 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001360}
1361
Neal Norwitz6968b052007-02-27 19:02:19 +00001362PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001363"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001364\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365Return a list of the sections in B, using sep as the delimiter,\n\
1366starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001367If sep is not given, B is split on ASCII whitespace characters\n\
1368(space, tab, return, newline, formfeed, vertical tab).\n\
1369If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001370
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Neal Norwitz6968b052007-02-27 19:02:19 +00001372static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001374{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1376 Py_ssize_t maxsplit = -1, count=0;
1377 const char *s, *sub;
1378 Py_buffer vsub;
1379 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001380
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1382 return NULL;
1383 if (maxsplit < 0)
1384 maxsplit = PY_SSIZE_T_MAX;
1385 if (subobj == Py_None)
1386 return rsplit_whitespace(self, len, maxsplit);
1387 if (_getbuffer(subobj, &vsub) < 0)
1388 return NULL;
1389 sub = vsub.buf;
1390 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001391
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392 if (n == 0) {
1393 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001394 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395 return NULL;
1396 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001397 else if (n == 1) {
1398 list = rsplit_char(self, len, sub[0], maxsplit);
1399 PyBuffer_Release(&vsub);
1400 return list;
1401 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001402
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403 list = PyList_New(PREALLOC_SIZE(maxsplit));
1404 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001405 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406 return NULL;
1407 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001408
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409 j = len;
1410 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001411
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412 s = PyBytes_AS_STRING(self);
1413 while ( (i >= 0) && (maxsplit-- > 0) ) {
1414 for (; i>=0; i--) {
1415 if (Py_STRING_MATCH(s, i, sub, n)) {
1416 SPLIT_ADD(s, i + n, j);
1417 j = i;
1418 i -= n;
1419 break;
1420 }
1421 }
1422 }
1423 SPLIT_ADD(s, 0, j);
1424 FIX_PREALLOC_SIZE(list);
1425 if (PyList_Reverse(list) < 0)
1426 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001427 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001429
1430onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001432 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001434}
1435
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436#undef SPLIT_ADD
1437#undef MAX_PREALLOC
1438#undef PREALLOC_SIZE
1439
1440
1441PyDoc_STRVAR(join__doc__,
1442"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001443\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001444Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1446
Neal Norwitz6968b052007-02-27 19:02:19 +00001447static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001449{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450 char *sep = PyBytes_AS_STRING(self);
1451 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1452 PyObject *res = NULL;
1453 char *p;
1454 Py_ssize_t seqlen = 0;
1455 size_t sz = 0;
1456 Py_ssize_t i;
1457 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001458
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459 seq = PySequence_Fast(orig, "");
1460 if (seq == NULL) {
1461 return NULL;
1462 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001463
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001464 seqlen = PySequence_Size(seq);
1465 if (seqlen == 0) {
1466 Py_DECREF(seq);
1467 return PyBytes_FromString("");
1468 }
1469 if (seqlen == 1) {
1470 item = PySequence_Fast_GET_ITEM(seq, 0);
1471 if (PyBytes_CheckExact(item)) {
1472 Py_INCREF(item);
1473 Py_DECREF(seq);
1474 return item;
1475 }
1476 }
1477
1478 /* There are at least two things to join, or else we have a subclass
1479 * of the builtin types in the sequence.
1480 * Do a pre-pass to figure out the total amount of space we'll
1481 * need (sz), and see whether all argument are bytes.
1482 */
1483 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1484 for (i = 0; i < seqlen; i++) {
1485 const size_t old_sz = sz;
1486 item = PySequence_Fast_GET_ITEM(seq, i);
1487 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1488 PyErr_Format(PyExc_TypeError,
1489 "sequence item %zd: expected bytes,"
1490 " %.80s found",
1491 i, Py_TYPE(item)->tp_name);
1492 Py_DECREF(seq);
1493 return NULL;
1494 }
1495 sz += Py_SIZE(item);
1496 if (i != 0)
1497 sz += seplen;
1498 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1499 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001500 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501 Py_DECREF(seq);
1502 return NULL;
1503 }
1504 }
1505
1506 /* Allocate result space. */
1507 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1508 if (res == NULL) {
1509 Py_DECREF(seq);
1510 return NULL;
1511 }
1512
1513 /* Catenate everything. */
1514 /* I'm not worried about a PyByteArray item growing because there's
1515 nowhere in this function where we release the GIL. */
1516 p = PyBytes_AS_STRING(res);
1517 for (i = 0; i < seqlen; ++i) {
1518 size_t n;
1519 char *q;
1520 if (i) {
1521 Py_MEMCPY(p, sep, seplen);
1522 p += seplen;
1523 }
1524 item = PySequence_Fast_GET_ITEM(seq, i);
1525 n = Py_SIZE(item);
1526 if (PyBytes_Check(item))
1527 q = PyBytes_AS_STRING(item);
1528 else
1529 q = PyByteArray_AS_STRING(item);
1530 Py_MEMCPY(p, q, n);
1531 p += n;
1532 }
1533
1534 Py_DECREF(seq);
1535 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001536}
1537
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538PyObject *
1539_PyBytes_Join(PyObject *sep, PyObject *x)
1540{
1541 assert(sep != NULL && PyBytes_Check(sep));
1542 assert(x != NULL);
1543 return string_join(sep, x);
1544}
1545
1546Py_LOCAL_INLINE(void)
1547string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1548{
1549 if (*end > len)
1550 *end = len;
1551 else if (*end < 0)
1552 *end += len;
1553 if (*end < 0)
1554 *end = 0;
1555 if (*start < 0)
1556 *start += len;
1557 if (*start < 0)
1558 *start = 0;
1559}
1560
1561Py_LOCAL_INLINE(Py_ssize_t)
1562string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1563{
1564 PyObject *subobj;
1565 const char *sub;
1566 Py_ssize_t sub_len;
1567 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1568 PyObject *obj_start=Py_None, *obj_end=Py_None;
1569
1570 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1571 &obj_start, &obj_end))
1572 return -2;
1573 /* To support None in "start" and "end" arguments, meaning
1574 the same as if they were not passed.
1575 */
1576 if (obj_start != Py_None)
1577 if (!_PyEval_SliceIndex(obj_start, &start))
1578 return -2;
1579 if (obj_end != Py_None)
1580 if (!_PyEval_SliceIndex(obj_end, &end))
1581 return -2;
1582
1583 if (PyBytes_Check(subobj)) {
1584 sub = PyBytes_AS_STRING(subobj);
1585 sub_len = PyBytes_GET_SIZE(subobj);
1586 }
1587 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1588 /* XXX - the "expected a character buffer object" is pretty
1589 confusing for a non-expert. remap to something else ? */
1590 return -2;
1591
1592 if (dir > 0)
1593 return stringlib_find_slice(
1594 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1595 sub, sub_len, start, end);
1596 else
1597 return stringlib_rfind_slice(
1598 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1599 sub, sub_len, start, end);
1600}
1601
1602
1603PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001604"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001605\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606Return the lowest index in S where substring sub is found,\n\
1607such that sub is contained within s[start:end]. Optional\n\
1608arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001609\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610Return -1 on failure.");
1611
Neal Norwitz6968b052007-02-27 19:02:19 +00001612static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001614{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615 Py_ssize_t result = string_find_internal(self, args, +1);
1616 if (result == -2)
1617 return NULL;
1618 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001619}
1620
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001621
1622PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001623"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001624\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001625Like B.find() but raise ValueError when the substring is not found.");
1626
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001627static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001629{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630 Py_ssize_t result = string_find_internal(self, args, +1);
1631 if (result == -2)
1632 return NULL;
1633 if (result == -1) {
1634 PyErr_SetString(PyExc_ValueError,
1635 "substring not found");
1636 return NULL;
1637 }
1638 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001639}
1640
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001641
1642PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001643"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001644\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645Return the highest index in B where substring sub is found,\n\
1646such that sub is contained within s[start:end]. Optional\n\
1647arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001648\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649Return -1 on failure.");
1650
Neal Norwitz6968b052007-02-27 19:02:19 +00001651static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001653{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654 Py_ssize_t result = string_find_internal(self, args, -1);
1655 if (result == -2)
1656 return NULL;
1657 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001658}
1659
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001660
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001662"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663\n\
1664Like B.rfind() but raise ValueError when the substring is not found.");
1665
1666static PyObject *
1667string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001668{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669 Py_ssize_t result = string_find_internal(self, args, -1);
1670 if (result == -2)
1671 return NULL;
1672 if (result == -1) {
1673 PyErr_SetString(PyExc_ValueError,
1674 "substring not found");
1675 return NULL;
1676 }
1677 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001678}
1679
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680
1681Py_LOCAL_INLINE(PyObject *)
1682do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001683{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684 Py_buffer vsep;
1685 char *s = PyBytes_AS_STRING(self);
1686 Py_ssize_t len = PyBytes_GET_SIZE(self);
1687 char *sep;
1688 Py_ssize_t seplen;
1689 Py_ssize_t i, j;
1690
1691 if (_getbuffer(sepobj, &vsep) < 0)
1692 return NULL;
1693 sep = vsep.buf;
1694 seplen = vsep.len;
1695
1696 i = 0;
1697 if (striptype != RIGHTSTRIP) {
1698 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1699 i++;
1700 }
1701 }
1702
1703 j = len;
1704 if (striptype != LEFTSTRIP) {
1705 do {
1706 j--;
1707 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1708 j++;
1709 }
1710
Martin v. Löwis423be952008-08-13 15:53:07 +00001711 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
1713 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1714 Py_INCREF(self);
1715 return (PyObject*)self;
1716 }
1717 else
1718 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001719}
1720
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
1722Py_LOCAL_INLINE(PyObject *)
1723do_strip(PyBytesObject *self, int striptype)
1724{
1725 char *s = PyBytes_AS_STRING(self);
1726 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1727
1728 i = 0;
1729 if (striptype != RIGHTSTRIP) {
1730 while (i < len && ISSPACE(s[i])) {
1731 i++;
1732 }
1733 }
1734
1735 j = len;
1736 if (striptype != LEFTSTRIP) {
1737 do {
1738 j--;
1739 } while (j >= i && ISSPACE(s[j]));
1740 j++;
1741 }
1742
1743 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1744 Py_INCREF(self);
1745 return (PyObject*)self;
1746 }
1747 else
1748 return PyBytes_FromStringAndSize(s+i, j-i);
1749}
1750
1751
1752Py_LOCAL_INLINE(PyObject *)
1753do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1754{
1755 PyObject *sep = NULL;
1756
1757 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1758 return NULL;
1759
1760 if (sep != NULL && sep != Py_None) {
1761 return do_xstrip(self, striptype, sep);
1762 }
1763 return do_strip(self, striptype);
1764}
1765
1766
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001767PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001770Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775 if (PyTuple_GET_SIZE(args) == 0)
1776 return do_strip(self, BOTHSTRIP); /* Common case */
1777 else
1778 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779}
1780
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001782PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001785Strip leading bytes contained in the argument.\n\
1786If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790 if (PyTuple_GET_SIZE(args) == 0)
1791 return do_strip(self, LEFTSTRIP); /* Common case */
1792 else
1793 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001794}
1795
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001797PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001799\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001800Strip trailing bytes contained in the argument.\n\
1801If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001802static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001804{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805 if (PyTuple_GET_SIZE(args) == 0)
1806 return do_strip(self, RIGHTSTRIP); /* Common case */
1807 else
1808 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001809}
Neal Norwitz6968b052007-02-27 19:02:19 +00001810
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
1812PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001813"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001814\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815Return the number of non-overlapping occurrences of substring sub in\n\
1816string S[start:end]. Optional arguments start and end are interpreted\n\
1817as in slice notation.");
1818
1819static PyObject *
1820string_count(PyBytesObject *self, PyObject *args)
1821{
1822 PyObject *sub_obj;
1823 const char *str = PyBytes_AS_STRING(self), *sub;
1824 Py_ssize_t sub_len;
1825 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1826
1827 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1828 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1829 return NULL;
1830
1831 if (PyBytes_Check(sub_obj)) {
1832 sub = PyBytes_AS_STRING(sub_obj);
1833 sub_len = PyBytes_GET_SIZE(sub_obj);
1834 }
1835 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1836 return NULL;
1837
1838 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1839
1840 return PyLong_FromSsize_t(
1841 stringlib_count(str + start, end - start, sub, sub_len)
1842 );
1843}
1844
1845
1846PyDoc_STRVAR(translate__doc__,
1847"B.translate(table[, deletechars]) -> bytes\n\
1848\n\
1849Return a copy of B, where all characters occurring in the\n\
1850optional argument deletechars are removed, and the remaining\n\
1851characters have been mapped through the given translation\n\
1852table, which must be a bytes object of length 256.");
1853
1854static PyObject *
1855string_translate(PyBytesObject *self, PyObject *args)
1856{
1857 register char *input, *output;
1858 const char *table;
1859 register Py_ssize_t i, c, changed = 0;
1860 PyObject *input_obj = (PyObject*)self;
1861 const char *output_start, *del_table=NULL;
1862 Py_ssize_t inlen, tablen, dellen = 0;
1863 PyObject *result;
1864 int trans_table[256];
1865 PyObject *tableobj, *delobj = NULL;
1866
1867 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1868 &tableobj, &delobj))
1869 return NULL;
1870
1871 if (PyBytes_Check(tableobj)) {
1872 table = PyBytes_AS_STRING(tableobj);
1873 tablen = PyBytes_GET_SIZE(tableobj);
1874 }
1875 else if (tableobj == Py_None) {
1876 table = NULL;
1877 tablen = 256;
1878 }
1879 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1880 return NULL;
1881
1882 if (tablen != 256) {
1883 PyErr_SetString(PyExc_ValueError,
1884 "translation table must be 256 characters long");
1885 return NULL;
1886 }
1887
1888 if (delobj != NULL) {
1889 if (PyBytes_Check(delobj)) {
1890 del_table = PyBytes_AS_STRING(delobj);
1891 dellen = PyBytes_GET_SIZE(delobj);
1892 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1894 return NULL;
1895 }
1896 else {
1897 del_table = NULL;
1898 dellen = 0;
1899 }
1900
1901 inlen = PyBytes_GET_SIZE(input_obj);
1902 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1903 if (result == NULL)
1904 return NULL;
1905 output_start = output = PyBytes_AsString(result);
1906 input = PyBytes_AS_STRING(input_obj);
1907
1908 if (dellen == 0 && table != NULL) {
1909 /* If no deletions are required, use faster code */
1910 for (i = inlen; --i >= 0; ) {
1911 c = Py_CHARMASK(*input++);
1912 if (Py_CHARMASK((*output++ = table[c])) != c)
1913 changed = 1;
1914 }
1915 if (changed || !PyBytes_CheckExact(input_obj))
1916 return result;
1917 Py_DECREF(result);
1918 Py_INCREF(input_obj);
1919 return input_obj;
1920 }
1921
1922 if (table == NULL) {
1923 for (i = 0; i < 256; i++)
1924 trans_table[i] = Py_CHARMASK(i);
1925 } else {
1926 for (i = 0; i < 256; i++)
1927 trans_table[i] = Py_CHARMASK(table[i]);
1928 }
1929
1930 for (i = 0; i < dellen; i++)
1931 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1932
1933 for (i = inlen; --i >= 0; ) {
1934 c = Py_CHARMASK(*input++);
1935 if (trans_table[c] != -1)
1936 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1937 continue;
1938 changed = 1;
1939 }
1940 if (!changed && PyBytes_CheckExact(input_obj)) {
1941 Py_DECREF(result);
1942 Py_INCREF(input_obj);
1943 return input_obj;
1944 }
1945 /* Fix the size of the resulting string */
1946 if (inlen > 0)
1947 _PyBytes_Resize(&result, output - output_start);
1948 return result;
1949}
1950
1951
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1959
1960/* String ops must return a string. */
1961/* If the object is subclass of string, create a copy */
1962Py_LOCAL(PyBytesObject *)
1963return_self(PyBytesObject *self)
1964{
1965 if (PyBytes_CheckExact(self)) {
1966 Py_INCREF(self);
1967 return self;
1968 }
1969 return (PyBytesObject *)PyBytes_FromStringAndSize(
1970 PyBytes_AS_STRING(self),
1971 PyBytes_GET_SIZE(self));
1972}
1973
1974Py_LOCAL_INLINE(Py_ssize_t)
1975countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1976{
1977 Py_ssize_t count=0;
1978 const char *start=target;
1979 const char *end=target+target_len;
1980
1981 while ( (start=findchar(start, end-start, c)) != NULL ) {
1982 count++;
1983 if (count >= maxcount)
1984 break;
1985 start += 1;
1986 }
1987 return count;
1988}
1989
1990Py_LOCAL(Py_ssize_t)
1991findstring(const char *target, Py_ssize_t target_len,
1992 const char *pattern, Py_ssize_t pattern_len,
1993 Py_ssize_t start,
1994 Py_ssize_t end,
1995 int direction)
1996{
1997 if (start < 0) {
1998 start += target_len;
1999 if (start < 0)
2000 start = 0;
2001 }
2002 if (end > target_len) {
2003 end = target_len;
2004 } else if (end < 0) {
2005 end += target_len;
2006 if (end < 0)
2007 end = 0;
2008 }
2009
2010 /* zero-length substrings always match at the first attempt */
2011 if (pattern_len == 0)
2012 return (direction > 0) ? start : end;
2013
2014 end -= pattern_len;
2015
2016 if (direction < 0) {
2017 for (; end >= start; end--)
2018 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2019 return end;
2020 } else {
2021 for (; start <= end; start++)
2022 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2023 return start;
2024 }
2025 return -1;
2026}
2027
2028Py_LOCAL_INLINE(Py_ssize_t)
2029countstring(const char *target, Py_ssize_t target_len,
2030 const char *pattern, Py_ssize_t pattern_len,
2031 Py_ssize_t start,
2032 Py_ssize_t end,
2033 int direction, Py_ssize_t maxcount)
2034{
2035 Py_ssize_t count=0;
2036
2037 if (start < 0) {
2038 start += target_len;
2039 if (start < 0)
2040 start = 0;
2041 }
2042 if (end > target_len) {
2043 end = target_len;
2044 } else if (end < 0) {
2045 end += target_len;
2046 if (end < 0)
2047 end = 0;
2048 }
2049
2050 /* zero-length substrings match everywhere */
2051 if (pattern_len == 0 || maxcount == 0) {
2052 if (target_len+1 < maxcount)
2053 return target_len+1;
2054 return maxcount;
2055 }
2056
2057 end -= pattern_len;
2058 if (direction < 0) {
2059 for (; (end >= start); end--)
2060 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2061 count++;
2062 if (--maxcount <= 0) break;
2063 end -= pattern_len-1;
2064 }
2065 } else {
2066 for (; (start <= end); start++)
2067 if (Py_STRING_MATCH(target, start,
2068 pattern, pattern_len)) {
2069 count++;
2070 if (--maxcount <= 0)
2071 break;
2072 start += pattern_len-1;
2073 }
2074 }
2075 return count;
2076}
2077
2078
2079/* Algorithms for different cases of string replacement */
2080
2081/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2082Py_LOCAL(PyBytesObject *)
2083replace_interleave(PyBytesObject *self,
2084 const char *to_s, Py_ssize_t to_len,
2085 Py_ssize_t maxcount)
2086{
2087 char *self_s, *result_s;
2088 Py_ssize_t self_len, result_len;
2089 Py_ssize_t count, i, product;
2090 PyBytesObject *result;
2091
2092 self_len = PyBytes_GET_SIZE(self);
2093
2094 /* 1 at the end plus 1 after every character */
2095 count = self_len+1;
2096 if (maxcount < count)
2097 count = maxcount;
2098
2099 /* Check for overflow */
2100 /* result_len = count * to_len + self_len; */
2101 product = count * to_len;
2102 if (product / to_len != count) {
2103 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002104 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105 return NULL;
2106 }
2107 result_len = product + self_len;
2108 if (result_len < 0) {
2109 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002110 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111 return NULL;
2112 }
2113
2114 if (! (result = (PyBytesObject *)
2115 PyBytes_FromStringAndSize(NULL, result_len)) )
2116 return NULL;
2117
2118 self_s = PyBytes_AS_STRING(self);
2119 result_s = PyBytes_AS_STRING(result);
2120
2121 /* TODO: special case single character, which doesn't need memcpy */
2122
2123 /* Lay the first one down (guaranteed this will occur) */
2124 Py_MEMCPY(result_s, to_s, to_len);
2125 result_s += to_len;
2126 count -= 1;
2127
2128 for (i=0; i<count; i++) {
2129 *result_s++ = *self_s++;
2130 Py_MEMCPY(result_s, to_s, to_len);
2131 result_s += to_len;
2132 }
2133
2134 /* Copy the rest of the original string */
2135 Py_MEMCPY(result_s, self_s, self_len-i);
2136
2137 return result;
2138}
2139
2140/* Special case for deleting a single character */
2141/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2142Py_LOCAL(PyBytesObject *)
2143replace_delete_single_character(PyBytesObject *self,
2144 char from_c, Py_ssize_t maxcount)
2145{
2146 char *self_s, *result_s;
2147 char *start, *next, *end;
2148 Py_ssize_t self_len, result_len;
2149 Py_ssize_t count;
2150 PyBytesObject *result;
2151
2152 self_len = PyBytes_GET_SIZE(self);
2153 self_s = PyBytes_AS_STRING(self);
2154
2155 count = countchar(self_s, self_len, from_c, maxcount);
2156 if (count == 0) {
2157 return return_self(self);
2158 }
2159
2160 result_len = self_len - count; /* from_len == 1 */
2161 assert(result_len>=0);
2162
2163 if ( (result = (PyBytesObject *)
2164 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2165 return NULL;
2166 result_s = PyBytes_AS_STRING(result);
2167
2168 start = self_s;
2169 end = self_s + self_len;
2170 while (count-- > 0) {
2171 next = findchar(start, end-start, from_c);
2172 if (next == NULL)
2173 break;
2174 Py_MEMCPY(result_s, start, next-start);
2175 result_s += (next-start);
2176 start = next+1;
2177 }
2178 Py_MEMCPY(result_s, start, end-start);
2179
2180 return result;
2181}
2182
2183/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2184
2185Py_LOCAL(PyBytesObject *)
2186replace_delete_substring(PyBytesObject *self,
2187 const char *from_s, Py_ssize_t from_len,
2188 Py_ssize_t maxcount) {
2189 char *self_s, *result_s;
2190 char *start, *next, *end;
2191 Py_ssize_t self_len, result_len;
2192 Py_ssize_t count, offset;
2193 PyBytesObject *result;
2194
2195 self_len = PyBytes_GET_SIZE(self);
2196 self_s = PyBytes_AS_STRING(self);
2197
2198 count = countstring(self_s, self_len,
2199 from_s, from_len,
2200 0, self_len, 1,
2201 maxcount);
2202
2203 if (count == 0) {
2204 /* no matches */
2205 return return_self(self);
2206 }
2207
2208 result_len = self_len - (count * from_len);
2209 assert (result_len>=0);
2210
2211 if ( (result = (PyBytesObject *)
2212 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2213 return NULL;
2214
2215 result_s = PyBytes_AS_STRING(result);
2216
2217 start = self_s;
2218 end = self_s + self_len;
2219 while (count-- > 0) {
2220 offset = findstring(start, end-start,
2221 from_s, from_len,
2222 0, end-start, FORWARD);
2223 if (offset == -1)
2224 break;
2225 next = start + offset;
2226
2227 Py_MEMCPY(result_s, start, next-start);
2228
2229 result_s += (next-start);
2230 start = next+from_len;
2231 }
2232 Py_MEMCPY(result_s, start, end-start);
2233 return result;
2234}
2235
2236/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2237Py_LOCAL(PyBytesObject *)
2238replace_single_character_in_place(PyBytesObject *self,
2239 char from_c, char to_c,
2240 Py_ssize_t maxcount)
2241{
2242 char *self_s, *result_s, *start, *end, *next;
2243 Py_ssize_t self_len;
2244 PyBytesObject *result;
2245
2246 /* The result string will be the same size */
2247 self_s = PyBytes_AS_STRING(self);
2248 self_len = PyBytes_GET_SIZE(self);
2249
2250 next = findchar(self_s, self_len, from_c);
2251
2252 if (next == NULL) {
2253 /* No matches; return the original string */
2254 return return_self(self);
2255 }
2256
2257 /* Need to make a new string */
2258 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2259 if (result == NULL)
2260 return NULL;
2261 result_s = PyBytes_AS_STRING(result);
2262 Py_MEMCPY(result_s, self_s, self_len);
2263
2264 /* change everything in-place, starting with this one */
2265 start = result_s + (next-self_s);
2266 *start = to_c;
2267 start++;
2268 end = result_s + self_len;
2269
2270 while (--maxcount > 0) {
2271 next = findchar(start, end-start, from_c);
2272 if (next == NULL)
2273 break;
2274 *next = to_c;
2275 start = next+1;
2276 }
2277
2278 return result;
2279}
2280
2281/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2282Py_LOCAL(PyBytesObject *)
2283replace_substring_in_place(PyBytesObject *self,
2284 const char *from_s, Py_ssize_t from_len,
2285 const char *to_s, Py_ssize_t to_len,
2286 Py_ssize_t maxcount)
2287{
2288 char *result_s, *start, *end;
2289 char *self_s;
2290 Py_ssize_t self_len, offset;
2291 PyBytesObject *result;
2292
2293 /* The result string will be the same size */
2294
2295 self_s = PyBytes_AS_STRING(self);
2296 self_len = PyBytes_GET_SIZE(self);
2297
2298 offset = findstring(self_s, self_len,
2299 from_s, from_len,
2300 0, self_len, FORWARD);
2301 if (offset == -1) {
2302 /* No matches; return the original string */
2303 return return_self(self);
2304 }
2305
2306 /* Need to make a new string */
2307 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2308 if (result == NULL)
2309 return NULL;
2310 result_s = PyBytes_AS_STRING(result);
2311 Py_MEMCPY(result_s, self_s, self_len);
2312
2313 /* change everything in-place, starting with this one */
2314 start = result_s + offset;
2315 Py_MEMCPY(start, to_s, from_len);
2316 start += from_len;
2317 end = result_s + self_len;
2318
2319 while ( --maxcount > 0) {
2320 offset = findstring(start, end-start,
2321 from_s, from_len,
2322 0, end-start, FORWARD);
2323 if (offset==-1)
2324 break;
2325 Py_MEMCPY(start+offset, to_s, from_len);
2326 start += offset+from_len;
2327 }
2328
2329 return result;
2330}
2331
2332/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2333Py_LOCAL(PyBytesObject *)
2334replace_single_character(PyBytesObject *self,
2335 char from_c,
2336 const char *to_s, Py_ssize_t to_len,
2337 Py_ssize_t maxcount)
2338{
2339 char *self_s, *result_s;
2340 char *start, *next, *end;
2341 Py_ssize_t self_len, result_len;
2342 Py_ssize_t count, product;
2343 PyBytesObject *result;
2344
2345 self_s = PyBytes_AS_STRING(self);
2346 self_len = PyBytes_GET_SIZE(self);
2347
2348 count = countchar(self_s, self_len, from_c, maxcount);
2349 if (count == 0) {
2350 /* no matches, return unchanged */
2351 return return_self(self);
2352 }
2353
2354 /* use the difference between current and new, hence the "-1" */
2355 /* result_len = self_len + count * (to_len-1) */
2356 product = count * (to_len-1);
2357 if (product / (to_len-1) != count) {
2358 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002359 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360 return NULL;
2361 }
2362 result_len = self_len + product;
2363 if (result_len < 0) {
2364 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002365 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002366 return NULL;
2367 }
2368
2369 if ( (result = (PyBytesObject *)
2370 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2371 return NULL;
2372 result_s = PyBytes_AS_STRING(result);
2373
2374 start = self_s;
2375 end = self_s + self_len;
2376 while (count-- > 0) {
2377 next = findchar(start, end-start, from_c);
2378 if (next == NULL)
2379 break;
2380
2381 if (next == start) {
2382 /* replace with the 'to' */
2383 Py_MEMCPY(result_s, to_s, to_len);
2384 result_s += to_len;
2385 start += 1;
2386 } else {
2387 /* copy the unchanged old then the 'to' */
2388 Py_MEMCPY(result_s, start, next-start);
2389 result_s += (next-start);
2390 Py_MEMCPY(result_s, to_s, to_len);
2391 result_s += to_len;
2392 start = next+1;
2393 }
2394 }
2395 /* Copy the remainder of the remaining string */
2396 Py_MEMCPY(result_s, start, end-start);
2397
2398 return result;
2399}
2400
2401/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2402Py_LOCAL(PyBytesObject *)
2403replace_substring(PyBytesObject *self,
2404 const char *from_s, Py_ssize_t from_len,
2405 const char *to_s, Py_ssize_t to_len,
2406 Py_ssize_t maxcount) {
2407 char *self_s, *result_s;
2408 char *start, *next, *end;
2409 Py_ssize_t self_len, result_len;
2410 Py_ssize_t count, offset, product;
2411 PyBytesObject *result;
2412
2413 self_s = PyBytes_AS_STRING(self);
2414 self_len = PyBytes_GET_SIZE(self);
2415
2416 count = countstring(self_s, self_len,
2417 from_s, from_len,
2418 0, self_len, FORWARD, maxcount);
2419 if (count == 0) {
2420 /* no matches, return unchanged */
2421 return return_self(self);
2422 }
2423
2424 /* Check for overflow */
2425 /* result_len = self_len + count * (to_len-from_len) */
2426 product = count * (to_len-from_len);
2427 if (product / (to_len-from_len) != count) {
2428 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002429 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430 return NULL;
2431 }
2432 result_len = self_len + product;
2433 if (result_len < 0) {
2434 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002435 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436 return NULL;
2437 }
2438
2439 if ( (result = (PyBytesObject *)
2440 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2441 return NULL;
2442 result_s = PyBytes_AS_STRING(result);
2443
2444 start = self_s;
2445 end = self_s + self_len;
2446 while (count-- > 0) {
2447 offset = findstring(start, end-start,
2448 from_s, from_len,
2449 0, end-start, FORWARD);
2450 if (offset == -1)
2451 break;
2452 next = start+offset;
2453 if (next == start) {
2454 /* replace with the 'to' */
2455 Py_MEMCPY(result_s, to_s, to_len);
2456 result_s += to_len;
2457 start += from_len;
2458 } else {
2459 /* copy the unchanged old then the 'to' */
2460 Py_MEMCPY(result_s, start, next-start);
2461 result_s += (next-start);
2462 Py_MEMCPY(result_s, to_s, to_len);
2463 result_s += to_len;
2464 start = next+from_len;
2465 }
2466 }
2467 /* Copy the remainder of the remaining string */
2468 Py_MEMCPY(result_s, start, end-start);
2469
2470 return result;
2471}
2472
2473
2474Py_LOCAL(PyBytesObject *)
2475replace(PyBytesObject *self,
2476 const char *from_s, Py_ssize_t from_len,
2477 const char *to_s, Py_ssize_t to_len,
2478 Py_ssize_t maxcount)
2479{
2480 if (maxcount < 0) {
2481 maxcount = PY_SSIZE_T_MAX;
2482 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2483 /* nothing to do; return the original string */
2484 return return_self(self);
2485 }
2486
2487 if (maxcount == 0 ||
2488 (from_len == 0 && to_len == 0)) {
2489 /* nothing to do; return the original string */
2490 return return_self(self);
2491 }
2492
2493 /* Handle zero-length special cases */
2494
2495 if (from_len == 0) {
2496 /* insert the 'to' string everywhere. */
2497 /* >>> "Python".replace("", ".") */
2498 /* '.P.y.t.h.o.n.' */
2499 return replace_interleave(self, to_s, to_len, maxcount);
2500 }
2501
2502 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2503 /* point for an empty self string to generate a non-empty string */
2504 /* Special case so the remaining code always gets a non-empty string */
2505 if (PyBytes_GET_SIZE(self) == 0) {
2506 return return_self(self);
2507 }
2508
2509 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002510 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002511 if (from_len == 1) {
2512 return replace_delete_single_character(
2513 self, from_s[0], maxcount);
2514 } else {
2515 return replace_delete_substring(self, from_s,
2516 from_len, maxcount);
2517 }
2518 }
2519
2520 /* Handle special case where both strings have the same length */
2521
2522 if (from_len == to_len) {
2523 if (from_len == 1) {
2524 return replace_single_character_in_place(
2525 self,
2526 from_s[0],
2527 to_s[0],
2528 maxcount);
2529 } else {
2530 return replace_substring_in_place(
2531 self, from_s, from_len, to_s, to_len,
2532 maxcount);
2533 }
2534 }
2535
2536 /* Otherwise use the more generic algorithms */
2537 if (from_len == 1) {
2538 return replace_single_character(self, from_s[0],
2539 to_s, to_len, maxcount);
2540 } else {
2541 /* len('from')>=2, len('to')>=1 */
2542 return replace_substring(self, from_s, from_len, to_s, to_len,
2543 maxcount);
2544 }
2545}
2546
2547PyDoc_STRVAR(replace__doc__,
2548"B.replace(old, new[, count]) -> bytes\n\
2549\n\
2550Return a copy of B with all occurrences of subsection\n\
2551old replaced by new. If the optional argument count is\n\
2552given, only the first count occurrences are replaced.");
2553
2554static PyObject *
2555string_replace(PyBytesObject *self, PyObject *args)
2556{
2557 Py_ssize_t count = -1;
2558 PyObject *from, *to;
2559 const char *from_s, *to_s;
2560 Py_ssize_t from_len, to_len;
2561
2562 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2563 return NULL;
2564
2565 if (PyBytes_Check(from)) {
2566 from_s = PyBytes_AS_STRING(from);
2567 from_len = PyBytes_GET_SIZE(from);
2568 }
2569 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2570 return NULL;
2571
2572 if (PyBytes_Check(to)) {
2573 to_s = PyBytes_AS_STRING(to);
2574 to_len = PyBytes_GET_SIZE(to);
2575 }
2576 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2577 return NULL;
2578
2579 return (PyObject *)replace((PyBytesObject *) self,
2580 from_s, from_len,
2581 to_s, to_len, count);
2582}
2583
2584/** End DALKE **/
2585
2586/* Matches the end (direction >= 0) or start (direction < 0) of self
2587 * against substr, using the start and end arguments. Returns
2588 * -1 on error, 0 if not found and 1 if found.
2589 */
2590Py_LOCAL(int)
2591_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2592 Py_ssize_t end, int direction)
2593{
2594 Py_ssize_t len = PyBytes_GET_SIZE(self);
2595 Py_ssize_t slen;
2596 const char* sub;
2597 const char* str;
2598
2599 if (PyBytes_Check(substr)) {
2600 sub = PyBytes_AS_STRING(substr);
2601 slen = PyBytes_GET_SIZE(substr);
2602 }
2603 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2604 return -1;
2605 str = PyBytes_AS_STRING(self);
2606
2607 string_adjust_indices(&start, &end, len);
2608
2609 if (direction < 0) {
2610 /* startswith */
2611 if (start+slen > len)
2612 return 0;
2613 } else {
2614 /* endswith */
2615 if (end-start < slen || start > len)
2616 return 0;
2617
2618 if (end-slen > start)
2619 start = end - slen;
2620 }
2621 if (end-start >= slen)
2622 return ! memcmp(str+start, sub, slen);
2623 return 0;
2624}
2625
2626
2627PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002628"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629\n\
2630Return True if B starts with the specified prefix, False otherwise.\n\
2631With optional start, test B beginning at that position.\n\
2632With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002633prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634
2635static PyObject *
2636string_startswith(PyBytesObject *self, PyObject *args)
2637{
2638 Py_ssize_t start = 0;
2639 Py_ssize_t end = PY_SSIZE_T_MAX;
2640 PyObject *subobj;
2641 int result;
2642
2643 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2644 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2645 return NULL;
2646 if (PyTuple_Check(subobj)) {
2647 Py_ssize_t i;
2648 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2649 result = _string_tailmatch(self,
2650 PyTuple_GET_ITEM(subobj, i),
2651 start, end, -1);
2652 if (result == -1)
2653 return NULL;
2654 else if (result) {
2655 Py_RETURN_TRUE;
2656 }
2657 }
2658 Py_RETURN_FALSE;
2659 }
2660 result = _string_tailmatch(self, subobj, start, end, -1);
2661 if (result == -1)
2662 return NULL;
2663 else
2664 return PyBool_FromLong(result);
2665}
2666
2667
2668PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002669"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670\n\
2671Return True if B ends with the specified suffix, False otherwise.\n\
2672With optional start, test B beginning at that position.\n\
2673With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002674suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
2676static PyObject *
2677string_endswith(PyBytesObject *self, PyObject *args)
2678{
2679 Py_ssize_t start = 0;
2680 Py_ssize_t end = PY_SSIZE_T_MAX;
2681 PyObject *subobj;
2682 int result;
2683
2684 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2685 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2686 return NULL;
2687 if (PyTuple_Check(subobj)) {
2688 Py_ssize_t i;
2689 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2690 result = _string_tailmatch(self,
2691 PyTuple_GET_ITEM(subobj, i),
2692 start, end, +1);
2693 if (result == -1)
2694 return NULL;
2695 else if (result) {
2696 Py_RETURN_TRUE;
2697 }
2698 }
2699 Py_RETURN_FALSE;
2700 }
2701 result = _string_tailmatch(self, subobj, start, end, +1);
2702 if (result == -1)
2703 return NULL;
2704 else
2705 return PyBool_FromLong(result);
2706}
2707
2708
2709PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002710"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002712Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002713to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002714handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2715a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002717able to handle UnicodeDecodeErrors.");
2718
2719static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002721{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722 const char *encoding = NULL;
2723 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002724
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2726 return NULL;
2727 if (encoding == NULL)
2728 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002729 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002730}
2731
Guido van Rossum20188312006-05-05 15:15:40 +00002732
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002733PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002735\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002737Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002739
2740static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002741hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002742{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743 if (c >= 128)
2744 return -1;
2745 if (ISDIGIT(c))
2746 return c - '0';
2747 else {
2748 if (ISUPPER(c))
2749 c = TOLOWER(c);
2750 if (c >= 'a' && c <= 'f')
2751 return c - 'a' + 10;
2752 }
2753 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002754}
2755
2756static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002758{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759 PyObject *newstring, *hexobj;
2760 char *buf;
2761 Py_UNICODE *hex;
2762 Py_ssize_t hexlen, byteslen, i, j;
2763 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002764
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2766 return NULL;
2767 assert(PyUnicode_Check(hexobj));
2768 hexlen = PyUnicode_GET_SIZE(hexobj);
2769 hex = PyUnicode_AS_UNICODE(hexobj);
2770 byteslen = hexlen/2; /* This overestimates if there are spaces */
2771 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2772 if (!newstring)
2773 return NULL;
2774 buf = PyBytes_AS_STRING(newstring);
2775 for (i = j = 0; i < hexlen; i += 2) {
2776 /* skip over spaces in the input */
2777 while (hex[i] == ' ')
2778 i++;
2779 if (i >= hexlen)
2780 break;
2781 top = hex_digit_to_int(hex[i]);
2782 bot = hex_digit_to_int(hex[i+1]);
2783 if (top == -1 || bot == -1) {
2784 PyErr_Format(PyExc_ValueError,
2785 "non-hexadecimal number found in "
2786 "fromhex() arg at position %zd", i);
2787 goto error;
2788 }
2789 buf[j++] = (top << 4) + bot;
2790 }
2791 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2792 goto error;
2793 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002794
2795 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796 Py_XDECREF(newstring);
2797 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002798}
2799
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002800PyDoc_STRVAR(sizeof__doc__,
2801"S.__sizeof__() -> size of S in memory, in bytes");
2802
2803static PyObject *
2804string_sizeof(PyBytesObject *v)
2805{
2806 Py_ssize_t res;
Mark Dickinsonfd24b322008-12-06 15:33:31 +00002807 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002808 return PyLong_FromSsize_t(res);
2809}
2810
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002811
2812static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002814{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002816}
2817
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002818
/* Method table for the bytes type (installed through tp_methods of
 * PyBytes_Type).  Each entry gives the Python-level name, the C
 * implementation, the calling convention flags and, where present, the
 * docstring.  The table is terminated by a NULL sentinel entry.
 */
static PyMethodDef
string_methods[] = {
    {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    /* fromhex is the only class method in the table. */
    {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2879
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002880static PyObject *
2881str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2882
2883static PyObject *
2884string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2885{
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002886 PyObject *x = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887 const char *encoding = NULL;
2888 const char *errors = NULL;
2889 PyObject *new = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890 static char *kwlist[] = {"source", "encoding", "errors", 0};
2891
2892 if (type != &PyBytes_Type)
2893 return str_subtype_new(type, args, kwds);
2894 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2895 &encoding, &errors))
2896 return NULL;
2897 if (x == NULL) {
2898 if (encoding != NULL || errors != NULL) {
2899 PyErr_SetString(PyExc_TypeError,
2900 "encoding or errors without sequence "
2901 "argument");
2902 return NULL;
2903 }
2904 return PyBytes_FromString("");
2905 }
2906
2907 if (PyUnicode_Check(x)) {
2908 /* Encode via the codec registry */
2909 if (encoding == NULL) {
2910 PyErr_SetString(PyExc_TypeError,
2911 "string argument without an encoding");
2912 return NULL;
2913 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002914 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915 if (new == NULL)
2916 return NULL;
2917 assert(PyBytes_Check(new));
2918 return new;
2919 }
2920
2921 /* If it's not unicode, there can't be encoding or errors */
2922 if (encoding != NULL || errors != NULL) {
2923 PyErr_SetString(PyExc_TypeError,
2924 "encoding or errors without a string argument");
2925 return NULL;
2926 }
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002927 return PyObject_Bytes(x);
2928}
2929
2930PyObject *
2931PyBytes_FromObject(PyObject *x)
2932{
2933 PyObject *new, *it;
2934 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002936 if (x == NULL) {
2937 PyErr_BadInternalCall();
2938 return NULL;
2939 }
2940
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002941 /* Is it an int? */
2942 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2943 if (size == -1 && PyErr_Occurred()) {
2944 PyErr_Clear();
2945 }
2946 else {
2947 if (size < 0) {
2948 PyErr_SetString(PyExc_ValueError, "negative count");
2949 return NULL;
2950 }
2951 new = PyBytes_FromStringAndSize(NULL, size);
2952 if (new == NULL) {
2953 return NULL;
2954 }
2955 if (size > 0) {
2956 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2957 }
2958 return new;
2959 }
2960
2961 /* Use the modern buffer interface */
2962 if (PyObject_CheckBuffer(x)) {
2963 Py_buffer view;
2964 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2965 return NULL;
2966 new = PyBytes_FromStringAndSize(NULL, view.len);
2967 if (!new)
2968 goto fail;
Christian Heimes1a8501c2008-10-02 19:56:01 +00002969 /* XXX(brett.cannon): Better way to get to internal buffer? */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2971 &view, view.len, 'C') < 0)
2972 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002973 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002974 return new;
2975 fail:
2976 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002977 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978 return NULL;
2979 }
2980
2981 /* For iterator version, create a string object and resize as needed */
2982 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2983 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2984 input being a truly long iterator. */
2985 size = 64;
2986 new = PyBytes_FromStringAndSize(NULL, size);
2987 if (new == NULL)
2988 return NULL;
2989
2990 /* XXX Optimize this if the arguments is a list, tuple */
2991
2992 /* Get the iterator */
2993 it = PyObject_GetIter(x);
2994 if (it == NULL)
2995 goto error;
2996
2997 /* Run the iterator to exhaustion */
2998 for (i = 0; ; i++) {
2999 PyObject *item;
3000 Py_ssize_t value;
3001
3002 /* Get the next item */
3003 item = PyIter_Next(it);
3004 if (item == NULL) {
3005 if (PyErr_Occurred())
3006 goto error;
3007 break;
3008 }
3009
3010 /* Interpret it as an int (__index__) */
3011 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3012 Py_DECREF(item);
3013 if (value == -1 && PyErr_Occurred())
3014 goto error;
3015
3016 /* Range check */
3017 if (value < 0 || value >= 256) {
3018 PyErr_SetString(PyExc_ValueError,
3019 "bytes must be in range(0, 256)");
3020 goto error;
3021 }
3022
3023 /* Append the byte */
3024 if (i >= size) {
3025 size *= 2;
3026 if (_PyBytes_Resize(&new, size) < 0)
3027 goto error;
3028 }
3029 ((PyBytesObject *)new)->ob_sval[i] = value;
3030 }
3031 _PyBytes_Resize(&new, i);
3032
3033 /* Clean up and return success */
3034 Py_DECREF(it);
3035 return new;
3036
3037 error:
3038 /* Error handling when new != NULL */
3039 Py_XDECREF(it);
3040 Py_DECREF(new);
3041 return NULL;
3042}
3043
3044static PyObject *
3045str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3046{
3047 PyObject *tmp, *pnew;
3048 Py_ssize_t n;
3049
3050 assert(PyType_IsSubtype(type, &PyBytes_Type));
3051 tmp = string_new(&PyBytes_Type, args, kwds);
3052 if (tmp == NULL)
3053 return NULL;
3054 assert(PyBytes_CheckExact(tmp));
3055 n = PyBytes_GET_SIZE(tmp);
3056 pnew = type->tp_alloc(type, n);
3057 if (pnew != NULL) {
3058 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3059 PyBytes_AS_STRING(tmp), n+1);
3060 ((PyBytesObject *)pnew)->ob_shash =
3061 ((PyBytesObject *)tmp)->ob_shash;
3062 }
3063 Py_DECREF(tmp);
3064 return pnew;
3065}
3066
/* Docstring of the bytes type (used as tp_doc of PyBytes_Type). */
PyDoc_STRVAR(string_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(memory_view) -> bytes\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - a bytes or a buffer object\n\
 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003078
/* Forward declaration: str_iter is defined after the iterator type and is
   referenced by the tp_iter slot below. */
static PyObject *str_iter(PyObject *seq);

/* Type object for "bytes".  Slots are positional; the trailing comments
 * name the slot each initializer fills.  The type is subclassable
 * (Py_TPFLAGS_BASETYPE) and has no tp_init: construction is done entirely
 * by string_new in tp_new.
 */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    str_iter,                                   /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003124void
3125PyBytes_Concat(register PyObject **pv, register PyObject *w)
3126{
3127 register PyObject *v;
3128 assert(pv != NULL);
3129 if (*pv == NULL)
3130 return;
3131 if (w == NULL) {
3132 Py_DECREF(*pv);
3133 *pv = NULL;
3134 return;
3135 }
3136 v = string_concat(*pv, w);
3137 Py_DECREF(*pv);
3138 *pv = v;
3139}
3140
/* Same as PyBytes_Concat(), but additionally releases the caller's
   reference to w (w may be NULL). */
void
PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
3147
3148
3149/* The following function breaks the notion that strings are immutable:
3150 it changes the size of a string. We get away with this only if there
3151 is only one module referencing the object. You can also think of it
3152 as creating a new string object and destroying the old one, only
3153 more efficiently. In any case, don't use this if the string may
3154 already be known to some other part of the code...
3155 Note that if there's not enough memory to resize the string, the original
3156 string object at *pv is deallocated, *pv is set to NULL, an "out of
3157 memory" exception is set, and -1 is returned. Else (on success) 0 is
3158 returned, and the value in *pv may or may not be the same as on input.
3159 As always, an extra byte is allocated for a trailing \0 byte (newsize
3160 does *not* include that), and a trailing \0 byte is stored.
3161*/
3162
/* Resize the bytes object at *pv to hold newsize bytes.
 *
 * Returns 0 on success (*pv may have changed) and -1 on failure, in which
 * case *pv is set to NULL, the object has been released and an exception
 * is set.
 */
int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyBytesObject *sv;
    v = *pv;
    /* Only a bytes object with exactly one reference may be resized, and
       the new size must not be negative.  On misuse the caller's
       reference is dropped and *pv is cleared. */
    if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* NOTE(review): the two calls below appear to undo the reference
       bookkeeping for v before the block is moved; _Py_NewReference()
       re-registers the result afterwards -- confirm against object.h. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the original block and report
           out-of-memory. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    /* Keep the data NUL-terminated at the new length. */
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
3192
3193/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3194 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3195 * Python's regular ints.
3196 * Return value: a new PyString*, or NULL if error.
3197 * . *pbuf is set to point into it,
3198 * *plen set to the # of chars following that.
3199 * Caller must decref it when done using pbuf.
3200 * The string starting at *pbuf is of the form
3201 * "-"? ("0x" | "0X")? digit+
3202 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3203 * set in flags. The case of hex digits will be correct,
3204 * There will be at least prec digits, zero-filled on the left if
3205 * necessary to get that many.
3206 * val object to be converted
3207 * flags bitmask of format flags; only F_ALT is looked at
3208 * prec minimum number of digits; 0-fill on left if needed
3209 * type a character in [duoxX]; u acts the same as d
3210 *
3211 * CAUTION: o, x and X conversions on regular ints can never
3212 * produce a '-' sign, but can for Python's unbounded ints.
3213 */
3214PyObject*
3215_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3216 char **pbuf, int *plen)
3217{
3218 PyObject *result = NULL;
3219 char *buf;
3220 Py_ssize_t i;
3221 int sign; /* 1 if '-', else 0 */
3222 int len; /* number of characters */
3223 Py_ssize_t llen;
3224 int numdigits; /* len == numnondigits + numdigits */
3225 int numnondigits = 0;
3226
3227 /* Avoid exceeding SSIZE_T_MAX */
Christian Heimesce694b72008-08-24 16:15:19 +00003228 if (prec > INT_MAX-3) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003229 PyErr_SetString(PyExc_OverflowError,
3230 "precision too large");
3231 return NULL;
3232 }
3233
3234 switch (type) {
3235 case 'd':
3236 case 'u':
3237 /* Special-case boolean: we want 0/1 */
3238 if (PyBool_Check(val))
3239 result = PyNumber_ToBase(val, 10);
3240 else
3241 result = Py_TYPE(val)->tp_str(val);
3242 break;
3243 case 'o':
3244 numnondigits = 2;
3245 result = PyNumber_ToBase(val, 8);
3246 break;
3247 case 'x':
3248 case 'X':
3249 numnondigits = 2;
3250 result = PyNumber_ToBase(val, 16);
3251 break;
3252 default:
3253 assert(!"'type' not in [duoxX]");
3254 }
3255 if (!result)
3256 return NULL;
3257
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003258 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003259 if (!buf) {
3260 Py_DECREF(result);
3261 return NULL;
3262 }
3263
3264 /* To modify the string in-place, there can only be one reference. */
3265 if (Py_REFCNT(result) != 1) {
3266 PyErr_BadInternalCall();
3267 return NULL;
3268 }
3269 llen = PyUnicode_GetSize(result);
3270 if (llen > INT_MAX) {
3271 PyErr_SetString(PyExc_ValueError,
3272 "string too large in _PyBytes_FormatLong");
3273 return NULL;
3274 }
3275 len = (int)llen;
3276 if (buf[len-1] == 'L') {
3277 --len;
3278 buf[len] = '\0';
3279 }
3280 sign = buf[0] == '-';
3281 numnondigits += sign;
3282 numdigits = len - numnondigits;
3283 assert(numdigits > 0);
3284
3285 /* Get rid of base marker unless F_ALT */
3286 if (((flags & F_ALT) == 0 &&
3287 (type == 'o' || type == 'x' || type == 'X'))) {
3288 assert(buf[sign] == '0');
3289 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3290 buf[sign+1] == 'o');
3291 numnondigits -= 2;
3292 buf += 2;
3293 len -= 2;
3294 if (sign)
3295 buf[0] = '-';
3296 assert(len == numnondigits + numdigits);
3297 assert(numdigits > 0);
3298 }
3299
3300 /* Fill with leading zeroes to meet minimum width. */
3301 if (prec > numdigits) {
3302 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3303 numnondigits + prec);
3304 char *b1;
3305 if (!r1) {
3306 Py_DECREF(result);
3307 return NULL;
3308 }
3309 b1 = PyBytes_AS_STRING(r1);
3310 for (i = 0; i < numnondigits; ++i)
3311 *b1++ = *buf++;
3312 for (i = 0; i < prec - numdigits; i++)
3313 *b1++ = '0';
3314 for (i = 0; i < numdigits; i++)
3315 *b1++ = *buf++;
3316 *b1 = '\0';
3317 Py_DECREF(result);
3318 result = r1;
3319 buf = PyBytes_AS_STRING(result);
3320 len = numnondigits + prec;
3321 }
3322
3323 /* Fix up case for hex conversions. */
3324 if (type == 'X') {
3325 /* Need to convert all lower case letters to upper case.
3326 and need to convert 0x to 0X (and -0x to -0X). */
3327 for (i = 0; i < len; i++)
3328 if (buf[i] >= 'a' && buf[i] <= 'x')
3329 buf[i] -= 'a'-'A';
3330 }
3331 *pbuf = buf;
3332 *plen = len;
3333 return result;
3334}
3335
3336void
3337PyBytes_Fini(void)
3338{
3339 int i;
3340 for (i = 0; i < UCHAR_MAX + 1; i++) {
3341 Py_XDECREF(characters[i]);
3342 characters[i] = NULL;
3343 }
3344 Py_XDECREF(nullstring);
3345 nullstring = NULL;
3346}
3347
Benjamin Peterson4116f362008-05-27 00:36:20 +00003348/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003349
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;     /* Position of the next byte to return */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003355
/* Destructor: stop GC tracking, release the reference to the iterated
   bytes object (if one is still held) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3363
/* GC traversal: the only object reference held by the iterator is
   it_seq.  Py_VISIT relies on the parameter names visit and arg. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3370
3371static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003373{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374 PyBytesObject *seq;
3375 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003376
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003377 assert(it != NULL);
3378 seq = it->it_seq;
3379 if (seq == NULL)
3380 return NULL;
3381 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003382
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3384 item = PyLong_FromLong(
3385 (unsigned char)seq->ob_sval[it->it_index]);
3386 if (item != NULL)
3387 ++it->it_index;
3388 return item;
3389 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003391 Py_DECREF(seq);
3392 it->it_seq = NULL;
3393 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003394}
3395
3396static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003397striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003398{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003399 Py_ssize_t len = 0;
3400 if (it->it_seq)
3401 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3402 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003403}
3404
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");

/* Method table of the bytes iterator: only __length_hint__, followed by
   the NULL sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL,              NULL}           /* sentinel */
};
3413
/* Type object of the iterator returned by str_iter().  The type takes
 * part in garbage collection (Py_TPFLAGS_HAVE_GC with striter_traverse)
 * and is its own iterator (PyObject_SelfIter).
 */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3446
3447static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003448str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003449{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003451
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003452 if (!PyBytes_Check(seq)) {
3453 PyErr_BadInternalCall();
3454 return NULL;
3455 }
3456 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3457 if (it == NULL)
3458 return NULL;
3459 it->it_index = 0;
3460 Py_INCREF(seq);
3461 it->it_seq = (PyBytesObject *)seq;
3462 _PyObject_GC_TRACK(it);
3463 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003464}