blob: f9569a7b1fdaa12ae62817a9550d0be9846a0fbb [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
17 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000019 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000020 return -1;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
24 return -1;
25 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.  The string in
   the `str' parameter does not have to be null-terminated.  (Therefore it
   is safe to construct a substring by calling
   `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you
   can fill in the data yourself.  If `str' is non-NULL then the resulting
   bytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the objects may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for the data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
88#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
91 Py_INCREF(op);
92 return (PyObject *)op;
93 }
94
Mark Dickinsonfd24b322008-12-06 15:33:31 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
100
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000101 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127 assert(str != NULL);
128 size = strlen(str);
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000130 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000131 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
135#ifdef COUNT_ALLOCS
136 null_strings++;
137#endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
142#ifdef COUNT_ALLOCS
143 one_strings++;
144#endif
145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178#else
179#ifdef __va_copy
180 __va_copy(count, vargs);
181#else
182 count = vargs;
183#endif
184#endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
239 expand:
240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
345
346 end:
347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
354 PyObject* ret;
355 va_list vargs;
356
357#ifdef HAVE_STDARG_PROTOTYPES
358 va_start(vargs, format);
359#else
360 va_start(vargs);
361#endif
362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368string_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
383{
384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
407
408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
413
414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
505 failed:
506 Py_DECREF(v);
507 return NULL;
508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
537 register char **s,
538 register Py_ssize_t *len)
539{
540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
544
545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
550
551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
565#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566
Neal Norwitz6968b052007-02-27 19:02:19 +0000567#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568#define STRINGLIB_LEN PyBytes_GET_SIZE
569#define STRINGLIB_NEW PyBytes_FromStringAndSize
570#define STRINGLIB_STR PyBytes_AS_STRING
571/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
572
573#define STRINGLIB_EMPTY nullstring
574#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
575#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
577#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000578
Neal Norwitz6968b052007-02-27 19:02:19 +0000579#include "stringlib/count.h"
580#include "stringlib/find.h"
581#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000582#include "stringlib/ctype.h"
583#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000584
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
586#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000587
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588PyObject *
589PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000590{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591 static const char *hexdigits = "0123456789abcdef";
592 register PyBytesObject* op = (PyBytesObject*) obj;
593 Py_ssize_t length = Py_SIZE(op);
594 size_t newsize = 3 + 4 * length;
595 PyObject *v;
596 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
601 v = PyUnicode_FromUnicode(NULL, newsize);
602 if (v == NULL) {
603 return NULL;
604 }
605 else {
606 register Py_ssize_t i;
607 register Py_UNICODE c;
608 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
609 int quote;
610
611 /* Figure out which quote to use; single is preferred */
612 quote = '\'';
613 if (smartquotes) {
614 char *test, *start;
615 start = PyBytes_AS_STRING(op);
616 for (test = start; test < start+length; ++test) {
617 if (*test == '"') {
618 quote = '\''; /* back to single */
619 goto decided;
620 }
621 else if (*test == '\'')
622 quote = '"';
623 }
624 decided:
625 ;
626 }
627
628 *p++ = 'b', *p++ = quote;
629 for (i = 0; i < length; i++) {
630 /* There's at least enough room for a hex escape
631 and a closing quote. */
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
633 c = op->ob_sval[i];
634 if (c == quote || c == '\\')
635 *p++ = '\\', *p++ = c;
636 else if (c == '\t')
637 *p++ = '\\', *p++ = 't';
638 else if (c == '\n')
639 *p++ = '\\', *p++ = 'n';
640 else if (c == '\r')
641 *p++ = '\\', *p++ = 'r';
642 else if (c < ' ' || c >= 0x7f) {
643 *p++ = '\\';
644 *p++ = 'x';
645 *p++ = hexdigits[(c & 0xf0) >> 4];
646 *p++ = hexdigits[c & 0xf];
647 }
648 else
649 *p++ = c;
650 }
651 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
652 *p++ = quote;
653 *p = '\0';
654 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
655 Py_DECREF(v);
656 return NULL;
657 }
658 return v;
659 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000660}
661
Neal Norwitz6968b052007-02-27 19:02:19 +0000662static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000663string_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000664{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000666}
667
Neal Norwitz6968b052007-02-27 19:02:19 +0000668static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669string_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000670{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671 if (Py_BytesWarningFlag) {
672 if (PyErr_WarnEx(PyExc_BytesWarning,
673 "str() on a bytes instance", 1))
674 return NULL;
675 }
676 return string_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000677}
678
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000679static Py_ssize_t
680string_length(PyBytesObject *a)
681{
682 return Py_SIZE(a);
683}
Neal Norwitz6968b052007-02-27 19:02:19 +0000684
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685/* This is also used by PyBytes_Concat() */
686static PyObject *
687string_concat(PyObject *a, PyObject *b)
688{
689 Py_ssize_t size;
690 Py_buffer va, vb;
691 PyObject *result = NULL;
692
693 va.len = -1;
694 vb.len = -1;
695 if (_getbuffer(a, &va) < 0 ||
696 _getbuffer(b, &vb) < 0) {
697 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
698 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
699 goto done;
700 }
701
702 /* Optimize end cases */
703 if (va.len == 0 && PyBytes_CheckExact(b)) {
704 result = b;
705 Py_INCREF(result);
706 goto done;
707 }
708 if (vb.len == 0 && PyBytes_CheckExact(a)) {
709 result = a;
710 Py_INCREF(result);
711 goto done;
712 }
713
714 size = va.len + vb.len;
715 if (size < 0) {
716 PyErr_NoMemory();
717 goto done;
718 }
719
720 result = PyBytes_FromStringAndSize(NULL, size);
721 if (result != NULL) {
722 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
723 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
724 }
725
726 done:
727 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000728 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000729 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000730 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000731 return result;
732}
Neal Norwitz6968b052007-02-27 19:02:19 +0000733
734static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000735string_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000736{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000737 register Py_ssize_t i;
738 register Py_ssize_t j;
739 register Py_ssize_t size;
740 register PyBytesObject *op;
741 size_t nbytes;
742 if (n < 0)
743 n = 0;
744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
747 size = Py_SIZE(a) * n;
748 if (n && size / n != Py_SIZE(a)) {
749 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000750 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000751 return NULL;
752 }
753 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
754 Py_INCREF(a);
755 return (PyObject *)a;
756 }
757 nbytes = (size_t)size;
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000758 if (nbytes + PyBytesObject_SIZE <= nbytes) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000759 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000760 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000761 return NULL;
762 }
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000763 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000764 if (op == NULL)
765 return PyErr_NoMemory();
766 PyObject_INIT_VAR(op, &PyBytes_Type, size);
767 op->ob_shash = -1;
768 op->ob_sval[size] = '\0';
769 if (Py_SIZE(a) == 1 && n > 0) {
770 memset(op->ob_sval, a->ob_sval[0] , n);
771 return (PyObject *) op;
772 }
773 i = 0;
774 if (i < size) {
775 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
776 i = Py_SIZE(a);
777 }
778 while (i < size) {
779 j = (i <= size-i) ? i : size-i;
780 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
781 i += j;
782 }
783 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000784}
785
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000787string_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788{
789 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
790 if (ival == -1 && PyErr_Occurred()) {
791 Py_buffer varg;
792 int pos;
793 PyErr_Clear();
794 if (_getbuffer(arg, &varg) < 0)
795 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000797 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000798 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000799 return pos >= 0;
800 }
801 if (ival < 0 || ival >= 256) {
802 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
803 return -1;
804 }
805
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000806 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000807}
808
Neal Norwitz6968b052007-02-27 19:02:19 +0000809static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000810string_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000811{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000812 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000813 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000814 return NULL;
815 }
816 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000817}
818
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000819static PyObject*
820string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000821{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000822 int c;
823 Py_ssize_t len_a, len_b;
824 Py_ssize_t min_len;
825 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000826
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000827 /* Make sure both arguments are strings. */
828 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Barry Warsaw9e9dcd62008-10-17 01:50:37 +0000829 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000830 (PyObject_IsInstance((PyObject*)a,
831 (PyObject*)&PyUnicode_Type) ||
832 PyObject_IsInstance((PyObject*)b,
833 (PyObject*)&PyUnicode_Type))) {
834 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000835 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000836 return NULL;
837 }
838 result = Py_NotImplemented;
839 goto out;
840 }
841 if (a == b) {
842 switch (op) {
843 case Py_EQ:case Py_LE:case Py_GE:
844 result = Py_True;
845 goto out;
846 case Py_NE:case Py_LT:case Py_GT:
847 result = Py_False;
848 goto out;
849 }
850 }
851 if (op == Py_EQ) {
852 /* Supporting Py_NE here as well does not save
853 much time, since Py_NE is rarely used. */
854 if (Py_SIZE(a) == Py_SIZE(b)
855 && (a->ob_sval[0] == b->ob_sval[0]
856 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
857 result = Py_True;
858 } else {
859 result = Py_False;
860 }
861 goto out;
862 }
863 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
864 min_len = (len_a < len_b) ? len_a : len_b;
865 if (min_len > 0) {
866 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
867 if (c==0)
868 c = memcmp(a->ob_sval, b->ob_sval, min_len);
869 } else
870 c = 0;
871 if (c == 0)
872 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
873 switch (op) {
874 case Py_LT: c = c < 0; break;
875 case Py_LE: c = c <= 0; break;
876 case Py_EQ: assert(0); break; /* unreachable */
877 case Py_NE: c = c != 0; break;
878 case Py_GT: c = c > 0; break;
879 case Py_GE: c = c >= 0; break;
880 default:
881 result = Py_NotImplemented;
882 goto out;
883 }
884 result = c ? Py_True : Py_False;
885 out:
886 Py_INCREF(result);
887 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000888}
889
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000890static long
891string_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000892{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000893 register Py_ssize_t len;
894 register unsigned char *p;
895 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000896
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000897 if (a->ob_shash != -1)
898 return a->ob_shash;
899 len = Py_SIZE(a);
900 p = (unsigned char *) a->ob_sval;
901 x = *p << 7;
902 while (--len >= 0)
903 x = (1000003*x) ^ *p++;
904 x ^= Py_SIZE(a);
905 if (x == -1)
906 x = -2;
907 a->ob_shash = x;
908 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000909}
910
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000911static PyObject*
912string_subscript(PyBytesObject* self, PyObject* item)
913{
914 if (PyIndex_Check(item)) {
915 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
916 if (i == -1 && PyErr_Occurred())
917 return NULL;
918 if (i < 0)
919 i += PyBytes_GET_SIZE(self);
920 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
921 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000922 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000923 return NULL;
924 }
925 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
926 }
927 else if (PySlice_Check(item)) {
928 Py_ssize_t start, stop, step, slicelength, cur, i;
929 char* source_buf;
930 char* result_buf;
931 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000932
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000933 if (PySlice_GetIndicesEx((PySliceObject*)item,
934 PyBytes_GET_SIZE(self),
935 &start, &stop, &step, &slicelength) < 0) {
936 return NULL;
937 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000938
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000939 if (slicelength <= 0) {
940 return PyBytes_FromStringAndSize("", 0);
941 }
942 else if (start == 0 && step == 1 &&
943 slicelength == PyBytes_GET_SIZE(self) &&
944 PyBytes_CheckExact(self)) {
945 Py_INCREF(self);
946 return (PyObject *)self;
947 }
948 else if (step == 1) {
949 return PyBytes_FromStringAndSize(
950 PyBytes_AS_STRING(self) + start,
951 slicelength);
952 }
953 else {
954 source_buf = PyBytes_AsString((PyObject*)self);
955 result_buf = (char *)PyMem_Malloc(slicelength);
956 if (result_buf == NULL)
957 return PyErr_NoMemory();
Neal Norwitz6968b052007-02-27 19:02:19 +0000958
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959 for (cur = start, i = 0; i < slicelength;
960 cur += step, i++) {
961 result_buf[i] = source_buf[cur];
962 }
963
964 result = PyBytes_FromStringAndSize(result_buf,
965 slicelength);
966 PyMem_Free(result_buf);
967 return result;
968 }
969 }
970 else {
971 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000972 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973 Py_TYPE(item)->tp_name);
974 return NULL;
975 }
976}
977
978static int
979string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
980{
Martin v. Löwis423be952008-08-13 15:53:07 +0000981 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000982 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000983}
984
985static PySequenceMethods string_as_sequence = {
986 (lenfunc)string_length, /*sq_length*/
987 (binaryfunc)string_concat, /*sq_concat*/
988 (ssizeargfunc)string_repeat, /*sq_repeat*/
989 (ssizeargfunc)string_item, /*sq_item*/
990 0, /*sq_slice*/
991 0, /*sq_ass_item*/
992 0, /*sq_ass_slice*/
993 (objobjproc)string_contains /*sq_contains*/
994};
995
996static PyMappingMethods string_as_mapping = {
997 (lenfunc)string_length,
998 (binaryfunc)string_subscript,
999 0,
1000};
1001
1002static PyBufferProcs string_as_buffer = {
1003 (getbufferproc)string_buffer_getbuffer,
1004 NULL,
1005};
1006
1007
/* Selectors telling the strip helpers which end(s) of the object to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple format strings, indexed by the selectors above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix.  The argument is parenthesized so that an expression
   argument indexes the table as intended. */
#define STRIPNAME(i) (stripformat[(i)]+3)
1016
Neal Norwitz6968b052007-02-27 19:02:19 +00001017
/* True when the `length` bytes of `pattern` occur in `target` at `offset`.
   The first and last bytes are compared directly before falling back to
   memcmp() for the bytes in between.

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized in the expansion so that expression
   arguments keep their intended grouping; arguments are still evaluated
   more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&        \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1023
1024
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The argument and the whole expansion are
   parenthesized so the macro can be used inside larger expressions. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the bytes data[left:right] to the result list.

   Relies on these names in the calling function: `str` (scratch PyObject *),
   `list` (the result list), `count` (number of items stored so far) and an
   `onError` label that is jumped to on failure.  The first MAX_PREALLOC
   items are stored directly into the preallocated slots; later ones are
   appended, and the local reference is dropped because PyList_Append()
   takes its own.

   Wrapped in do { } while (0) so that `SPLIT_ADD(...);` is a single
   statement and is safe as the body of an unbraced if/else. */
#define SPLIT_ADD(data, left, right) do {                           \
    str = PyBytes_FromStringAndSize((data) + (left),                \
                                    (right) - (left));              \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (count < MAX_PREALLOC) {                                     \
        PyList_SET_ITEM(list, count, str);                          \
    } else {                                                        \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        else                                                        \
            Py_DECREF(str);                                         \
    }                                                               \
    count++; } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Scanning helpers; `i` is advanced (or moved back, for the R variants)
   in place, so it must be an lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
1063Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001064split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001065{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066 const char *s = PyBytes_AS_STRING(self);
1067 Py_ssize_t i, j, count=0;
1068 PyObject *str;
1069 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001071 if (list == NULL)
1072 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001074 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001075
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001076 while (maxsplit-- > 0) {
1077 SKIP_SPACE(s, i, len);
1078 if (i==len) break;
1079 j = i; i++;
1080 SKIP_NONSPACE(s, i, len);
1081 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1082 /* No whitespace in self, so just use it as list[0] */
1083 Py_INCREF(self);
1084 PyList_SET_ITEM(list, 0, (PyObject *)self);
1085 count++;
1086 break;
1087 }
1088 SPLIT_ADD(s, j, i);
1089 }
1090
1091 if (i < len) {
1092 /* Only occurs when maxsplit was reached */
1093 /* Skip any remaining whitespace and copy to end of string */
1094 SKIP_SPACE(s, i, len);
1095 if (i != len)
1096 SPLIT_ADD(s, i, len);
1097 }
1098 FIX_PREALLOC_SIZE(list);
1099 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001100 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001101 Py_DECREF(list);
1102 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001103}
1104
Guido van Rossum8f950672007-09-10 16:53:45 +00001105Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001107{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108 const char *s = PyBytes_AS_STRING(self);
1109 register Py_ssize_t i, j, count=0;
1110 PyObject *str;
1111 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001112
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113 if (list == NULL)
1114 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001115
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116 i = j = 0;
1117 while ((j < len) && (maxcount-- > 0)) {
1118 for(; j<len; j++) {
1119 /* I found that using memchr makes no difference */
1120 if (s[j] == ch) {
1121 SPLIT_ADD(s, i, j);
1122 i = j = j + 1;
1123 break;
1124 }
1125 }
1126 }
1127 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1128 /* ch not in self, so just use self as list[0] */
1129 Py_INCREF(self);
1130 PyList_SET_ITEM(list, 0, (PyObject *)self);
1131 count++;
1132 }
1133 else if (i <= len) {
1134 SPLIT_ADD(s, i, len);
1135 }
1136 FIX_PREALLOC_SIZE(list);
1137 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001138
1139 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001140 Py_DECREF(list);
1141 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001142}
1143
Neal Norwitz6968b052007-02-27 19:02:19 +00001144PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001145"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001146\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001147Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001148If sep is not specified or is None, B is split on ASCII whitespace\n\
1149characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001150If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
1152static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001153string_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001154{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001155 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1156 Py_ssize_t maxsplit = -1, count=0;
1157 const char *s = PyBytes_AS_STRING(self), *sub;
1158 Py_buffer vsub;
1159 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001160#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#endif
1163
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1165 return NULL;
1166 if (maxsplit < 0)
1167 maxsplit = PY_SSIZE_T_MAX;
1168 if (subobj == Py_None)
1169 return split_whitespace(self, len, maxsplit);
1170 if (_getbuffer(subobj, &vsub) < 0)
1171 return NULL;
1172 sub = vsub.buf;
1173 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001174
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001175 if (n == 0) {
1176 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001177 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001178 return NULL;
1179 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001180 else if (n == 1) {
1181 list = split_char(self, len, sub[0], maxsplit);
1182 PyBuffer_Release(&vsub);
1183 return list;
1184 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001185
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186 list = PyList_New(PREALLOC_SIZE(maxsplit));
1187 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001188 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001189 return NULL;
1190 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001191
1192#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193 i = j = 0;
1194 while (maxsplit-- > 0) {
1195 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1196 if (pos < 0)
1197 break;
1198 j = i+pos;
1199 SPLIT_ADD(s, i, j);
1200 i = j + n;
1201 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001202#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001203 i = j = 0;
1204 while ((j+n <= len) && (maxsplit-- > 0)) {
1205 for (; j+n <= len; j++) {
1206 if (Py_STRING_MATCH(s, j, sub, n)) {
1207 SPLIT_ADD(s, i, j);
1208 i = j = j + n;
1209 break;
1210 }
1211 }
1212 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001213#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214 SPLIT_ADD(s, i, len);
1215 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001216 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219 onError:
1220 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001221 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001223}
1224
Neal Norwitz6968b052007-02-27 19:02:19 +00001225PyDoc_STRVAR(partition__doc__,
1226"B.partition(sep) -> (head, sep, tail)\n\
1227\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001228Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001229the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001231
1232static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233string_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001234{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235 const char *sep;
1236 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001237
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238 if (PyBytes_Check(sep_obj)) {
1239 sep = PyBytes_AS_STRING(sep_obj);
1240 sep_len = PyBytes_GET_SIZE(sep_obj);
1241 }
1242 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1243 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245 return stringlib_partition(
1246 (PyObject*) self,
1247 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1248 sep_obj, sep, sep_len
1249 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001250}
1251
1252PyDoc_STRVAR(rpartition__doc__,
1253"B.rpartition(sep) -> (tail, sep, head)\n\
1254\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001255Search for the separator sep in B, starting at the end of B,\n\
1256and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001257part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
1260static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263 const char *sep;
1264 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001265
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266 if (PyBytes_Check(sep_obj)) {
1267 sep = PyBytes_AS_STRING(sep_obj);
1268 sep_len = PyBytes_GET_SIZE(sep_obj);
1269 }
1270 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1271 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001272
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273 return stringlib_rpartition(
1274 (PyObject*) self,
1275 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1276 sep_obj, sep, sep_len
1277 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001278}
1279
1280Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001281rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001282{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283 const char *s = PyBytes_AS_STRING(self);
1284 Py_ssize_t i, j, count=0;
1285 PyObject *str;
1286 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288 if (list == NULL)
1289 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001292
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293 while (maxsplit-- > 0) {
1294 RSKIP_SPACE(s, i);
1295 if (i<0) break;
1296 j = i; i--;
1297 RSKIP_NONSPACE(s, i);
1298 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1299 /* No whitespace in self, so just use it as list[0] */
1300 Py_INCREF(self);
1301 PyList_SET_ITEM(list, 0, (PyObject *)self);
1302 count++;
1303 break;
1304 }
1305 SPLIT_ADD(s, i + 1, j + 1);
1306 }
1307 if (i >= 0) {
1308 /* Only occurs when maxsplit was reached. Skip any remaining
1309 whitespace and copy to beginning of string. */
1310 RSKIP_SPACE(s, i);
1311 if (i >= 0)
1312 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001313
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314 }
1315 FIX_PREALLOC_SIZE(list);
1316 if (PyList_Reverse(list) < 0)
1317 goto onError;
1318 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001319 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320 Py_DECREF(list);
1321 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001322}
1323
Guido van Rossum8f950672007-09-10 16:53:45 +00001324Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001326{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327 const char *s = PyBytes_AS_STRING(self);
1328 register Py_ssize_t i, j, count=0;
1329 PyObject *str;
1330 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001331
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332 if (list == NULL)
1333 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001334
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335 i = j = len - 1;
1336 while ((i >= 0) && (maxcount-- > 0)) {
1337 for (; i >= 0; i--) {
1338 if (s[i] == ch) {
1339 SPLIT_ADD(s, i + 1, j + 1);
1340 j = i = i - 1;
1341 break;
1342 }
1343 }
1344 }
1345 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1346 /* ch not in self, so just use self as list[0] */
1347 Py_INCREF(self);
1348 PyList_SET_ITEM(list, 0, (PyObject *)self);
1349 count++;
1350 }
1351 else if (j >= -1) {
1352 SPLIT_ADD(s, 0, j + 1);
1353 }
1354 FIX_PREALLOC_SIZE(list);
1355 if (PyList_Reverse(list) < 0)
1356 goto onError;
1357 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359 onError:
1360 Py_DECREF(list);
1361 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001362}
1363
Neal Norwitz6968b052007-02-27 19:02:19 +00001364PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001365"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001366\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367Return a list of the sections in B, using sep as the delimiter,\n\
1368starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001369If sep is not given, B is split on ASCII whitespace characters\n\
1370(space, tab, return, newline, formfeed, vertical tab).\n\
1371If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Neal Norwitz6968b052007-02-27 19:02:19 +00001374static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375string_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001376{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1378 Py_ssize_t maxsplit = -1, count=0;
1379 const char *s, *sub;
1380 Py_buffer vsub;
1381 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001382
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1384 return NULL;
1385 if (maxsplit < 0)
1386 maxsplit = PY_SSIZE_T_MAX;
1387 if (subobj == Py_None)
1388 return rsplit_whitespace(self, len, maxsplit);
1389 if (_getbuffer(subobj, &vsub) < 0)
1390 return NULL;
1391 sub = vsub.buf;
1392 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001393
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 if (n == 0) {
1395 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001396 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397 return NULL;
1398 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001399 else if (n == 1) {
1400 list = rsplit_char(self, len, sub[0], maxsplit);
1401 PyBuffer_Release(&vsub);
1402 return list;
1403 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001404
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405 list = PyList_New(PREALLOC_SIZE(maxsplit));
1406 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001407 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408 return NULL;
1409 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001410
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411 j = len;
1412 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001413
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414 s = PyBytes_AS_STRING(self);
1415 while ( (i >= 0) && (maxsplit-- > 0) ) {
1416 for (; i>=0; i--) {
1417 if (Py_STRING_MATCH(s, i, sub, n)) {
1418 SPLIT_ADD(s, i + n, j);
1419 j = i;
1420 i -= n;
1421 break;
1422 }
1423 }
1424 }
1425 SPLIT_ADD(s, 0, j);
1426 FIX_PREALLOC_SIZE(list);
1427 if (PyList_Reverse(list) < 0)
1428 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001429 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001430 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001431
1432onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001434 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001436}
1437
/* The split/rsplit helpers above are the last users of these macros. */
#undef PREALLOC_SIZE
#undef MAX_PREALLOC
#undef SPLIT_ADD
1441
1442
1443PyDoc_STRVAR(join__doc__,
1444"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001445\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001446Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1448
Neal Norwitz6968b052007-02-27 19:02:19 +00001449static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450string_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001451{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452 char *sep = PyBytes_AS_STRING(self);
1453 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1454 PyObject *res = NULL;
1455 char *p;
1456 Py_ssize_t seqlen = 0;
1457 size_t sz = 0;
1458 Py_ssize_t i;
1459 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001460
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461 seq = PySequence_Fast(orig, "");
1462 if (seq == NULL) {
1463 return NULL;
1464 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001465
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001466 seqlen = PySequence_Size(seq);
1467 if (seqlen == 0) {
1468 Py_DECREF(seq);
1469 return PyBytes_FromString("");
1470 }
1471 if (seqlen == 1) {
1472 item = PySequence_Fast_GET_ITEM(seq, 0);
1473 if (PyBytes_CheckExact(item)) {
1474 Py_INCREF(item);
1475 Py_DECREF(seq);
1476 return item;
1477 }
1478 }
1479
1480 /* There are at least two things to join, or else we have a subclass
1481 * of the builtin types in the sequence.
1482 * Do a pre-pass to figure out the total amount of space we'll
1483 * need (sz), and see whether all argument are bytes.
1484 */
1485 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1486 for (i = 0; i < seqlen; i++) {
1487 const size_t old_sz = sz;
1488 item = PySequence_Fast_GET_ITEM(seq, i);
1489 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1490 PyErr_Format(PyExc_TypeError,
1491 "sequence item %zd: expected bytes,"
1492 " %.80s found",
1493 i, Py_TYPE(item)->tp_name);
1494 Py_DECREF(seq);
1495 return NULL;
1496 }
1497 sz += Py_SIZE(item);
1498 if (i != 0)
1499 sz += seplen;
1500 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1501 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001502 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503 Py_DECREF(seq);
1504 return NULL;
1505 }
1506 }
1507
1508 /* Allocate result space. */
1509 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1510 if (res == NULL) {
1511 Py_DECREF(seq);
1512 return NULL;
1513 }
1514
1515 /* Catenate everything. */
1516 /* I'm not worried about a PyByteArray item growing because there's
1517 nowhere in this function where we release the GIL. */
1518 p = PyBytes_AS_STRING(res);
1519 for (i = 0; i < seqlen; ++i) {
1520 size_t n;
1521 char *q;
1522 if (i) {
1523 Py_MEMCPY(p, sep, seplen);
1524 p += seplen;
1525 }
1526 item = PySequence_Fast_GET_ITEM(seq, i);
1527 n = Py_SIZE(item);
1528 if (PyBytes_Check(item))
1529 q = PyBytes_AS_STRING(item);
1530 else
1531 q = PyByteArray_AS_STRING(item);
1532 Py_MEMCPY(p, q, n);
1533 p += n;
1534 }
1535
1536 Py_DECREF(seq);
1537 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001538}
1539
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540PyObject *
1541_PyBytes_Join(PyObject *sep, PyObject *x)
1542{
1543 assert(sep != NULL && PyBytes_Check(sep));
1544 assert(x != NULL);
1545 return string_join(sep, x);
1546}
1547
1548Py_LOCAL_INLINE(void)
1549string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1550{
1551 if (*end > len)
1552 *end = len;
1553 else if (*end < 0)
1554 *end += len;
1555 if (*end < 0)
1556 *end = 0;
1557 if (*start < 0)
1558 *start += len;
1559 if (*start < 0)
1560 *start = 0;
1561}
1562
1563Py_LOCAL_INLINE(Py_ssize_t)
1564string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1565{
1566 PyObject *subobj;
1567 const char *sub;
1568 Py_ssize_t sub_len;
1569 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1570 PyObject *obj_start=Py_None, *obj_end=Py_None;
1571
1572 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1573 &obj_start, &obj_end))
1574 return -2;
1575 /* To support None in "start" and "end" arguments, meaning
1576 the same as if they were not passed.
1577 */
1578 if (obj_start != Py_None)
1579 if (!_PyEval_SliceIndex(obj_start, &start))
1580 return -2;
1581 if (obj_end != Py_None)
1582 if (!_PyEval_SliceIndex(obj_end, &end))
1583 return -2;
1584
1585 if (PyBytes_Check(subobj)) {
1586 sub = PyBytes_AS_STRING(subobj);
1587 sub_len = PyBytes_GET_SIZE(subobj);
1588 }
1589 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1590 /* XXX - the "expected a character buffer object" is pretty
1591 confusing for a non-expert. remap to something else ? */
1592 return -2;
1593
1594 if (dir > 0)
1595 return stringlib_find_slice(
1596 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1597 sub, sub_len, start, end);
1598 else
1599 return stringlib_rfind_slice(
1600 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1601 sub, sub_len, start, end);
1602}
1603
1604
1605PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001606"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001607\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608Return the lowest index in S where substring sub is found,\n\
1609such that sub is contained within s[start:end]. Optional\n\
1610arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001611\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612Return -1 on failure.");
1613
Neal Norwitz6968b052007-02-27 19:02:19 +00001614static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615string_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001616{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617 Py_ssize_t result = string_find_internal(self, args, +1);
1618 if (result == -2)
1619 return NULL;
1620 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623
1624PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001625"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001626\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627Like B.find() but raise ValueError when the substring is not found.");
1628
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001629static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630string_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001631{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632 Py_ssize_t result = string_find_internal(self, args, +1);
1633 if (result == -2)
1634 return NULL;
1635 if (result == -1) {
1636 PyErr_SetString(PyExc_ValueError,
1637 "substring not found");
1638 return NULL;
1639 }
1640 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001641}
1642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643
1644PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001645"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001646\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647Return the highest index in B where substring sub is found,\n\
1648such that sub is contained within s[start:end]. Optional\n\
1649arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001650\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651Return -1 on failure.");
1652
Neal Norwitz6968b052007-02-27 19:02:19 +00001653static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654string_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001655{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656 Py_ssize_t result = string_find_internal(self, args, -1);
1657 if (result == -2)
1658 return NULL;
1659 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001660}
1661
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001662
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001664"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665\n\
1666Like B.rfind() but raise ValueError when the substring is not found.");
1667
1668static PyObject *
1669string_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001670{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001671 Py_ssize_t result = string_find_internal(self, args, -1);
1672 if (result == -2)
1673 return NULL;
1674 if (result == -1) {
1675 PyErr_SetString(PyExc_ValueError,
1676 "substring not found");
1677 return NULL;
1678 }
1679 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001680}
1681
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
1683Py_LOCAL_INLINE(PyObject *)
1684do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001685{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686 Py_buffer vsep;
1687 char *s = PyBytes_AS_STRING(self);
1688 Py_ssize_t len = PyBytes_GET_SIZE(self);
1689 char *sep;
1690 Py_ssize_t seplen;
1691 Py_ssize_t i, j;
1692
1693 if (_getbuffer(sepobj, &vsep) < 0)
1694 return NULL;
1695 sep = vsep.buf;
1696 seplen = vsep.len;
1697
1698 i = 0;
1699 if (striptype != RIGHTSTRIP) {
1700 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1701 i++;
1702 }
1703 }
1704
1705 j = len;
1706 if (striptype != LEFTSTRIP) {
1707 do {
1708 j--;
1709 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1710 j++;
1711 }
1712
Martin v. Löwis423be952008-08-13 15:53:07 +00001713 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
1715 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1716 Py_INCREF(self);
1717 return (PyObject*)self;
1718 }
1719 else
1720 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001721}
1722
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
1724Py_LOCAL_INLINE(PyObject *)
1725do_strip(PyBytesObject *self, int striptype)
1726{
1727 char *s = PyBytes_AS_STRING(self);
1728 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1729
1730 i = 0;
1731 if (striptype != RIGHTSTRIP) {
1732 while (i < len && ISSPACE(s[i])) {
1733 i++;
1734 }
1735 }
1736
1737 j = len;
1738 if (striptype != LEFTSTRIP) {
1739 do {
1740 j--;
1741 } while (j >= i && ISSPACE(s[j]));
1742 j++;
1743 }
1744
1745 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1746 Py_INCREF(self);
1747 return (PyObject*)self;
1748 }
1749 else
1750 return PyBytes_FromStringAndSize(s+i, j-i);
1751}
1752
1753
1754Py_LOCAL_INLINE(PyObject *)
1755do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1756{
1757 PyObject *sep = NULL;
1758
1759 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1760 return NULL;
1761
1762 if (sep != NULL && sep != Py_None) {
1763 return do_xstrip(self, striptype, sep);
1764 }
1765 return do_strip(self, striptype);
1766}
1767
1768
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001771\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001772Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775string_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777 if (PyTuple_GET_SIZE(args) == 0)
1778 return do_strip(self, BOTHSTRIP); /* Common case */
1779 else
1780 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781}
1782
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001786\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001787Strip leading bytes contained in the argument.\n\
1788If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790string_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792 if (PyTuple_GET_SIZE(args) == 0)
1793 return do_strip(self, LEFTSTRIP); /* Common case */
1794 else
1795 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001796}
1797
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001799PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001801\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001802Strip trailing bytes contained in the argument.\n\
1803If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001804static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805string_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001806{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807 if (PyTuple_GET_SIZE(args) == 0)
1808 return do_strip(self, RIGHTSTRIP); /* Common case */
1809 else
1810 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001811}
Neal Norwitz6968b052007-02-27 19:02:19 +00001812
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
1814PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001815"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001816\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817Return the number of non-overlapping occurrences of substring sub in\n\
1818string S[start:end]. Optional arguments start and end are interpreted\n\
1819as in slice notation.");
1820
1821static PyObject *
1822string_count(PyBytesObject *self, PyObject *args)
1823{
1824 PyObject *sub_obj;
1825 const char *str = PyBytes_AS_STRING(self), *sub;
1826 Py_ssize_t sub_len;
1827 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1828
1829 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1830 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1831 return NULL;
1832
1833 if (PyBytes_Check(sub_obj)) {
1834 sub = PyBytes_AS_STRING(sub_obj);
1835 sub_len = PyBytes_GET_SIZE(sub_obj);
1836 }
1837 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1838 return NULL;
1839
1840 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1841
1842 return PyLong_FromSsize_t(
1843 stringlib_count(str + start, end - start, sub, sub_len)
1844 );
1845}
1846
1847
1848PyDoc_STRVAR(translate__doc__,
1849"B.translate(table[, deletechars]) -> bytes\n\
1850\n\
1851Return a copy of B, where all characters occurring in the\n\
1852optional argument deletechars are removed, and the remaining\n\
1853characters have been mapped through the given translation\n\
1854table, which must be a bytes object of length 256.");
1855
1856static PyObject *
1857string_translate(PyBytesObject *self, PyObject *args)
1858{
1859 register char *input, *output;
1860 const char *table;
1861 register Py_ssize_t i, c, changed = 0;
1862 PyObject *input_obj = (PyObject*)self;
1863 const char *output_start, *del_table=NULL;
1864 Py_ssize_t inlen, tablen, dellen = 0;
1865 PyObject *result;
1866 int trans_table[256];
1867 PyObject *tableobj, *delobj = NULL;
1868
1869 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1870 &tableobj, &delobj))
1871 return NULL;
1872
1873 if (PyBytes_Check(tableobj)) {
1874 table = PyBytes_AS_STRING(tableobj);
1875 tablen = PyBytes_GET_SIZE(tableobj);
1876 }
1877 else if (tableobj == Py_None) {
1878 table = NULL;
1879 tablen = 256;
1880 }
1881 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1882 return NULL;
1883
1884 if (tablen != 256) {
1885 PyErr_SetString(PyExc_ValueError,
1886 "translation table must be 256 characters long");
1887 return NULL;
1888 }
1889
1890 if (delobj != NULL) {
1891 if (PyBytes_Check(delobj)) {
1892 del_table = PyBytes_AS_STRING(delobj);
1893 dellen = PyBytes_GET_SIZE(delobj);
1894 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1896 return NULL;
1897 }
1898 else {
1899 del_table = NULL;
1900 dellen = 0;
1901 }
1902
1903 inlen = PyBytes_GET_SIZE(input_obj);
1904 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1905 if (result == NULL)
1906 return NULL;
1907 output_start = output = PyBytes_AsString(result);
1908 input = PyBytes_AS_STRING(input_obj);
1909
1910 if (dellen == 0 && table != NULL) {
1911 /* If no deletions are required, use faster code */
1912 for (i = inlen; --i >= 0; ) {
1913 c = Py_CHARMASK(*input++);
1914 if (Py_CHARMASK((*output++ = table[c])) != c)
1915 changed = 1;
1916 }
1917 if (changed || !PyBytes_CheckExact(input_obj))
1918 return result;
1919 Py_DECREF(result);
1920 Py_INCREF(input_obj);
1921 return input_obj;
1922 }
1923
1924 if (table == NULL) {
1925 for (i = 0; i < 256; i++)
1926 trans_table[i] = Py_CHARMASK(i);
1927 } else {
1928 for (i = 0; i < 256; i++)
1929 trans_table[i] = Py_CHARMASK(table[i]);
1930 }
1931
1932 for (i = 0; i < dellen; i++)
1933 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1934
1935 for (i = inlen; --i >= 0; ) {
1936 c = Py_CHARMASK(*input++);
1937 if (trans_table[c] != -1)
1938 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1939 continue;
1940 changed = 1;
1941 }
1942 if (!changed && PyBytes_CheckExact(input_obj)) {
1943 Py_DECREF(result);
1944 Py_INCREF(input_obj);
1945 return input_obj;
1946 }
1947 /* Fix the size of the resulting string */
1948 if (inlen > 0)
1949 _PyBytes_Resize(&result, output - output_start);
1950 return result;
1951}
1952
1953
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char* to the first occurrence of byte c within the first
   target_len bytes of target, or NULL if absent.  Arguments are fully
   parenthesized so expressions may be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1962/* String ops must return a string. */
1963/* If the object is subclass of string, create a copy */
1964Py_LOCAL(PyBytesObject *)
1965return_self(PyBytesObject *self)
1966{
1967 if (PyBytes_CheckExact(self)) {
1968 Py_INCREF(self);
1969 return self;
1970 }
1971 return (PyBytesObject *)PyBytes_FromStringAndSize(
1972 PyBytes_AS_STRING(self),
1973 PyBytes_GET_SIZE(self));
1974}
1975
1976Py_LOCAL_INLINE(Py_ssize_t)
1977countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1978{
1979 Py_ssize_t count=0;
1980 const char *start=target;
1981 const char *end=target+target_len;
1982
1983 while ( (start=findchar(start, end-start, c)) != NULL ) {
1984 count++;
1985 if (count >= maxcount)
1986 break;
1987 start += 1;
1988 }
1989 return count;
1990}
1991
1992Py_LOCAL(Py_ssize_t)
1993findstring(const char *target, Py_ssize_t target_len,
1994 const char *pattern, Py_ssize_t pattern_len,
1995 Py_ssize_t start,
1996 Py_ssize_t end,
1997 int direction)
1998{
1999 if (start < 0) {
2000 start += target_len;
2001 if (start < 0)
2002 start = 0;
2003 }
2004 if (end > target_len) {
2005 end = target_len;
2006 } else if (end < 0) {
2007 end += target_len;
2008 if (end < 0)
2009 end = 0;
2010 }
2011
2012 /* zero-length substrings always match at the first attempt */
2013 if (pattern_len == 0)
2014 return (direction > 0) ? start : end;
2015
2016 end -= pattern_len;
2017
2018 if (direction < 0) {
2019 for (; end >= start; end--)
2020 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2021 return end;
2022 } else {
2023 for (; start <= end; start++)
2024 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2025 return start;
2026 }
2027 return -1;
2028}
2029
2030Py_LOCAL_INLINE(Py_ssize_t)
2031countstring(const char *target, Py_ssize_t target_len,
2032 const char *pattern, Py_ssize_t pattern_len,
2033 Py_ssize_t start,
2034 Py_ssize_t end,
2035 int direction, Py_ssize_t maxcount)
2036{
2037 Py_ssize_t count=0;
2038
2039 if (start < 0) {
2040 start += target_len;
2041 if (start < 0)
2042 start = 0;
2043 }
2044 if (end > target_len) {
2045 end = target_len;
2046 } else if (end < 0) {
2047 end += target_len;
2048 if (end < 0)
2049 end = 0;
2050 }
2051
2052 /* zero-length substrings match everywhere */
2053 if (pattern_len == 0 || maxcount == 0) {
2054 if (target_len+1 < maxcount)
2055 return target_len+1;
2056 return maxcount;
2057 }
2058
2059 end -= pattern_len;
2060 if (direction < 0) {
2061 for (; (end >= start); end--)
2062 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2063 count++;
2064 if (--maxcount <= 0) break;
2065 end -= pattern_len-1;
2066 }
2067 } else {
2068 for (; (start <= end); start++)
2069 if (Py_STRING_MATCH(target, start,
2070 pattern, pattern_len)) {
2071 count++;
2072 if (--maxcount <= 0)
2073 break;
2074 start += pattern_len-1;
2075 }
2076 }
2077 return count;
2078}
2079
2080
2081/* Algorithms for different cases of string replacement */
2082
2083/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2084Py_LOCAL(PyBytesObject *)
2085replace_interleave(PyBytesObject *self,
2086 const char *to_s, Py_ssize_t to_len,
2087 Py_ssize_t maxcount)
2088{
2089 char *self_s, *result_s;
2090 Py_ssize_t self_len, result_len;
2091 Py_ssize_t count, i, product;
2092 PyBytesObject *result;
2093
2094 self_len = PyBytes_GET_SIZE(self);
2095
2096 /* 1 at the end plus 1 after every character */
2097 count = self_len+1;
2098 if (maxcount < count)
2099 count = maxcount;
2100
2101 /* Check for overflow */
2102 /* result_len = count * to_len + self_len; */
2103 product = count * to_len;
2104 if (product / to_len != count) {
2105 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002106 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107 return NULL;
2108 }
2109 result_len = product + self_len;
2110 if (result_len < 0) {
2111 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002112 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113 return NULL;
2114 }
2115
2116 if (! (result = (PyBytesObject *)
2117 PyBytes_FromStringAndSize(NULL, result_len)) )
2118 return NULL;
2119
2120 self_s = PyBytes_AS_STRING(self);
2121 result_s = PyBytes_AS_STRING(result);
2122
2123 /* TODO: special case single character, which doesn't need memcpy */
2124
2125 /* Lay the first one down (guaranteed this will occur) */
2126 Py_MEMCPY(result_s, to_s, to_len);
2127 result_s += to_len;
2128 count -= 1;
2129
2130 for (i=0; i<count; i++) {
2131 *result_s++ = *self_s++;
2132 Py_MEMCPY(result_s, to_s, to_len);
2133 result_s += to_len;
2134 }
2135
2136 /* Copy the rest of the original string */
2137 Py_MEMCPY(result_s, self_s, self_len-i);
2138
2139 return result;
2140}
2141
2142/* Special case for deleting a single character */
2143/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2144Py_LOCAL(PyBytesObject *)
2145replace_delete_single_character(PyBytesObject *self,
2146 char from_c, Py_ssize_t maxcount)
2147{
2148 char *self_s, *result_s;
2149 char *start, *next, *end;
2150 Py_ssize_t self_len, result_len;
2151 Py_ssize_t count;
2152 PyBytesObject *result;
2153
2154 self_len = PyBytes_GET_SIZE(self);
2155 self_s = PyBytes_AS_STRING(self);
2156
2157 count = countchar(self_s, self_len, from_c, maxcount);
2158 if (count == 0) {
2159 return return_self(self);
2160 }
2161
2162 result_len = self_len - count; /* from_len == 1 */
2163 assert(result_len>=0);
2164
2165 if ( (result = (PyBytesObject *)
2166 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2167 return NULL;
2168 result_s = PyBytes_AS_STRING(result);
2169
2170 start = self_s;
2171 end = self_s + self_len;
2172 while (count-- > 0) {
2173 next = findchar(start, end-start, from_c);
2174 if (next == NULL)
2175 break;
2176 Py_MEMCPY(result_s, start, next-start);
2177 result_s += (next-start);
2178 start = next+1;
2179 }
2180 Py_MEMCPY(result_s, start, end-start);
2181
2182 return result;
2183}
2184
2185/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2186
2187Py_LOCAL(PyBytesObject *)
2188replace_delete_substring(PyBytesObject *self,
2189 const char *from_s, Py_ssize_t from_len,
2190 Py_ssize_t maxcount) {
2191 char *self_s, *result_s;
2192 char *start, *next, *end;
2193 Py_ssize_t self_len, result_len;
2194 Py_ssize_t count, offset;
2195 PyBytesObject *result;
2196
2197 self_len = PyBytes_GET_SIZE(self);
2198 self_s = PyBytes_AS_STRING(self);
2199
2200 count = countstring(self_s, self_len,
2201 from_s, from_len,
2202 0, self_len, 1,
2203 maxcount);
2204
2205 if (count == 0) {
2206 /* no matches */
2207 return return_self(self);
2208 }
2209
2210 result_len = self_len - (count * from_len);
2211 assert (result_len>=0);
2212
2213 if ( (result = (PyBytesObject *)
2214 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2215 return NULL;
2216
2217 result_s = PyBytes_AS_STRING(result);
2218
2219 start = self_s;
2220 end = self_s + self_len;
2221 while (count-- > 0) {
2222 offset = findstring(start, end-start,
2223 from_s, from_len,
2224 0, end-start, FORWARD);
2225 if (offset == -1)
2226 break;
2227 next = start + offset;
2228
2229 Py_MEMCPY(result_s, start, next-start);
2230
2231 result_s += (next-start);
2232 start = next+from_len;
2233 }
2234 Py_MEMCPY(result_s, start, end-start);
2235 return result;
2236}
2237
2238/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2239Py_LOCAL(PyBytesObject *)
2240replace_single_character_in_place(PyBytesObject *self,
2241 char from_c, char to_c,
2242 Py_ssize_t maxcount)
2243{
2244 char *self_s, *result_s, *start, *end, *next;
2245 Py_ssize_t self_len;
2246 PyBytesObject *result;
2247
2248 /* The result string will be the same size */
2249 self_s = PyBytes_AS_STRING(self);
2250 self_len = PyBytes_GET_SIZE(self);
2251
2252 next = findchar(self_s, self_len, from_c);
2253
2254 if (next == NULL) {
2255 /* No matches; return the original string */
2256 return return_self(self);
2257 }
2258
2259 /* Need to make a new string */
2260 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2261 if (result == NULL)
2262 return NULL;
2263 result_s = PyBytes_AS_STRING(result);
2264 Py_MEMCPY(result_s, self_s, self_len);
2265
2266 /* change everything in-place, starting with this one */
2267 start = result_s + (next-self_s);
2268 *start = to_c;
2269 start++;
2270 end = result_s + self_len;
2271
2272 while (--maxcount > 0) {
2273 next = findchar(start, end-start, from_c);
2274 if (next == NULL)
2275 break;
2276 *next = to_c;
2277 start = next+1;
2278 }
2279
2280 return result;
2281}
2282
2283/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2284Py_LOCAL(PyBytesObject *)
2285replace_substring_in_place(PyBytesObject *self,
2286 const char *from_s, Py_ssize_t from_len,
2287 const char *to_s, Py_ssize_t to_len,
2288 Py_ssize_t maxcount)
2289{
2290 char *result_s, *start, *end;
2291 char *self_s;
2292 Py_ssize_t self_len, offset;
2293 PyBytesObject *result;
2294
2295 /* The result string will be the same size */
2296
2297 self_s = PyBytes_AS_STRING(self);
2298 self_len = PyBytes_GET_SIZE(self);
2299
2300 offset = findstring(self_s, self_len,
2301 from_s, from_len,
2302 0, self_len, FORWARD);
2303 if (offset == -1) {
2304 /* No matches; return the original string */
2305 return return_self(self);
2306 }
2307
2308 /* Need to make a new string */
2309 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2310 if (result == NULL)
2311 return NULL;
2312 result_s = PyBytes_AS_STRING(result);
2313 Py_MEMCPY(result_s, self_s, self_len);
2314
2315 /* change everything in-place, starting with this one */
2316 start = result_s + offset;
2317 Py_MEMCPY(start, to_s, from_len);
2318 start += from_len;
2319 end = result_s + self_len;
2320
2321 while ( --maxcount > 0) {
2322 offset = findstring(start, end-start,
2323 from_s, from_len,
2324 0, end-start, FORWARD);
2325 if (offset==-1)
2326 break;
2327 Py_MEMCPY(start+offset, to_s, from_len);
2328 start += offset+from_len;
2329 }
2330
2331 return result;
2332}
2333
2334/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2335Py_LOCAL(PyBytesObject *)
2336replace_single_character(PyBytesObject *self,
2337 char from_c,
2338 const char *to_s, Py_ssize_t to_len,
2339 Py_ssize_t maxcount)
2340{
2341 char *self_s, *result_s;
2342 char *start, *next, *end;
2343 Py_ssize_t self_len, result_len;
2344 Py_ssize_t count, product;
2345 PyBytesObject *result;
2346
2347 self_s = PyBytes_AS_STRING(self);
2348 self_len = PyBytes_GET_SIZE(self);
2349
2350 count = countchar(self_s, self_len, from_c, maxcount);
2351 if (count == 0) {
2352 /* no matches, return unchanged */
2353 return return_self(self);
2354 }
2355
2356 /* use the difference between current and new, hence the "-1" */
2357 /* result_len = self_len + count * (to_len-1) */
2358 product = count * (to_len-1);
2359 if (product / (to_len-1) != count) {
2360 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002361 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002362 return NULL;
2363 }
2364 result_len = self_len + product;
2365 if (result_len < 0) {
2366 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002367 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002368 return NULL;
2369 }
2370
2371 if ( (result = (PyBytesObject *)
2372 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2373 return NULL;
2374 result_s = PyBytes_AS_STRING(result);
2375
2376 start = self_s;
2377 end = self_s + self_len;
2378 while (count-- > 0) {
2379 next = findchar(start, end-start, from_c);
2380 if (next == NULL)
2381 break;
2382
2383 if (next == start) {
2384 /* replace with the 'to' */
2385 Py_MEMCPY(result_s, to_s, to_len);
2386 result_s += to_len;
2387 start += 1;
2388 } else {
2389 /* copy the unchanged old then the 'to' */
2390 Py_MEMCPY(result_s, start, next-start);
2391 result_s += (next-start);
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 start = next+1;
2395 }
2396 }
2397 /* Copy the remainder of the remaining string */
2398 Py_MEMCPY(result_s, start, end-start);
2399
2400 return result;
2401}
2402
2403/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2404Py_LOCAL(PyBytesObject *)
2405replace_substring(PyBytesObject *self,
2406 const char *from_s, Py_ssize_t from_len,
2407 const char *to_s, Py_ssize_t to_len,
2408 Py_ssize_t maxcount) {
2409 char *self_s, *result_s;
2410 char *start, *next, *end;
2411 Py_ssize_t self_len, result_len;
2412 Py_ssize_t count, offset, product;
2413 PyBytesObject *result;
2414
2415 self_s = PyBytes_AS_STRING(self);
2416 self_len = PyBytes_GET_SIZE(self);
2417
2418 count = countstring(self_s, self_len,
2419 from_s, from_len,
2420 0, self_len, FORWARD, maxcount);
2421 if (count == 0) {
2422 /* no matches, return unchanged */
2423 return return_self(self);
2424 }
2425
2426 /* Check for overflow */
2427 /* result_len = self_len + count * (to_len-from_len) */
2428 product = count * (to_len-from_len);
2429 if (product / (to_len-from_len) != count) {
2430 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002431 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002432 return NULL;
2433 }
2434 result_len = self_len + product;
2435 if (result_len < 0) {
2436 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002437 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002438 return NULL;
2439 }
2440
2441 if ( (result = (PyBytesObject *)
2442 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2443 return NULL;
2444 result_s = PyBytes_AS_STRING(result);
2445
2446 start = self_s;
2447 end = self_s + self_len;
2448 while (count-- > 0) {
2449 offset = findstring(start, end-start,
2450 from_s, from_len,
2451 0, end-start, FORWARD);
2452 if (offset == -1)
2453 break;
2454 next = start+offset;
2455 if (next == start) {
2456 /* replace with the 'to' */
2457 Py_MEMCPY(result_s, to_s, to_len);
2458 result_s += to_len;
2459 start += from_len;
2460 } else {
2461 /* copy the unchanged old then the 'to' */
2462 Py_MEMCPY(result_s, start, next-start);
2463 result_s += (next-start);
2464 Py_MEMCPY(result_s, to_s, to_len);
2465 result_s += to_len;
2466 start = next+from_len;
2467 }
2468 }
2469 /* Copy the remainder of the remaining string */
2470 Py_MEMCPY(result_s, start, end-start);
2471
2472 return result;
2473}
2474
2475
2476Py_LOCAL(PyBytesObject *)
2477replace(PyBytesObject *self,
2478 const char *from_s, Py_ssize_t from_len,
2479 const char *to_s, Py_ssize_t to_len,
2480 Py_ssize_t maxcount)
2481{
2482 if (maxcount < 0) {
2483 maxcount = PY_SSIZE_T_MAX;
2484 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2485 /* nothing to do; return the original string */
2486 return return_self(self);
2487 }
2488
2489 if (maxcount == 0 ||
2490 (from_len == 0 && to_len == 0)) {
2491 /* nothing to do; return the original string */
2492 return return_self(self);
2493 }
2494
2495 /* Handle zero-length special cases */
2496
2497 if (from_len == 0) {
2498 /* insert the 'to' string everywhere. */
2499 /* >>> "Python".replace("", ".") */
2500 /* '.P.y.t.h.o.n.' */
2501 return replace_interleave(self, to_s, to_len, maxcount);
2502 }
2503
2504 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2505 /* point for an empty self string to generate a non-empty string */
2506 /* Special case so the remaining code always gets a non-empty string */
2507 if (PyBytes_GET_SIZE(self) == 0) {
2508 return return_self(self);
2509 }
2510
2511 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002512 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002513 if (from_len == 1) {
2514 return replace_delete_single_character(
2515 self, from_s[0], maxcount);
2516 } else {
2517 return replace_delete_substring(self, from_s,
2518 from_len, maxcount);
2519 }
2520 }
2521
2522 /* Handle special case where both strings have the same length */
2523
2524 if (from_len == to_len) {
2525 if (from_len == 1) {
2526 return replace_single_character_in_place(
2527 self,
2528 from_s[0],
2529 to_s[0],
2530 maxcount);
2531 } else {
2532 return replace_substring_in_place(
2533 self, from_s, from_len, to_s, to_len,
2534 maxcount);
2535 }
2536 }
2537
2538 /* Otherwise use the more generic algorithms */
2539 if (from_len == 1) {
2540 return replace_single_character(self, from_s[0],
2541 to_s, to_len, maxcount);
2542 } else {
2543 /* len('from')>=2, len('to')>=1 */
2544 return replace_substring(self, from_s, from_len, to_s, to_len,
2545 maxcount);
2546 }
2547}
2548
2549PyDoc_STRVAR(replace__doc__,
2550"B.replace(old, new[, count]) -> bytes\n\
2551\n\
2552Return a copy of B with all occurrences of subsection\n\
2553old replaced by new. If the optional argument count is\n\
2554given, only the first count occurrences are replaced.");
2555
2556static PyObject *
2557string_replace(PyBytesObject *self, PyObject *args)
2558{
2559 Py_ssize_t count = -1;
2560 PyObject *from, *to;
2561 const char *from_s, *to_s;
2562 Py_ssize_t from_len, to_len;
2563
2564 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2565 return NULL;
2566
2567 if (PyBytes_Check(from)) {
2568 from_s = PyBytes_AS_STRING(from);
2569 from_len = PyBytes_GET_SIZE(from);
2570 }
2571 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2572 return NULL;
2573
2574 if (PyBytes_Check(to)) {
2575 to_s = PyBytes_AS_STRING(to);
2576 to_len = PyBytes_GET_SIZE(to);
2577 }
2578 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2579 return NULL;
2580
2581 return (PyObject *)replace((PyBytesObject *) self,
2582 from_s, from_len,
2583 to_s, to_len, count);
2584}
2585
2586/** End DALKE **/
2587
2588/* Matches the end (direction >= 0) or start (direction < 0) of self
2589 * against substr, using the start and end arguments. Returns
2590 * -1 on error, 0 if not found and 1 if found.
2591 */
2592Py_LOCAL(int)
2593_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2594 Py_ssize_t end, int direction)
2595{
2596 Py_ssize_t len = PyBytes_GET_SIZE(self);
2597 Py_ssize_t slen;
2598 const char* sub;
2599 const char* str;
2600
2601 if (PyBytes_Check(substr)) {
2602 sub = PyBytes_AS_STRING(substr);
2603 slen = PyBytes_GET_SIZE(substr);
2604 }
2605 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2606 return -1;
2607 str = PyBytes_AS_STRING(self);
2608
2609 string_adjust_indices(&start, &end, len);
2610
2611 if (direction < 0) {
2612 /* startswith */
2613 if (start+slen > len)
2614 return 0;
2615 } else {
2616 /* endswith */
2617 if (end-start < slen || start > len)
2618 return 0;
2619
2620 if (end-slen > start)
2621 start = end - slen;
2622 }
2623 if (end-start >= slen)
2624 return ! memcmp(str+start, sub, slen);
2625 return 0;
2626}
2627
2628
2629PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002630"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631\n\
2632Return True if B starts with the specified prefix, False otherwise.\n\
2633With optional start, test B beginning at that position.\n\
2634With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002635prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636
2637static PyObject *
2638string_startswith(PyBytesObject *self, PyObject *args)
2639{
2640 Py_ssize_t start = 0;
2641 Py_ssize_t end = PY_SSIZE_T_MAX;
2642 PyObject *subobj;
2643 int result;
2644
2645 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2646 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2647 return NULL;
2648 if (PyTuple_Check(subobj)) {
2649 Py_ssize_t i;
2650 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2651 result = _string_tailmatch(self,
2652 PyTuple_GET_ITEM(subobj, i),
2653 start, end, -1);
2654 if (result == -1)
2655 return NULL;
2656 else if (result) {
2657 Py_RETURN_TRUE;
2658 }
2659 }
2660 Py_RETURN_FALSE;
2661 }
2662 result = _string_tailmatch(self, subobj, start, end, -1);
2663 if (result == -1)
2664 return NULL;
2665 else
2666 return PyBool_FromLong(result);
2667}
2668
2669
2670PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002671"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672\n\
2673Return True if B ends with the specified suffix, False otherwise.\n\
2674With optional start, test B beginning at that position.\n\
2675With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002676suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002677
2678static PyObject *
2679string_endswith(PyBytesObject *self, PyObject *args)
2680{
2681 Py_ssize_t start = 0;
2682 Py_ssize_t end = PY_SSIZE_T_MAX;
2683 PyObject *subobj;
2684 int result;
2685
2686 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2687 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2688 return NULL;
2689 if (PyTuple_Check(subobj)) {
2690 Py_ssize_t i;
2691 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2692 result = _string_tailmatch(self,
2693 PyTuple_GET_ITEM(subobj, i),
2694 start, end, +1);
2695 if (result == -1)
2696 return NULL;
2697 else if (result) {
2698 Py_RETURN_TRUE;
2699 }
2700 }
2701 Py_RETURN_FALSE;
2702 }
2703 result = _string_tailmatch(self, subobj, start, end, +1);
2704 if (result == -1)
2705 return NULL;
2706 else
2707 return PyBool_FromLong(result);
2708}
2709
2710
2711PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002712"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002714Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002715to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002716handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2717a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002719able to handle UnicodeDecodeErrors.");
2720
2721static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722string_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002723{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724 const char *encoding = NULL;
2725 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002726
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2728 return NULL;
2729 if (encoding == NULL)
2730 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002731 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002732}
2733
Guido van Rossum20188312006-05-05 15:15:40 +00002734
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002735PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002737\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002739Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002741
2742static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002743hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002744{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745 if (c >= 128)
2746 return -1;
2747 if (ISDIGIT(c))
2748 return c - '0';
2749 else {
2750 if (ISUPPER(c))
2751 c = TOLOWER(c);
2752 if (c >= 'a' && c <= 'f')
2753 return c - 'a' + 10;
2754 }
2755 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002756}
2757
2758static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759string_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002760{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761 PyObject *newstring, *hexobj;
2762 char *buf;
2763 Py_UNICODE *hex;
2764 Py_ssize_t hexlen, byteslen, i, j;
2765 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002766
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2768 return NULL;
2769 assert(PyUnicode_Check(hexobj));
2770 hexlen = PyUnicode_GET_SIZE(hexobj);
2771 hex = PyUnicode_AS_UNICODE(hexobj);
2772 byteslen = hexlen/2; /* This overestimates if there are spaces */
2773 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2774 if (!newstring)
2775 return NULL;
2776 buf = PyBytes_AS_STRING(newstring);
2777 for (i = j = 0; i < hexlen; i += 2) {
2778 /* skip over spaces in the input */
2779 while (hex[i] == ' ')
2780 i++;
2781 if (i >= hexlen)
2782 break;
2783 top = hex_digit_to_int(hex[i]);
2784 bot = hex_digit_to_int(hex[i+1]);
2785 if (top == -1 || bot == -1) {
2786 PyErr_Format(PyExc_ValueError,
2787 "non-hexadecimal number found in "
2788 "fromhex() arg at position %zd", i);
2789 goto error;
2790 }
2791 buf[j++] = (top << 4) + bot;
2792 }
2793 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2794 goto error;
2795 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002796
2797 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798 Py_XDECREF(newstring);
2799 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002800}
2801
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002802PyDoc_STRVAR(sizeof__doc__,
2803"S.__sizeof__() -> size of S in memory, in bytes");
2804
2805static PyObject *
2806string_sizeof(PyBytesObject *v)
2807{
2808 Py_ssize_t res;
Mark Dickinsonfd24b322008-12-06 15:33:31 +00002809 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002810 return PyLong_FromSsize_t(res);
2811}
2812
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002813
2814static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815string_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002816{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002818}
2819
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002820
2821static PyMethodDef
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822string_methods[] = {
2823 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
2824 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2825 _Py_capitalize__doc__},
2826 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2827 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2828 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2829 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2830 endswith__doc__},
2831 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2832 expandtabs__doc__},
2833 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2834 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2835 fromhex_doc},
2836 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2837 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2838 _Py_isalnum__doc__},
2839 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2840 _Py_isalpha__doc__},
2841 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2842 _Py_isdigit__doc__},
2843 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2844 _Py_islower__doc__},
2845 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2846 _Py_isspace__doc__},
2847 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2848 _Py_istitle__doc__},
2849 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2850 _Py_isupper__doc__},
2851 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2852 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2853 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2854 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2855 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
2856 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2857 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2858 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2859 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2860 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2861 rpartition__doc__},
2862 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2863 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2864 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2865 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2866 splitlines__doc__},
2867 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2868 startswith__doc__},
2869 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2870 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2871 _Py_swapcase__doc__},
2872 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2873 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2874 translate__doc__},
2875 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2876 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002877 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
2878 sizeof__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002880};
2881
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882static PyObject *
2883str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2884
2885static PyObject *
2886string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2887{
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002888 PyObject *x = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889 const char *encoding = NULL;
2890 const char *errors = NULL;
2891 PyObject *new = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002892 static char *kwlist[] = {"source", "encoding", "errors", 0};
2893
2894 if (type != &PyBytes_Type)
2895 return str_subtype_new(type, args, kwds);
2896 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2897 &encoding, &errors))
2898 return NULL;
2899 if (x == NULL) {
2900 if (encoding != NULL || errors != NULL) {
2901 PyErr_SetString(PyExc_TypeError,
2902 "encoding or errors without sequence "
2903 "argument");
2904 return NULL;
2905 }
2906 return PyBytes_FromString("");
2907 }
2908
2909 if (PyUnicode_Check(x)) {
2910 /* Encode via the codec registry */
2911 if (encoding == NULL) {
2912 PyErr_SetString(PyExc_TypeError,
2913 "string argument without an encoding");
2914 return NULL;
2915 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002916 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917 if (new == NULL)
2918 return NULL;
2919 assert(PyBytes_Check(new));
2920 return new;
2921 }
2922
2923 /* If it's not unicode, there can't be encoding or errors */
2924 if (encoding != NULL || errors != NULL) {
2925 PyErr_SetString(PyExc_TypeError,
2926 "encoding or errors without a string argument");
2927 return NULL;
2928 }
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002929 return PyObject_Bytes(x);
2930}
2931
2932PyObject *
2933PyBytes_FromObject(PyObject *x)
2934{
2935 PyObject *new, *it;
2936 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002938 if (x == NULL) {
2939 PyErr_BadInternalCall();
2940 return NULL;
2941 }
2942
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002943 /* Is it an int? */
2944 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2945 if (size == -1 && PyErr_Occurred()) {
2946 PyErr_Clear();
2947 }
2948 else {
2949 if (size < 0) {
2950 PyErr_SetString(PyExc_ValueError, "negative count");
2951 return NULL;
2952 }
2953 new = PyBytes_FromStringAndSize(NULL, size);
2954 if (new == NULL) {
2955 return NULL;
2956 }
2957 if (size > 0) {
2958 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2959 }
2960 return new;
2961 }
2962
2963 /* Use the modern buffer interface */
2964 if (PyObject_CheckBuffer(x)) {
2965 Py_buffer view;
2966 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2967 return NULL;
2968 new = PyBytes_FromStringAndSize(NULL, view.len);
2969 if (!new)
2970 goto fail;
Christian Heimes1a8501c2008-10-02 19:56:01 +00002971 /* XXX(brett.cannon): Better way to get to internal buffer? */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002972 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2973 &view, view.len, 'C') < 0)
2974 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002975 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976 return new;
2977 fail:
2978 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002979 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980 return NULL;
2981 }
2982
2983 /* For iterator version, create a string object and resize as needed */
2984 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2985 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2986 input being a truly long iterator. */
2987 size = 64;
2988 new = PyBytes_FromStringAndSize(NULL, size);
2989 if (new == NULL)
2990 return NULL;
2991
2992 /* XXX Optimize this if the arguments is a list, tuple */
2993
2994 /* Get the iterator */
2995 it = PyObject_GetIter(x);
2996 if (it == NULL)
2997 goto error;
2998
2999 /* Run the iterator to exhaustion */
3000 for (i = 0; ; i++) {
3001 PyObject *item;
3002 Py_ssize_t value;
3003
3004 /* Get the next item */
3005 item = PyIter_Next(it);
3006 if (item == NULL) {
3007 if (PyErr_Occurred())
3008 goto error;
3009 break;
3010 }
3011
3012 /* Interpret it as an int (__index__) */
3013 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3014 Py_DECREF(item);
3015 if (value == -1 && PyErr_Occurred())
3016 goto error;
3017
3018 /* Range check */
3019 if (value < 0 || value >= 256) {
3020 PyErr_SetString(PyExc_ValueError,
3021 "bytes must be in range(0, 256)");
3022 goto error;
3023 }
3024
3025 /* Append the byte */
3026 if (i >= size) {
3027 size *= 2;
3028 if (_PyBytes_Resize(&new, size) < 0)
3029 goto error;
3030 }
3031 ((PyBytesObject *)new)->ob_sval[i] = value;
3032 }
3033 _PyBytes_Resize(&new, i);
3034
3035 /* Clean up and return success */
3036 Py_DECREF(it);
3037 return new;
3038
3039 error:
3040 /* Error handling when new != NULL */
3041 Py_XDECREF(it);
3042 Py_DECREF(new);
3043 return NULL;
3044}
3045
3046static PyObject *
3047str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3048{
3049 PyObject *tmp, *pnew;
3050 Py_ssize_t n;
3051
3052 assert(PyType_IsSubtype(type, &PyBytes_Type));
3053 tmp = string_new(&PyBytes_Type, args, kwds);
3054 if (tmp == NULL)
3055 return NULL;
3056 assert(PyBytes_CheckExact(tmp));
3057 n = PyBytes_GET_SIZE(tmp);
3058 pnew = type->tp_alloc(type, n);
3059 if (pnew != NULL) {
3060 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3061 PyBytes_AS_STRING(tmp), n+1);
3062 ((PyBytesObject *)pnew)->ob_shash =
3063 ((PyBytesObject *)tmp)->ob_shash;
3064 }
3065 Py_DECREF(tmp);
3066 return pnew;
3067}
3068
/* Docstring of the bytes type; installed as the tp_doc slot of PyBytes_Type. */
3069PyDoc_STRVAR(string_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003070"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003071bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003072bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3073bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003074\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003076 - an iterable yielding integers in range(256)\n\
3077 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078 - a bytes or a buffer object\n\
3079 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081static PyObject *str_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003082
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083PyTypeObject PyBytes_Type = {
3084 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3085 "bytes",
Mark Dickinsonfd24b322008-12-06 15:33:31 +00003086 PyBytesObject_SIZE,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003087 sizeof(char),
3088 string_dealloc, /* tp_dealloc */
3089 0, /* tp_print */
3090 0, /* tp_getattr */
3091 0, /* tp_setattr */
3092 0, /* tp_compare */
3093 (reprfunc)string_repr, /* tp_repr */
3094 0, /* tp_as_number */
3095 &string_as_sequence, /* tp_as_sequence */
3096 &string_as_mapping, /* tp_as_mapping */
3097 (hashfunc)string_hash, /* tp_hash */
3098 0, /* tp_call */
3099 string_str, /* tp_str */
3100 PyObject_GenericGetAttr, /* tp_getattro */
3101 0, /* tp_setattro */
3102 &string_as_buffer, /* tp_as_buffer */
3103 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3104 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3105 string_doc, /* tp_doc */
3106 0, /* tp_traverse */
3107 0, /* tp_clear */
3108 (richcmpfunc)string_richcompare, /* tp_richcompare */
3109 0, /* tp_weaklistoffset */
3110 str_iter, /* tp_iter */
3111 0, /* tp_iternext */
3112 string_methods, /* tp_methods */
3113 0, /* tp_members */
3114 0, /* tp_getset */
3115 &PyBaseObject_Type, /* tp_base */
3116 0, /* tp_dict */
3117 0, /* tp_descr_get */
3118 0, /* tp_descr_set */
3119 0, /* tp_dictoffset */
3120 0, /* tp_init */
3121 0, /* tp_alloc */
3122 string_new, /* tp_new */
3123 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003124};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003126void
3127PyBytes_Concat(register PyObject **pv, register PyObject *w)
3128{
3129 register PyObject *v;
3130 assert(pv != NULL);
3131 if (*pv == NULL)
3132 return;
3133 if (w == NULL) {
3134 Py_DECREF(*pv);
3135 *pv = NULL;
3136 return;
3137 }
3138 v = string_concat(*pv, w);
3139 Py_DECREF(*pv);
3140 *pv = v;
3141}
3142
3143void
3144PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3145{
3146 PyBytes_Concat(pv, w);
3147 Py_XDECREF(w);
3148}
3149
3150
3151/* The following function breaks the notion that strings are immutable:
3152 it changes the size of a string. We get away with this only if there
3153 is only one module referencing the object. You can also think of it
3154 as creating a new string object and destroying the old one, only
3155 more efficiently. In any case, don't use this if the string may
3156 already be known to some other part of the code...
3157 Note that if there's not enough memory to resize the string, the original
3158 string object at *pv is deallocated, *pv is set to NULL, an "out of
3159 memory" exception is set, and -1 is returned. Else (on success) 0 is
3160 returned, and the value in *pv may or may not be the same as on input.
3161 As always, an extra byte is allocated for a trailing \0 byte (newsize
3162 does *not* include that), and a trailing \0 byte is stored.
3163*/
3164
3165int
3166_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3167{
3168 register PyObject *v;
3169 register PyBytesObject *sv;
3170 v = *pv;
3171 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3172 *pv = 0;
3173 Py_DECREF(v);
3174 PyErr_BadInternalCall();
3175 return -1;
3176 }
3177 /* XXX UNREF/NEWREF interface should be more symmetrical */
3178 _Py_DEC_REFTOTAL;
3179 _Py_ForgetReference(v);
3180 *pv = (PyObject *)
Mark Dickinsonfd24b322008-12-06 15:33:31 +00003181 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003182 if (*pv == NULL) {
3183 PyObject_Del(v);
3184 PyErr_NoMemory();
3185 return -1;
3186 }
3187 _Py_NewReference(*pv);
3188 sv = (PyBytesObject *) *pv;
3189 Py_SIZE(sv) = newsize;
3190 sv->ob_sval[newsize] = '\0';
3191 sv->ob_shash = -1; /* invalidate cached hash value */
3192 return 0;
3193}
3194
3195/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3196 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3197 * Python's regular ints.
3198 * Return value: a new PyString*, or NULL if error.
3199 * . *pbuf is set to point into it,
3200 * *plen set to the # of chars following that.
3201 * Caller must decref it when done using pbuf.
3202 * The string starting at *pbuf is of the form
3203 * "-"? ("0x" | "0X")? digit+
3204 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3205 * set in flags. The case of hex digits will be correct,
3206 * There will be at least prec digits, zero-filled on the left if
3207 * necessary to get that many.
3208 * val object to be converted
3209 * flags bitmask of format flags; only F_ALT is looked at
3210 * prec minimum number of digits; 0-fill on left if needed
3211 * type a character in [duoxX]; u acts the same as d
3212 *
3213 * CAUTION: o, x and X conversions on regular ints can never
3214 * produce a '-' sign, but can for Python's unbounded ints.
3215 */
3216PyObject*
3217_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3218 char **pbuf, int *plen)
3219{
3220 PyObject *result = NULL;
3221 char *buf;
3222 Py_ssize_t i;
3223 int sign; /* 1 if '-', else 0 */
3224 int len; /* number of characters */
3225 Py_ssize_t llen;
3226 int numdigits; /* len == numnondigits + numdigits */
3227 int numnondigits = 0;
3228
3229 /* Avoid exceeding SSIZE_T_MAX */
Christian Heimesce694b72008-08-24 16:15:19 +00003230 if (prec > INT_MAX-3) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003231 PyErr_SetString(PyExc_OverflowError,
3232 "precision too large");
3233 return NULL;
3234 }
3235
3236 switch (type) {
3237 case 'd':
3238 case 'u':
3239 /* Special-case boolean: we want 0/1 */
3240 if (PyBool_Check(val))
3241 result = PyNumber_ToBase(val, 10);
3242 else
3243 result = Py_TYPE(val)->tp_str(val);
3244 break;
3245 case 'o':
3246 numnondigits = 2;
3247 result = PyNumber_ToBase(val, 8);
3248 break;
3249 case 'x':
3250 case 'X':
3251 numnondigits = 2;
3252 result = PyNumber_ToBase(val, 16);
3253 break;
3254 default:
3255 assert(!"'type' not in [duoxX]");
3256 }
3257 if (!result)
3258 return NULL;
3259
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003260 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261 if (!buf) {
3262 Py_DECREF(result);
3263 return NULL;
3264 }
3265
3266 /* To modify the string in-place, there can only be one reference. */
3267 if (Py_REFCNT(result) != 1) {
3268 PyErr_BadInternalCall();
3269 return NULL;
3270 }
3271 llen = PyUnicode_GetSize(result);
3272 if (llen > INT_MAX) {
3273 PyErr_SetString(PyExc_ValueError,
3274 "string too large in _PyBytes_FormatLong");
3275 return NULL;
3276 }
3277 len = (int)llen;
3278 if (buf[len-1] == 'L') {
3279 --len;
3280 buf[len] = '\0';
3281 }
3282 sign = buf[0] == '-';
3283 numnondigits += sign;
3284 numdigits = len - numnondigits;
3285 assert(numdigits > 0);
3286
3287 /* Get rid of base marker unless F_ALT */
3288 if (((flags & F_ALT) == 0 &&
3289 (type == 'o' || type == 'x' || type == 'X'))) {
3290 assert(buf[sign] == '0');
3291 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3292 buf[sign+1] == 'o');
3293 numnondigits -= 2;
3294 buf += 2;
3295 len -= 2;
3296 if (sign)
3297 buf[0] = '-';
3298 assert(len == numnondigits + numdigits);
3299 assert(numdigits > 0);
3300 }
3301
3302 /* Fill with leading zeroes to meet minimum width. */
3303 if (prec > numdigits) {
3304 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3305 numnondigits + prec);
3306 char *b1;
3307 if (!r1) {
3308 Py_DECREF(result);
3309 return NULL;
3310 }
3311 b1 = PyBytes_AS_STRING(r1);
3312 for (i = 0; i < numnondigits; ++i)
3313 *b1++ = *buf++;
3314 for (i = 0; i < prec - numdigits; i++)
3315 *b1++ = '0';
3316 for (i = 0; i < numdigits; i++)
3317 *b1++ = *buf++;
3318 *b1 = '\0';
3319 Py_DECREF(result);
3320 result = r1;
3321 buf = PyBytes_AS_STRING(result);
3322 len = numnondigits + prec;
3323 }
3324
3325 /* Fix up case for hex conversions. */
3326 if (type == 'X') {
3327 /* Need to convert all lower case letters to upper case.
3328 and need to convert 0x to 0X (and -0x to -0X). */
3329 for (i = 0; i < len; i++)
3330 if (buf[i] >= 'a' && buf[i] <= 'x')
3331 buf[i] -= 'a'-'A';
3332 }
3333 *pbuf = buf;
3334 *plen = len;
3335 return result;
3336}
3337
3338void
3339PyBytes_Fini(void)
3340{
3341 int i;
3342 for (i = 0; i < UCHAR_MAX + 1; i++) {
3343 Py_XDECREF(characters[i]);
3344 characters[i] = NULL;
3345 }
3346 Py_XDECREF(nullstring);
3347 nullstring = NULL;
3348}
3349
Benjamin Peterson4116f362008-05-27 00:36:20 +00003350/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003351
3352typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353 PyObject_HEAD
3354 Py_ssize_t it_index;
3355 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3356} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003357
3358static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003359striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003360{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003361 _PyObject_GC_UNTRACK(it);
3362 Py_XDECREF(it->it_seq);
3363 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003364}
3365
3366static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003367striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003368{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003369 Py_VISIT(it->it_seq);
3370 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003371}
3372
3373static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003375{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003376 PyBytesObject *seq;
3377 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003378
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003379 assert(it != NULL);
3380 seq = it->it_seq;
3381 if (seq == NULL)
3382 return NULL;
3383 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003384
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003385 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3386 item = PyLong_FromLong(
3387 (unsigned char)seq->ob_sval[it->it_index]);
3388 if (item != NULL)
3389 ++it->it_index;
3390 return item;
3391 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003392
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003393 Py_DECREF(seq);
3394 it->it_seq = NULL;
3395 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003396}
3397
3398static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003399striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003400{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003401 Py_ssize_t len = 0;
3402 if (it->it_seq)
3403 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3404 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003405}
3406
3407PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003408 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003409
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003410static PyMethodDef striter_methods[] = {
3411 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3412 length_hint_doc},
3413 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003414};
3415
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003416PyTypeObject PyBytesIter_Type = {
3417 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3418 "bytes_iterator", /* tp_name */
3419 sizeof(striterobject), /* tp_basicsize */
3420 0, /* tp_itemsize */
3421 /* methods */
3422 (destructor)striter_dealloc, /* tp_dealloc */
3423 0, /* tp_print */
3424 0, /* tp_getattr */
3425 0, /* tp_setattr */
3426 0, /* tp_compare */
3427 0, /* tp_repr */
3428 0, /* tp_as_number */
3429 0, /* tp_as_sequence */
3430 0, /* tp_as_mapping */
3431 0, /* tp_hash */
3432 0, /* tp_call */
3433 0, /* tp_str */
3434 PyObject_GenericGetAttr, /* tp_getattro */
3435 0, /* tp_setattro */
3436 0, /* tp_as_buffer */
3437 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3438 0, /* tp_doc */
3439 (traverseproc)striter_traverse, /* tp_traverse */
3440 0, /* tp_clear */
3441 0, /* tp_richcompare */
3442 0, /* tp_weaklistoffset */
3443 PyObject_SelfIter, /* tp_iter */
3444 (iternextfunc)striter_next, /* tp_iternext */
3445 striter_methods, /* tp_methods */
3446 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003447};
3448
3449static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450str_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003451{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003452 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003453
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003454 if (!PyBytes_Check(seq)) {
3455 PyErr_BadInternalCall();
3456 return NULL;
3457 }
3458 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3459 if (it == NULL)
3460 return NULL;
3461 it->it_index = 0;
3462 Py_INCREF(seq);
3463 it->it_seq = (PyBytesObject *)seq;
3464 _PyObject_GC_TRACK(it);
3465 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003466}