blob: 79c1e4f0d4e8b04a93878024e25ca288727a8c3d [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* String object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
/* Cache of shared one-byte string objects, indexed by byte value.  Entries
   are filled in lazily by PyBytes_FromStringAndSize() and
   PyBytes_FromString() the first time a given one-byte string is built. */
12static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty string object, filled in lazily the first time an empty
   string is built by the constructors below. */
13static PyBytesObject *nullstring;
14
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23static PyObject *interned;
24
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
51PyObject *
52PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Christian Heimes44720832008-05-26 13:01:01 +000054 register PyBytesObject *op;
55 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
57 "Negative size passed to PyBytes_FromStringAndSize");
58 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
61#ifdef COUNT_ALLOCS
62 null_strings++;
63#endif
64 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
70#ifdef COUNT_ALLOCS
71 one_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76
77 /* Inline PyObject_NewVar */
78 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
79 if (op == NULL)
80 return PyErr_NoMemory();
81 PyObject_INIT_VAR(op, &PyBytes_Type, size);
82 op->ob_shash = -1;
83 op->ob_sstate = SSTATE_NOT_INTERNED;
84 if (str != NULL)
85 Py_MEMCPY(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
87 /* share short strings */
88 if (size == 0) {
89 PyObject *t = (PyObject *)op;
90 PyBytes_InternInPlace(&t);
91 op = (PyBytesObject *)t;
92 nullstring = op;
93 Py_INCREF(op);
94 } else if (size == 1 && str != NULL) {
95 PyObject *t = (PyObject *)op;
96 PyBytes_InternInPlace(&t);
97 op = (PyBytesObject *)t;
98 characters[*str & UCHAR_MAX] = op;
99 Py_INCREF(op);
100 }
101 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000102}
103
Christian Heimes44720832008-05-26 13:01:01 +0000104PyObject *
105PyBytes_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000106{
Christian Heimes44720832008-05-26 13:01:01 +0000107 register size_t size;
108 register PyBytesObject *op;
109
110 assert(str != NULL);
111 size = strlen(str);
112 if (size > PY_SSIZE_T_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 Py_INCREF(op);
122 return (PyObject *)op;
123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 Py_INCREF(op);
129 return (PyObject *)op;
130 }
131
132 /* Inline PyObject_NewVar */
133 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
134 if (op == NULL)
135 return PyErr_NoMemory();
136 PyObject_INIT_VAR(op, &PyBytes_Type, size);
137 op->ob_shash = -1;
138 op->ob_sstate = SSTATE_NOT_INTERNED;
139 Py_MEMCPY(op->ob_sval, str, size+1);
140 /* share short strings */
141 if (size == 0) {
142 PyObject *t = (PyObject *)op;
143 PyBytes_InternInPlace(&t);
144 op = (PyBytesObject *)t;
145 nullstring = op;
146 Py_INCREF(op);
147 } else if (size == 1) {
148 PyObject *t = (PyObject *)op;
149 PyBytes_InternInPlace(&t);
150 op = (PyBytesObject *)t;
151 characters[*str & UCHAR_MAX] = op;
152 Py_INCREF(op);
153 }
154 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000155}
156
Christian Heimes44720832008-05-26 13:01:01 +0000157PyObject *
158PyBytes_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000159{
Christian Heimes44720832008-05-26 13:01:01 +0000160 va_list count;
161 Py_ssize_t n = 0;
162 const char* f;
163 char *s;
164 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165
Christian Heimes44720832008-05-26 13:01:01 +0000166#ifdef VA_LIST_IS_ARRAY
167 Py_MEMCPY(count, vargs, sizeof(va_list));
168#else
169#ifdef __va_copy
170 __va_copy(count, vargs);
171#else
172 count = vargs;
173#endif
174#endif
175 /* step 1: figure out how large a buffer we need */
176 for (f = format; *f; f++) {
177 if (*f == '%') {
178 const char* p = f;
179 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
180 ;
181
182 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
183 * they don't affect the amount of space we reserve.
184 */
185 if ((*f == 'l' || *f == 'z') &&
186 (f[1] == 'd' || f[1] == 'u'))
187 ++f;
188
189 switch (*f) {
190 case 'c':
191 (void)va_arg(count, int);
192 /* fall through... */
193 case '%':
194 n++;
195 break;
196 case 'd': case 'u': case 'i': case 'x':
197 (void) va_arg(count, int);
198 /* 20 bytes is enough to hold a 64-bit
199 integer. Decimal takes the most space.
200 This isn't enough for octal. */
201 n += 20;
202 break;
203 case 's':
204 s = va_arg(count, char*);
205 n += strlen(s);
206 break;
207 case 'p':
208 (void) va_arg(count, int);
209 /* maximum 64-bit pointer representation:
210 * 0xffffffffffffffff
211 * so 19 characters is enough.
212 * XXX I count 18 -- what's the extra for?
213 */
214 n += 19;
215 break;
216 default:
217 /* if we stumble upon an unknown
218 formatting code, copy the rest of
219 the format string to the output
220 string. (we cannot just skip the
221 code, since there's no way to know
222 what's in the argument list) */
223 n += strlen(p);
224 goto expand;
225 }
226 } else
227 n++;
228 }
229 expand:
230 /* step 2: fill the buffer */
231 /* Since we've analyzed how much space we need for the worst case,
232 use sprintf directly instead of the slower PyOS_snprintf. */
233 string = PyBytes_FromStringAndSize(NULL, n);
234 if (!string)
235 return NULL;
236
237 s = PyBytes_AsString(string);
238
239 for (f = format; *f; f++) {
240 if (*f == '%') {
241 const char* p = f++;
242 Py_ssize_t i;
243 int longflag = 0;
244 int size_tflag = 0;
245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 if (*f == '.') {
251 f++;
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 }
256 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
257 f++;
258 /* handle the long flag, but only for %ld and %lu.
259 others can be added when necessary. */
260 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
261 longflag = 1;
262 ++f;
263 }
264 /* handle the size_t flag. */
265 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
266 size_tflag = 1;
267 ++f;
268 }
269
270 switch (*f) {
271 case 'c':
272 *s++ = va_arg(vargs, int);
273 break;
274 case 'd':
275 if (longflag)
276 sprintf(s, "%ld", va_arg(vargs, long));
277 else if (size_tflag)
278 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
279 va_arg(vargs, Py_ssize_t));
280 else
281 sprintf(s, "%d", va_arg(vargs, int));
282 s += strlen(s);
283 break;
284 case 'u':
285 if (longflag)
286 sprintf(s, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(s, "%u",
293 va_arg(vargs, unsigned int));
294 s += strlen(s);
295 break;
296 case 'i':
297 sprintf(s, "%i", va_arg(vargs, int));
298 s += strlen(s);
299 break;
300 case 'x':
301 sprintf(s, "%x", va_arg(vargs, int));
302 s += strlen(s);
303 break;
304 case 's':
305 p = va_arg(vargs, char*);
306 i = strlen(p);
307 if (n > 0 && i > n)
308 i = n;
309 Py_MEMCPY(s, p, i);
310 s += i;
311 break;
312 case 'p':
313 sprintf(s, "%p", va_arg(vargs, void*));
314 /* %p is ill-defined: ensure leading 0x. */
315 if (s[1] == 'X')
316 s[1] = 'x';
317 else if (s[1] != 'x') {
318 memmove(s+2, s, strlen(s)+1);
319 s[0] = '0';
320 s[1] = 'x';
321 }
322 s += strlen(s);
323 break;
324 case '%':
325 *s++ = '%';
326 break;
327 default:
328 strcpy(s, p);
329 s += strlen(s);
330 goto end;
331 }
332 } else
333 *s++ = *f;
334 }
335
336 end:
337 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
338 return string;
339}
340
341PyObject *
342PyBytes_FromFormat(const char *format, ...)
343{
344 PyObject* ret;
345 va_list vargs;
346
347#ifdef HAVE_STDARG_PROTOTYPES
348 va_start(vargs, format);
349#else
350 va_start(vargs);
351#endif
352 ret = PyBytes_FromFormatV(format, vargs);
353 va_end(vargs);
354 return ret;
355}
356
357
358PyObject *PyBytes_Decode(const char *s,
359 Py_ssize_t size,
360 const char *encoding,
361 const char *errors)
362{
363 PyObject *v, *str;
364
365 str = PyBytes_FromStringAndSize(s, size);
366 if (str == NULL)
367 return NULL;
368 v = PyBytes_AsDecodedString(str, encoding, errors);
369 Py_DECREF(str);
370 return v;
371}
372
373PyObject *PyBytes_AsDecodedObject(PyObject *str,
374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v;
378
379 if (!PyBytes_Check(str)) {
380 PyErr_BadArgument();
381 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000382 }
383
Christian Heimes44720832008-05-26 13:01:01 +0000384 if (encoding == NULL) {
385#ifdef Py_USING_UNICODE
386 encoding = PyUnicode_GetDefaultEncoding();
387#else
388 PyErr_SetString(PyExc_ValueError, "no encoding specified");
389 goto onError;
390#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 /* Decode via the codec registry */
394 v = PyCodec_Decode(str, encoding, errors);
395 if (v == NULL)
396 goto onError;
397
398 return v;
399
400 onError:
401 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000402}
403
Christian Heimes44720832008-05-26 13:01:01 +0000404PyObject *PyBytes_AsDecodedString(PyObject *str,
405 const char *encoding,
406 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407{
Christian Heimes44720832008-05-26 13:01:01 +0000408 PyObject *v;
409
410 v = PyBytes_AsDecodedObject(str, encoding, errors);
411 if (v == NULL)
412 goto onError;
413
414#ifdef Py_USING_UNICODE
415 /* Convert Unicode to a string using the default encoding */
416 if (PyUnicode_Check(v)) {
417 PyObject *temp = v;
418 v = PyUnicode_AsEncodedString(v, NULL, NULL);
419 Py_DECREF(temp);
420 if (v == NULL)
421 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000422 }
Christian Heimes44720832008-05-26 13:01:01 +0000423#endif
424 if (!PyBytes_Check(v)) {
425 PyErr_Format(PyExc_TypeError,
426 "decoder did not return a string object (type=%.400s)",
427 Py_TYPE(v)->tp_name);
428 Py_DECREF(v);
429 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000430 }
Christian Heimes44720832008-05-26 13:01:01 +0000431
432 return v;
433
434 onError:
435 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436}
437
Christian Heimes44720832008-05-26 13:01:01 +0000438PyObject *PyBytes_Encode(const char *s,
439 Py_ssize_t size,
440 const char *encoding,
441 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000442{
Christian Heimes44720832008-05-26 13:01:01 +0000443 PyObject *v, *str;
444
445 str = PyBytes_FromStringAndSize(s, size);
446 if (str == NULL)
447 return NULL;
448 v = PyBytes_AsEncodedString(str, encoding, errors);
449 Py_DECREF(str);
450 return v;
451}
452
453PyObject *PyBytes_AsEncodedObject(PyObject *str,
454 const char *encoding,
455 const char *errors)
456{
457 PyObject *v;
458
459 if (!PyBytes_Check(str)) {
460 PyErr_BadArgument();
461 goto onError;
462 }
463
464 if (encoding == NULL) {
465#ifdef Py_USING_UNICODE
466 encoding = PyUnicode_GetDefaultEncoding();
467#else
468 PyErr_SetString(PyExc_ValueError, "no encoding specified");
469 goto onError;
470#endif
471 }
472
473 /* Encode via the codec registry */
474 v = PyCodec_Encode(str, encoding, errors);
475 if (v == NULL)
476 goto onError;
477
478 return v;
479
480 onError:
481 return NULL;
482}
483
484PyObject *PyBytes_AsEncodedString(PyObject *str,
485 const char *encoding,
486 const char *errors)
487{
488 PyObject *v;
489
490 v = PyBytes_AsEncodedObject(str, encoding, errors);
491 if (v == NULL)
492 goto onError;
493
494#ifdef Py_USING_UNICODE
495 /* Convert Unicode to a string using the default encoding */
496 if (PyUnicode_Check(v)) {
497 PyObject *temp = v;
498 v = PyUnicode_AsEncodedString(v, NULL, NULL);
499 Py_DECREF(temp);
500 if (v == NULL)
501 goto onError;
502 }
503#endif
504 if (!PyBytes_Check(v)) {
505 PyErr_Format(PyExc_TypeError,
506 "encoder did not return a string object (type=%.400s)",
507 Py_TYPE(v)->tp_name);
508 Py_DECREF(v);
509 goto onError;
510 }
511
512 return v;
513
514 onError:
515 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000516}
517
518static void
Christian Heimes44720832008-05-26 13:01:01 +0000519string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000520{
Christian Heimes44720832008-05-26 13:01:01 +0000521 switch (PyBytes_CHECK_INTERNED(op)) {
522 case SSTATE_NOT_INTERNED:
523 break;
524
525 case SSTATE_INTERNED_MORTAL:
526 /* revive dead object temporarily for DelItem */
527 Py_REFCNT(op) = 3;
528 if (PyDict_DelItem(interned, op) != 0)
529 Py_FatalError(
530 "deletion of interned string failed");
531 break;
532
533 case SSTATE_INTERNED_IMMORTAL:
534 Py_FatalError("Immortal interned string died.");
535
536 default:
537 Py_FatalError("Inconsistent interned string state.");
538 }
539 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000540}
541
Christian Heimes44720832008-05-26 13:01:01 +0000542/* Unescape a backslash-escaped string. If unicode is non-zero,
543 the string is a u-literal. If recode_encoding is non-zero,
544 the string is UTF-8 encoded and should be re-encoded in the
545 specified encoding. */
546
547PyObject *PyBytes_DecodeEscape(const char *s,
548 Py_ssize_t len,
549 const char *errors,
550 Py_ssize_t unicode,
551 const char *recode_encoding)
552{
553 int c;
554 char *p, *buf;
555 const char *end;
556 PyObject *v;
557 Py_ssize_t newlen = recode_encoding ? 4*len:len;
558 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
559 if (v == NULL)
560 return NULL;
561 p = buf = PyBytes_AsString(v);
562 end = s + len;
563 while (s < end) {
564 if (*s != '\\') {
565 non_esc:
566#ifdef Py_USING_UNICODE
567 if (recode_encoding && (*s & 0x80)) {
568 PyObject *u, *w;
569 char *r;
570 const char* t;
571 Py_ssize_t rn;
572 t = s;
573 /* Decode non-ASCII bytes as UTF-8. */
574 while (t < end && (*t & 0x80)) t++;
575 u = PyUnicode_DecodeUTF8(s, t - s, errors);
576 if(!u) goto failed;
577
578 /* Recode them in target encoding. */
579 w = PyUnicode_AsEncodedString(
580 u, recode_encoding, errors);
581 Py_DECREF(u);
582 if (!w) goto failed;
583
584 /* Append bytes to output buffer. */
585 assert(PyBytes_Check(w));
586 r = PyBytes_AS_STRING(w);
587 rn = PyBytes_GET_SIZE(w);
588 Py_MEMCPY(p, r, rn);
589 p += rn;
590 Py_DECREF(w);
591 s = t;
592 } else {
593 *p++ = *s++;
594 }
595#else
596 *p++ = *s++;
597#endif
598 continue;
599 }
600 s++;
601 if (s==end) {
602 PyErr_SetString(PyExc_ValueError,
603 "Trailing \\ in string");
604 goto failed;
605 }
606 switch (*s++) {
607 /* XXX This assumes ASCII! */
608 case '\n': break;
609 case '\\': *p++ = '\\'; break;
610 case '\'': *p++ = '\''; break;
611 case '\"': *p++ = '\"'; break;
612 case 'b': *p++ = '\b'; break;
613 case 'f': *p++ = '\014'; break; /* FF */
614 case 't': *p++ = '\t'; break;
615 case 'n': *p++ = '\n'; break;
616 case 'r': *p++ = '\r'; break;
617 case 'v': *p++ = '\013'; break; /* VT */
618 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
619 case '0': case '1': case '2': case '3':
620 case '4': case '5': case '6': case '7':
621 c = s[-1] - '0';
622 if (s < end && '0' <= *s && *s <= '7') {
623 c = (c<<3) + *s++ - '0';
624 if (s < end && '0' <= *s && *s <= '7')
625 c = (c<<3) + *s++ - '0';
626 }
627 *p++ = c;
628 break;
629 case 'x':
630 if (s+1 < end &&
631 isxdigit(Py_CHARMASK(s[0])) &&
632 isxdigit(Py_CHARMASK(s[1])))
633 {
634 unsigned int x = 0;
635 c = Py_CHARMASK(*s);
636 s++;
637 if (isdigit(c))
638 x = c - '0';
639 else if (islower(c))
640 x = 10 + c - 'a';
641 else
642 x = 10 + c - 'A';
643 x = x << 4;
644 c = Py_CHARMASK(*s);
645 s++;
646 if (isdigit(c))
647 x += c - '0';
648 else if (islower(c))
649 x += 10 + c - 'a';
650 else
651 x += 10 + c - 'A';
652 *p++ = x;
653 break;
654 }
655 if (!errors || strcmp(errors, "strict") == 0) {
656 PyErr_SetString(PyExc_ValueError,
657 "invalid \\x escape");
658 goto failed;
659 }
660 if (strcmp(errors, "replace") == 0) {
661 *p++ = '?';
662 } else if (strcmp(errors, "ignore") == 0)
663 /* do nothing */;
664 else {
665 PyErr_Format(PyExc_ValueError,
666 "decoding error; "
667 "unknown error handling code: %.400s",
668 errors);
669 goto failed;
670 }
671#ifndef Py_USING_UNICODE
672 case 'u':
673 case 'U':
674 case 'N':
675 if (unicode) {
676 PyErr_SetString(PyExc_ValueError,
677 "Unicode escapes not legal "
678 "when Unicode disabled");
679 goto failed;
680 }
681#endif
682 default:
683 *p++ = '\\';
684 s--;
685 goto non_esc; /* an arbitry number of unescaped
686 UTF-8 bytes may follow. */
687 }
688 }
689 if (p-buf < newlen)
690 _PyBytes_Resize(&v, p - buf);
691 return v;
692 failed:
693 Py_DECREF(v);
694 return NULL;
695}
696
697/* -------------------------------------------------------------------- */
698/* object api */
699
Christian Heimes1a6387e2008-03-26 12:49:49 +0000700static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000701string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000702{
Christian Heimes44720832008-05-26 13:01:01 +0000703 char *s;
704 Py_ssize_t len;
705 if (PyBytes_AsStringAndSize(op, &s, &len))
706 return -1;
707 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000708}
709
Christian Heimes44720832008-05-26 13:01:01 +0000710static /*const*/ char *
711string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000712{
Christian Heimes44720832008-05-26 13:01:01 +0000713 char *s;
714 Py_ssize_t len;
715 if (PyBytes_AsStringAndSize(op, &s, &len))
716 return NULL;
717 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000718}
719
720Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000721PyBytes_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722{
Christian Heimes44720832008-05-26 13:01:01 +0000723 if (!PyBytes_Check(op))
724 return string_getsize(op);
725 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726}
727
Christian Heimes44720832008-05-26 13:01:01 +0000728/*const*/ char *
729PyBytes_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000730{
Christian Heimes44720832008-05-26 13:01:01 +0000731 if (!PyBytes_Check(op))
732 return string_getbuffer(op);
733 return ((PyBytesObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000734}
735
736int
Christian Heimes44720832008-05-26 13:01:01 +0000737PyBytes_AsStringAndSize(register PyObject *obj,
738 register char **s,
739 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740{
Christian Heimes44720832008-05-26 13:01:01 +0000741 if (s == NULL) {
742 PyErr_BadInternalCall();
743 return -1;
744 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745
Christian Heimes44720832008-05-26 13:01:01 +0000746 if (!PyBytes_Check(obj)) {
747#ifdef Py_USING_UNICODE
748 if (PyUnicode_Check(obj)) {
749 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
750 if (obj == NULL)
751 return -1;
752 }
753 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754#endif
Christian Heimes44720832008-05-26 13:01:01 +0000755 {
756 PyErr_Format(PyExc_TypeError,
757 "expected string or Unicode object, "
758 "%.200s found", Py_TYPE(obj)->tp_name);
759 return -1;
760 }
761 }
762
763 *s = PyBytes_AS_STRING(obj);
764 if (len != NULL)
765 *len = PyBytes_GET_SIZE(obj);
766 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
767 PyErr_SetString(PyExc_TypeError,
768 "expected string without null bytes");
769 return -1;
770 }
771 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000772}
773
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774/* -------------------------------------------------------------------- */
775/* Methods */
776
Christian Heimes44720832008-05-26 13:01:01 +0000777#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000778#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000779
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780#include "stringlib/count.h"
781#include "stringlib/find.h"
782#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783
Christian Heimes44720832008-05-26 13:01:01 +0000784#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
785#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787
788
789static int
Christian Heimes44720832008-05-26 13:01:01 +0000790string_print(PyBytesObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Christian Heimes44720832008-05-26 13:01:01 +0000792 Py_ssize_t i, str_len;
793 char c;
794 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795
Christian Heimes44720832008-05-26 13:01:01 +0000796 /* XXX Ought to check for interrupts when writing long strings */
797 if (! PyBytes_CheckExact(op)) {
798 int ret;
799 /* A str subclass may have its own __str__ method. */
800 op = (PyBytesObject *) PyObject_Str((PyObject *)op);
801 if (op == NULL)
802 return -1;
803 ret = string_print(op, fp, flags);
804 Py_DECREF(op);
805 return ret;
806 }
807 if (flags & Py_PRINT_RAW) {
808 char *data = op->ob_sval;
809 Py_ssize_t size = Py_SIZE(op);
810 Py_BEGIN_ALLOW_THREADS
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
821#ifdef __VMS
822 if (size) fwrite(data, (int)size, 1, fp);
823#else
824 fwrite(data, 1, (int)size, fp);
825#endif
826 Py_END_ALLOW_THREADS
827 return 0;
828 }
829
830 /* figure out which quote to use; single is preferred */
831 quote = '\'';
832 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
833 !memchr(op->ob_sval, '"', Py_SIZE(op)))
834 quote = '"';
835
836 str_len = Py_SIZE(op);
837 Py_BEGIN_ALLOW_THREADS
838 fputc(quote, fp);
839 for (i = 0; i < str_len; i++) {
840 /* Since strings are immutable and the caller should have a
841 reference, accessing the interal buffer should not be an issue
842 with the GIL released. */
843 c = op->ob_sval[i];
844 if (c == quote || c == '\\')
845 fprintf(fp, "\\%c", c);
846 else if (c == '\t')
847 fprintf(fp, "\\t");
848 else if (c == '\n')
849 fprintf(fp, "\\n");
850 else if (c == '\r')
851 fprintf(fp, "\\r");
852 else if (c < ' ' || c >= 0x7f)
853 fprintf(fp, "\\x%02x", c & 0xff);
854 else
855 fputc(c, fp);
856 }
857 fputc(quote, fp);
858 Py_END_ALLOW_THREADS
859 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000860}
861
Christian Heimes44720832008-05-26 13:01:01 +0000862PyObject *
863PyBytes_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000864{
Christian Heimes44720832008-05-26 13:01:01 +0000865 register PyBytesObject* op = (PyBytesObject*) obj;
866 size_t newsize = 2 + 4 * Py_SIZE(op);
867 PyObject *v;
868 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
869 PyErr_SetString(PyExc_OverflowError,
870 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000871 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000872 }
873 v = PyBytes_FromStringAndSize((char *)NULL, newsize);
874 if (v == NULL) {
875 return NULL;
876 }
877 else {
878 register Py_ssize_t i;
879 register char c;
880 register char *p;
881 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000882
Christian Heimes44720832008-05-26 13:01:01 +0000883 /* figure out which quote to use; single is preferred */
884 quote = '\'';
885 if (smartquotes &&
886 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
887 !memchr(op->ob_sval, '"', Py_SIZE(op)))
888 quote = '"';
889
890 p = PyBytes_AS_STRING(v);
891 *p++ = quote;
892 for (i = 0; i < Py_SIZE(op); i++) {
893 /* There's at least enough room for a hex escape
894 and a closing quote. */
895 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 5);
896 c = op->ob_sval[i];
897 if (c == quote || c == '\\')
898 *p++ = '\\', *p++ = c;
899 else if (c == '\t')
900 *p++ = '\\', *p++ = 't';
901 else if (c == '\n')
902 *p++ = '\\', *p++ = 'n';
903 else if (c == '\r')
904 *p++ = '\\', *p++ = 'r';
905 else if (c < ' ' || c >= 0x7f) {
906 /* For performance, we don't want to call
907 PyOS_snprintf here (extra layers of
908 function call). */
909 sprintf(p, "\\x%02x", c & 0xff);
910 p += 4;
911 }
912 else
913 *p++ = c;
914 }
915 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 1);
916 *p++ = quote;
917 *p = '\0';
918 _PyBytes_Resize(
919 &v, (p - PyBytes_AS_STRING(v)));
920 return v;
921 }
922}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923
924static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000925string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000926{
Christian Heimes44720832008-05-26 13:01:01 +0000927 return PyBytes_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928}
929
Christian Heimes1a6387e2008-03-26 12:49:49 +0000930static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000931string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000932{
Christian Heimes44720832008-05-26 13:01:01 +0000933 assert(PyBytes_Check(s));
934 if (PyBytes_CheckExact(s)) {
935 Py_INCREF(s);
936 return s;
937 }
938 else {
939 /* Subtype -- return genuine string with the same value. */
940 PyBytesObject *t = (PyBytesObject *) s;
941 return PyBytes_FromStringAndSize(t->ob_sval, Py_SIZE(t));
942 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000943}
944
Christian Heimes44720832008-05-26 13:01:01 +0000945static Py_ssize_t
946string_length(PyBytesObject *a)
947{
948 return Py_SIZE(a);
949}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000950
Christian Heimes44720832008-05-26 13:01:01 +0000951static PyObject *
952string_concat(register PyBytesObject *a, register PyObject *bb)
953{
954 register Py_ssize_t size;
955 register PyBytesObject *op;
956 if (!PyBytes_Check(bb)) {
957#ifdef Py_USING_UNICODE
958 if (PyUnicode_Check(bb))
959 return PyUnicode_Concat((PyObject *)a, bb);
960#endif
961 if (PyByteArray_Check(bb))
962 return PyByteArray_Concat((PyObject *)a, bb);
963 PyErr_Format(PyExc_TypeError,
964 "cannot concatenate 'str' and '%.200s' objects",
965 Py_TYPE(bb)->tp_name);
966 return NULL;
967 }
968#define b ((PyBytesObject *)bb)
969 /* Optimize cases with empty left or right operand */
970 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
971 PyBytes_CheckExact(a) && PyBytes_CheckExact(b)) {
972 if (Py_SIZE(a) == 0) {
973 Py_INCREF(bb);
974 return bb;
975 }
976 Py_INCREF(a);
977 return (PyObject *)a;
978 }
979 size = Py_SIZE(a) + Py_SIZE(b);
980 if (size < 0) {
981 PyErr_SetString(PyExc_OverflowError,
982 "strings are too large to concat");
983 return NULL;
984 }
985
986 /* Inline PyObject_NewVar */
987 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
988 if (op == NULL)
989 return PyErr_NoMemory();
990 PyObject_INIT_VAR(op, &PyBytes_Type, size);
991 op->ob_shash = -1;
992 op->ob_sstate = SSTATE_NOT_INTERNED;
993 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
994 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
995 op->ob_sval[size] = '\0';
996 return (PyObject *) op;
997#undef b
998}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000999
Christian Heimes44720832008-05-26 13:01:01 +00001000static PyObject *
1001string_repeat(register PyBytesObject *a, register Py_ssize_t n)
1002{
1003 register Py_ssize_t i;
1004 register Py_ssize_t j;
1005 register Py_ssize_t size;
1006 register PyBytesObject *op;
1007 size_t nbytes;
1008 if (n < 0)
1009 n = 0;
1010 /* watch out for overflows: the size can overflow int,
1011 * and the # of bytes needed can overflow size_t
1012 */
1013 size = Py_SIZE(a) * n;
1014 if (n && size / n != Py_SIZE(a)) {
1015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
1019 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1020 Py_INCREF(a);
1021 return (PyObject *)a;
1022 }
1023 nbytes = (size_t)size;
1024 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
1025 PyErr_SetString(PyExc_OverflowError,
1026 "repeated string is too long");
1027 return NULL;
1028 }
1029 op = (PyBytesObject *)
1030 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
1031 if (op == NULL)
1032 return PyErr_NoMemory();
1033 PyObject_INIT_VAR(op, &PyBytes_Type, size);
1034 op->ob_shash = -1;
1035 op->ob_sstate = SSTATE_NOT_INTERNED;
1036 op->ob_sval[size] = '\0';
1037 if (Py_SIZE(a) == 1 && n > 0) {
1038 memset(op->ob_sval, a->ob_sval[0] , n);
1039 return (PyObject *) op;
1040 }
1041 i = 0;
1042 if (i < size) {
1043 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1044 i = Py_SIZE(a);
1045 }
1046 while (i < size) {
1047 j = (i <= size-i) ? i : size-i;
1048 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1049 i += j;
1050 }
1051 return (PyObject *) op;
1052}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001053
Christian Heimes44720832008-05-26 13:01:01 +00001054/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1055
1056static PyObject *
1057string_slice(register PyBytesObject *a, register Py_ssize_t i,
1058 register Py_ssize_t j)
1059 /* j -- may be negative! */
1060{
1061 if (i < 0)
1062 i = 0;
1063 if (j < 0)
1064 j = 0; /* Avoid signed/unsigned bug in next line */
1065 if (j > Py_SIZE(a))
1066 j = Py_SIZE(a);
1067 if (i == 0 && j == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1068 /* It's the same as a */
1069 Py_INCREF(a);
1070 return (PyObject *)a;
1071 }
1072 if (j < i)
1073 j = i;
1074 return PyBytes_FromStringAndSize(a->ob_sval + i, j-i);
1075}
1076
1077static int
1078string_contains(PyObject *str_obj, PyObject *sub_obj)
1079{
1080 if (!PyBytes_CheckExact(sub_obj)) {
1081#ifdef Py_USING_UNICODE
1082 if (PyUnicode_Check(sub_obj))
1083 return PyUnicode_Contains(str_obj, sub_obj);
1084#endif
1085 if (!PyBytes_Check(sub_obj)) {
1086 PyErr_Format(PyExc_TypeError,
1087 "'in <string>' requires string as left operand, "
1088 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1089 return -1;
1090 }
1091 }
1092
1093 return stringlib_contains_obj(str_obj, sub_obj);
1094}
1095
1096static PyObject *
1097string_item(PyBytesObject *a, register Py_ssize_t i)
1098{
1099 char pchar;
1100 PyObject *v;
1101 if (i < 0 || i >= Py_SIZE(a)) {
1102 PyErr_SetString(PyExc_IndexError, "string index out of range");
1103 return NULL;
1104 }
1105 pchar = a->ob_sval[i];
1106 v = (PyObject *)characters[pchar & UCHAR_MAX];
1107 if (v == NULL)
1108 v = PyBytes_FromStringAndSize(&pchar, 1);
1109 else {
1110#ifdef COUNT_ALLOCS
1111 one_strings++;
1112#endif
1113 Py_INCREF(v);
1114 }
1115 return v;
1116}
1117
1118static PyObject*
1119string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1120{
1121 int c;
1122 Py_ssize_t len_a, len_b;
1123 Py_ssize_t min_len;
1124 PyObject *result;
1125
1126 /* Make sure both arguments are strings. */
1127 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1128 result = Py_NotImplemented;
1129 goto out;
1130 }
1131 if (a == b) {
1132 switch (op) {
1133 case Py_EQ:case Py_LE:case Py_GE:
1134 result = Py_True;
1135 goto out;
1136 case Py_NE:case Py_LT:case Py_GT:
1137 result = Py_False;
1138 goto out;
1139 }
1140 }
1141 if (op == Py_EQ) {
1142 /* Supporting Py_NE here as well does not save
1143 much time, since Py_NE is rarely used. */
1144 if (Py_SIZE(a) == Py_SIZE(b)
1145 && (a->ob_sval[0] == b->ob_sval[0]
1146 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1147 result = Py_True;
1148 } else {
1149 result = Py_False;
1150 }
1151 goto out;
1152 }
1153 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1154 min_len = (len_a < len_b) ? len_a : len_b;
1155 if (min_len > 0) {
1156 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1157 if (c==0)
1158 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1159 } else
1160 c = 0;
1161 if (c == 0)
1162 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1163 switch (op) {
1164 case Py_LT: c = c < 0; break;
1165 case Py_LE: c = c <= 0; break;
1166 case Py_EQ: assert(0); break; /* unreachable */
1167 case Py_NE: c = c != 0; break;
1168 case Py_GT: c = c > 0; break;
1169 case Py_GE: c = c >= 0; break;
1170 default:
1171 result = Py_NotImplemented;
1172 goto out;
1173 }
1174 result = c ? Py_True : Py_False;
1175 out:
1176 Py_INCREF(result);
1177 return result;
1178}
1179
1180int
1181_PyBytes_Eq(PyObject *o1, PyObject *o2)
1182{
1183 PyBytesObject *a = (PyBytesObject*) o1;
1184 PyBytesObject *b = (PyBytesObject*) o2;
1185 return Py_SIZE(a) == Py_SIZE(b)
1186 && *a->ob_sval == *b->ob_sval
1187 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1188}
1189
1190static long
1191string_hash(PyBytesObject *a)
1192{
1193 register Py_ssize_t len;
1194 register unsigned char *p;
1195 register long x;
1196
1197 if (a->ob_shash != -1)
1198 return a->ob_shash;
1199 len = Py_SIZE(a);
1200 p = (unsigned char *) a->ob_sval;
1201 x = *p << 7;
1202 while (--len >= 0)
1203 x = (1000003*x) ^ *p++;
1204 x ^= Py_SIZE(a);
1205 if (x == -1)
1206 x = -2;
1207 a->ob_shash = x;
1208 return x;
1209}
1210
1211static PyObject*
1212string_subscript(PyBytesObject* self, PyObject* item)
1213{
1214 if (PyIndex_Check(item)) {
1215 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1216 if (i == -1 && PyErr_Occurred())
1217 return NULL;
1218 if (i < 0)
1219 i += PyBytes_GET_SIZE(self);
1220 return string_item(self, i);
1221 }
1222 else if (PySlice_Check(item)) {
1223 Py_ssize_t start, stop, step, slicelength, cur, i;
1224 char* source_buf;
1225 char* result_buf;
1226 PyObject* result;
1227
1228 if (PySlice_GetIndicesEx((PySliceObject*)item,
1229 PyBytes_GET_SIZE(self),
1230 &start, &stop, &step, &slicelength) < 0) {
1231 return NULL;
1232 }
1233
1234 if (slicelength <= 0) {
1235 return PyBytes_FromStringAndSize("", 0);
1236 }
1237 else if (start == 0 && step == 1 &&
1238 slicelength == PyBytes_GET_SIZE(self) &&
1239 PyBytes_CheckExact(self)) {
1240 Py_INCREF(self);
1241 return (PyObject *)self;
1242 }
1243 else if (step == 1) {
1244 return PyBytes_FromStringAndSize(
1245 PyBytes_AS_STRING(self) + start,
1246 slicelength);
1247 }
1248 else {
1249 source_buf = PyBytes_AsString((PyObject*)self);
1250 result_buf = (char *)PyMem_Malloc(slicelength);
1251 if (result_buf == NULL)
1252 return PyErr_NoMemory();
1253
1254 for (cur = start, i = 0; i < slicelength;
1255 cur += step, i++) {
1256 result_buf[i] = source_buf[cur];
1257 }
1258
1259 result = PyBytes_FromStringAndSize(result_buf,
1260 slicelength);
1261 PyMem_Free(result_buf);
1262 return result;
1263 }
1264 }
1265 else {
1266 PyErr_Format(PyExc_TypeError,
1267 "string indices must be integers, not %.200s",
1268 Py_TYPE(item)->tp_name);
1269 return NULL;
1270 }
1271}
1272
1273static Py_ssize_t
1274string_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1275{
1276 if ( index != 0 ) {
1277 PyErr_SetString(PyExc_SystemError,
1278 "accessing non-existent string segment");
1279 return -1;
1280 }
1281 *ptr = (void *)self->ob_sval;
1282 return Py_SIZE(self);
1283}
1284
1285static Py_ssize_t
1286string_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1287{
1288 PyErr_SetString(PyExc_TypeError,
1289 "Cannot use string as modifiable buffer");
1290 return -1;
1291}
1292
1293static Py_ssize_t
1294string_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
1295{
1296 if ( lenp )
1297 *lenp = Py_SIZE(self);
1298 return 1;
1299}
1300
1301static Py_ssize_t
1302string_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
1303{
1304 if ( index != 0 ) {
1305 PyErr_SetString(PyExc_SystemError,
1306 "accessing non-existent string segment");
1307 return -1;
1308 }
1309 *ptr = self->ob_sval;
1310 return Py_SIZE(self);
1311}
1312
1313static int
1314string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1315{
1316 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1317 0, flags);
1318}
1319
1320static PySequenceMethods string_as_sequence = {
1321 (lenfunc)string_length, /*sq_length*/
1322 (binaryfunc)string_concat, /*sq_concat*/
1323 (ssizeargfunc)string_repeat, /*sq_repeat*/
1324 (ssizeargfunc)string_item, /*sq_item*/
1325 (ssizessizeargfunc)string_slice, /*sq_slice*/
1326 0, /*sq_ass_item*/
1327 0, /*sq_ass_slice*/
1328 (objobjproc)string_contains /*sq_contains*/
1329};
1330
1331static PyMappingMethods string_as_mapping = {
1332 (lenfunc)string_length,
1333 (binaryfunc)string_subscript,
1334 0,
1335};
1336
1337static PyBufferProcs string_as_buffer = {
1338 (readbufferproc)string_buffer_getreadbuf,
1339 (writebufferproc)string_buffer_getwritebuf,
1340 (segcountproc)string_buffer_getsegcount,
1341 (charbufferproc)string_buffer_getcharbuf,
1342 (getbufferproc)string_buffer_getbuffer,
1343 0, /* XXX */
1344};
1345
1346
1347
/* Selectors for the three strip variants.  They double as indices into
   the stripformat[] array below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a selector: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1356
Christian Heimes1a6387e2008-03-26 12:49:49 +00001357
/* True when the `length' bytes of `target' starting at `offset' equal
   the first `length' bytes of `pattern'.  The first and last bytes are
   compared directly; memcmp() handles what remains after the first byte.

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized so that expression arguments
   (e.g. a conditional) expand correctly.  Arguments are evaluated more
   than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1363
1364
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split with at most `maxsplit' splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001377
/* Both macros below rely on names from the expanding function's scope:
   `str' (PyObject * scratch variable), `list' (the result list) and the
   label `onError'.  SPLIT_ADD additionally uses the counter `count'. */

/* Create a string from data[left:right] and append it to `list'.
   Jumps to onError if either step fails.  Expands to several statements
   (not a single one), so do not use it as the unbraced body of an
   if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);

/* Create a string from data[left:right] and store it as element number
   `count' of `list', then increment `count'.  While count is below
   MAX_PREALLOC the element is stored directly with PyList_SET_ITEM;
   from then on it is appended.  Jumps to onError on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001406
/* Always force the list to the expected size.  Uses `count' from the
   expanding function's scope. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers for whitespace splitting.  Each one moves the index `i'
   (which must be a modifiable lvalue) through the buffer `s':
     SKIP_SPACE / SKIP_NONSPACE    advance i while i < len
     RSKIP_SPACE / RSKIP_NONSPACE  retreat i while i >= 0
   The arguments are parenthesized so that expression arguments such as
   `base + offset' expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001414
1415Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001416split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001417{
Christian Heimes44720832008-05-26 13:01:01 +00001418 const char *s = PyBytes_AS_STRING(self);
1419 Py_ssize_t i, j, count=0;
1420 PyObject *str;
1421 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001422
Christian Heimes44720832008-05-26 13:01:01 +00001423 if (list == NULL)
1424 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001425
Christian Heimes44720832008-05-26 13:01:01 +00001426 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001427
Christian Heimes44720832008-05-26 13:01:01 +00001428 while (maxsplit-- > 0) {
1429 SKIP_SPACE(s, i, len);
1430 if (i==len) break;
1431 j = i; i++;
1432 SKIP_NONSPACE(s, i, len);
1433 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1434 /* No whitespace in self, so just use it as list[0] */
1435 Py_INCREF(self);
1436 PyList_SET_ITEM(list, 0, (PyObject *)self);
1437 count++;
1438 break;
1439 }
1440 SPLIT_ADD(s, j, i);
1441 }
1442
1443 if (i < len) {
1444 /* Only occurs when maxsplit was reached */
1445 /* Skip any remaining whitespace and copy to end of string */
1446 SKIP_SPACE(s, i, len);
1447 if (i != len)
1448 SPLIT_ADD(s, i, len);
1449 }
1450 FIX_PREALLOC_SIZE(list);
1451 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001452 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001453 Py_DECREF(list);
1454 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001455}
1456
Christian Heimes1a6387e2008-03-26 12:49:49 +00001457Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001458split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459{
Christian Heimes44720832008-05-26 13:01:01 +00001460 const char *s = PyBytes_AS_STRING(self);
1461 register Py_ssize_t i, j, count=0;
1462 PyObject *str;
1463 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001464
Christian Heimes44720832008-05-26 13:01:01 +00001465 if (list == NULL)
1466 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467
Christian Heimes44720832008-05-26 13:01:01 +00001468 i = j = 0;
1469 while ((j < len) && (maxcount-- > 0)) {
1470 for(; j<len; j++) {
1471 /* I found that using memchr makes no difference */
1472 if (s[j] == ch) {
1473 SPLIT_ADD(s, i, j);
1474 i = j = j + 1;
1475 break;
1476 }
1477 }
1478 }
1479 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1480 /* ch not in self, so just use self as list[0] */
1481 Py_INCREF(self);
1482 PyList_SET_ITEM(list, 0, (PyObject *)self);
1483 count++;
1484 }
1485 else if (i <= len) {
1486 SPLIT_ADD(s, i, len);
1487 }
1488 FIX_PREALLOC_SIZE(list);
1489 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
1491 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001492 Py_DECREF(list);
1493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001494}
1495
1496PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001497"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498\n\
Christian Heimes44720832008-05-26 13:01:01 +00001499Return a list of the words in the string S, using sep as the\n\
1500delimiter string. If maxsplit is given, at most maxsplit\n\
1501splits are done. If sep is not specified or is None, any\n\
1502whitespace string is a separator and empty strings are removed\n\
1503from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001506string_split(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Christian Heimes44720832008-05-26 13:01:01 +00001508 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1509 Py_ssize_t maxsplit = -1, count=0;
1510 const char *s = PyBytes_AS_STRING(self), *sub;
1511 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001512#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001513 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514#endif
1515
Christian Heimes44720832008-05-26 13:01:01 +00001516 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1517 return NULL;
1518 if (maxsplit < 0)
1519 maxsplit = PY_SSIZE_T_MAX;
1520 if (subobj == Py_None)
1521 return split_whitespace(self, len, maxsplit);
1522 if (PyBytes_Check(subobj)) {
1523 sub = PyBytes_AS_STRING(subobj);
1524 n = PyBytes_GET_SIZE(subobj);
1525 }
1526#ifdef Py_USING_UNICODE
1527 else if (PyUnicode_Check(subobj))
1528 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1529#endif
1530 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1531 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001532
Christian Heimes44720832008-05-26 13:01:01 +00001533 if (n == 0) {
1534 PyErr_SetString(PyExc_ValueError, "empty separator");
1535 return NULL;
1536 }
1537 else if (n == 1)
1538 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539
Christian Heimes44720832008-05-26 13:01:01 +00001540 list = PyList_New(PREALLOC_SIZE(maxsplit));
1541 if (list == NULL)
1542 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543
1544#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001545 i = j = 0;
1546 while (maxsplit-- > 0) {
1547 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1548 if (pos < 0)
1549 break;
1550 j = i+pos;
1551 SPLIT_ADD(s, i, j);
1552 i = j + n;
1553 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001554#else
Christian Heimes44720832008-05-26 13:01:01 +00001555 i = j = 0;
1556 while ((j+n <= len) && (maxsplit-- > 0)) {
1557 for (; j+n <= len; j++) {
1558 if (Py_STRING_MATCH(s, j, sub, n)) {
1559 SPLIT_ADD(s, i, j);
1560 i = j = j + n;
1561 break;
1562 }
1563 }
1564 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565#endif
Christian Heimes44720832008-05-26 13:01:01 +00001566 SPLIT_ADD(s, i, len);
1567 FIX_PREALLOC_SIZE(list);
1568 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569
Christian Heimes44720832008-05-26 13:01:01 +00001570 onError:
1571 Py_DECREF(list);
1572 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001573}
1574
1575PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001576"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001577\n\
Christian Heimes44720832008-05-26 13:01:01 +00001578Searches for the separator sep in S, and returns the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001579the separator itself, and the part after it. If the separator is not\n\
Christian Heimes44720832008-05-26 13:01:01 +00001580found, returns S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581
1582static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001583string_partition(PyBytesObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001584{
Christian Heimes44720832008-05-26 13:01:01 +00001585 const char *sep;
1586 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001587
Christian Heimes44720832008-05-26 13:01:01 +00001588 if (PyBytes_Check(sep_obj)) {
1589 sep = PyBytes_AS_STRING(sep_obj);
1590 sep_len = PyBytes_GET_SIZE(sep_obj);
1591 }
1592#ifdef Py_USING_UNICODE
1593 else if (PyUnicode_Check(sep_obj))
1594 return PyUnicode_Partition((PyObject *) self, sep_obj);
1595#endif
1596 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1597 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001598
Christian Heimes44720832008-05-26 13:01:01 +00001599 return stringlib_partition(
1600 (PyObject*) self,
1601 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1602 sep_obj, sep, sep_len
1603 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001604}
1605
1606PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001607"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608\n\
Christian Heimes44720832008-05-26 13:01:01 +00001609Searches for the separator sep in S, starting at the end of S, and returns\n\
1610the part before it, the separator itself, and the part after it. If the\n\
1611separator is not found, returns two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001612
1613static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001614string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001615{
Christian Heimes44720832008-05-26 13:01:01 +00001616 const char *sep;
1617 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001618
Christian Heimes44720832008-05-26 13:01:01 +00001619 if (PyBytes_Check(sep_obj)) {
1620 sep = PyBytes_AS_STRING(sep_obj);
1621 sep_len = PyBytes_GET_SIZE(sep_obj);
1622 }
1623#ifdef Py_USING_UNICODE
1624 else if (PyUnicode_Check(sep_obj))
1625 return PyUnicode_Partition((PyObject *) self, sep_obj);
1626#endif
1627 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1628 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001629
Christian Heimes44720832008-05-26 13:01:01 +00001630 return stringlib_rpartition(
1631 (PyObject*) self,
1632 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1633 sep_obj, sep, sep_len
1634 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001635}
1636
1637Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001638rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Christian Heimes44720832008-05-26 13:01:01 +00001640 const char *s = PyBytes_AS_STRING(self);
1641 Py_ssize_t i, j, count=0;
1642 PyObject *str;
1643 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644
Christian Heimes44720832008-05-26 13:01:01 +00001645 if (list == NULL)
1646 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001647
Christian Heimes44720832008-05-26 13:01:01 +00001648 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001649
Christian Heimes44720832008-05-26 13:01:01 +00001650 while (maxsplit-- > 0) {
1651 RSKIP_SPACE(s, i);
1652 if (i<0) break;
1653 j = i; i--;
1654 RSKIP_NONSPACE(s, i);
1655 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1656 /* No whitespace in self, so just use it as list[0] */
1657 Py_INCREF(self);
1658 PyList_SET_ITEM(list, 0, (PyObject *)self);
1659 count++;
1660 break;
1661 }
1662 SPLIT_ADD(s, i + 1, j + 1);
1663 }
1664 if (i >= 0) {
1665 /* Only occurs when maxsplit was reached */
1666 /* Skip any remaining whitespace and copy to beginning of string */
1667 RSKIP_SPACE(s, i);
1668 if (i >= 0)
1669 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001670
Christian Heimes44720832008-05-26 13:01:01 +00001671 }
1672 FIX_PREALLOC_SIZE(list);
1673 if (PyList_Reverse(list) < 0)
1674 goto onError;
1675 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001676 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001677 Py_DECREF(list);
1678 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001679}
1680
1681Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001682rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001683{
Christian Heimes44720832008-05-26 13:01:01 +00001684 const char *s = PyBytes_AS_STRING(self);
1685 register Py_ssize_t i, j, count=0;
1686 PyObject *str;
1687 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001688
Christian Heimes44720832008-05-26 13:01:01 +00001689 if (list == NULL)
1690 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001691
Christian Heimes44720832008-05-26 13:01:01 +00001692 i = j = len - 1;
1693 while ((i >= 0) && (maxcount-- > 0)) {
1694 for (; i >= 0; i--) {
1695 if (s[i] == ch) {
1696 SPLIT_ADD(s, i + 1, j + 1);
1697 j = i = i - 1;
1698 break;
1699 }
1700 }
1701 }
1702 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1703 /* ch not in self, so just use self as list[0] */
1704 Py_INCREF(self);
1705 PyList_SET_ITEM(list, 0, (PyObject *)self);
1706 count++;
1707 }
1708 else if (j >= -1) {
1709 SPLIT_ADD(s, 0, j + 1);
1710 }
1711 FIX_PREALLOC_SIZE(list);
1712 if (PyList_Reverse(list) < 0)
1713 goto onError;
1714 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 onError:
1717 Py_DECREF(list);
1718 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001719}
1720
1721PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001722"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001723\n\
Christian Heimes44720832008-05-26 13:01:01 +00001724Return a list of the words in the string S, using sep as the\n\
1725delimiter string, starting at the end of the string and working\n\
1726to the front. If maxsplit is given, at most maxsplit splits are\n\
1727done. If sep is not specified or is None, any whitespace string\n\
1728is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001729
1730static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001731string_rsplit(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001732{
Christian Heimes44720832008-05-26 13:01:01 +00001733 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1734 Py_ssize_t maxsplit = -1, count=0;
1735 const char *s, *sub;
1736 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001737
Christian Heimes44720832008-05-26 13:01:01 +00001738 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1739 return NULL;
1740 if (maxsplit < 0)
1741 maxsplit = PY_SSIZE_T_MAX;
1742 if (subobj == Py_None)
1743 return rsplit_whitespace(self, len, maxsplit);
1744 if (PyBytes_Check(subobj)) {
1745 sub = PyBytes_AS_STRING(subobj);
1746 n = PyBytes_GET_SIZE(subobj);
1747 }
1748#ifdef Py_USING_UNICODE
1749 else if (PyUnicode_Check(subobj))
1750 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1751#endif
1752 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1753 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001754
Christian Heimes44720832008-05-26 13:01:01 +00001755 if (n == 0) {
1756 PyErr_SetString(PyExc_ValueError, "empty separator");
1757 return NULL;
1758 }
1759 else if (n == 1)
1760 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 list = PyList_New(PREALLOC_SIZE(maxsplit));
1763 if (list == NULL)
1764 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001765
Christian Heimes44720832008-05-26 13:01:01 +00001766 j = len;
1767 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001768
Christian Heimes44720832008-05-26 13:01:01 +00001769 s = PyBytes_AS_STRING(self);
1770 while ( (i >= 0) && (maxsplit-- > 0) ) {
1771 for (; i>=0; i--) {
1772 if (Py_STRING_MATCH(s, i, sub, n)) {
1773 SPLIT_ADD(s, i + n, j);
1774 j = i;
1775 i -= n;
1776 break;
1777 }
1778 }
1779 }
1780 SPLIT_ADD(s, 0, j);
1781 FIX_PREALLOC_SIZE(list);
1782 if (PyList_Reverse(list) < 0)
1783 goto onError;
1784 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
1786onError:
Christian Heimes44720832008-05-26 13:01:01 +00001787 Py_DECREF(list);
1788 return NULL;
1789}
1790
1791
1792PyDoc_STRVAR(join__doc__,
1793"S.join(sequence) -> string\n\
1794\n\
1795Return a string which is the concatenation of the strings in the\n\
1796sequence. The separator between elements is S.");
1797
1798static PyObject *
1799string_join(PyBytesObject *self, PyObject *orig)
1800{
1801 char *sep = PyBytes_AS_STRING(self);
1802 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1803 PyObject *res = NULL;
1804 char *p;
1805 Py_ssize_t seqlen = 0;
1806 size_t sz = 0;
1807 Py_ssize_t i;
1808 PyObject *seq, *item;
1809
1810 seq = PySequence_Fast(orig, "");
1811 if (seq == NULL) {
1812 return NULL;
1813 }
1814
1815 seqlen = PySequence_Size(seq);
1816 if (seqlen == 0) {
1817 Py_DECREF(seq);
1818 return PyBytes_FromString("");
1819 }
1820 if (seqlen == 1) {
1821 item = PySequence_Fast_GET_ITEM(seq, 0);
1822 if (PyBytes_CheckExact(item) || PyUnicode_CheckExact(item)) {
1823 Py_INCREF(item);
1824 Py_DECREF(seq);
1825 return item;
1826 }
1827 }
1828
1829 /* There are at least two things to join, or else we have a subclass
1830 * of the builtin types in the sequence.
1831 * Do a pre-pass to figure out the total amount of space we'll
1832 * need (sz), see whether any argument is absurd, and defer to
1833 * the Unicode join if appropriate.
1834 */
1835 for (i = 0; i < seqlen; i++) {
1836 const size_t old_sz = sz;
1837 item = PySequence_Fast_GET_ITEM(seq, i);
1838 if (!PyBytes_Check(item)){
1839#ifdef Py_USING_UNICODE
1840 if (PyUnicode_Check(item)) {
1841 /* Defer to Unicode join.
1842 * CAUTION: There's no gurantee that the
1843 * original sequence can be iterated over
1844 * again, so we must pass seq here.
1845 */
1846 PyObject *result;
1847 result = PyUnicode_Join((PyObject *)self, seq);
1848 Py_DECREF(seq);
1849 return result;
1850 }
1851#endif
1852 PyErr_Format(PyExc_TypeError,
1853 "sequence item %zd: expected string,"
1854 " %.80s found",
1855 i, Py_TYPE(item)->tp_name);
1856 Py_DECREF(seq);
1857 return NULL;
1858 }
1859 sz += PyBytes_GET_SIZE(item);
1860 if (i != 0)
1861 sz += seplen;
1862 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1863 PyErr_SetString(PyExc_OverflowError,
1864 "join() result is too long for a Python string");
1865 Py_DECREF(seq);
1866 return NULL;
1867 }
1868 }
1869
1870 /* Allocate result space. */
1871 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1872 if (res == NULL) {
1873 Py_DECREF(seq);
1874 return NULL;
1875 }
1876
1877 /* Catenate everything. */
1878 p = PyBytes_AS_STRING(res);
1879 for (i = 0; i < seqlen; ++i) {
1880 size_t n;
1881 item = PySequence_Fast_GET_ITEM(seq, i);
1882 n = PyBytes_GET_SIZE(item);
1883 Py_MEMCPY(p, PyBytes_AS_STRING(item), n);
1884 p += n;
1885 if (i < seqlen - 1) {
1886 Py_MEMCPY(p, sep, seplen);
1887 p += seplen;
1888 }
1889 }
1890
1891 Py_DECREF(seq);
1892 return res;
1893}
1894
1895PyObject *
1896_PyBytes_Join(PyObject *sep, PyObject *x)
1897{
1898 assert(sep != NULL && PyBytes_Check(sep));
1899 assert(x != NULL);
1900 return string_join((PyBytesObject *)sep, x);
1901}
1902
1903Py_LOCAL_INLINE(void)
1904string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1905{
1906 if (*end > len)
1907 *end = len;
1908 else if (*end < 0)
1909 *end += len;
1910 if (*end < 0)
1911 *end = 0;
1912 if (*start < 0)
1913 *start += len;
1914 if (*start < 0)
1915 *start = 0;
1916}
1917
1918Py_LOCAL_INLINE(Py_ssize_t)
1919string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1920{
1921 PyObject *subobj;
1922 const char *sub;
1923 Py_ssize_t sub_len;
1924 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1925 PyObject *obj_start=Py_None, *obj_end=Py_None;
1926
1927 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1928 &obj_start, &obj_end))
1929 return -2;
1930 /* To support None in "start" and "end" arguments, meaning
1931 the same as if they were not passed.
1932 */
1933 if (obj_start != Py_None)
1934 if (!_PyEval_SliceIndex(obj_start, &start))
1935 return -2;
1936 if (obj_end != Py_None)
1937 if (!_PyEval_SliceIndex(obj_end, &end))
1938 return -2;
1939
1940 if (PyBytes_Check(subobj)) {
1941 sub = PyBytes_AS_STRING(subobj);
1942 sub_len = PyBytes_GET_SIZE(subobj);
1943 }
1944#ifdef Py_USING_UNICODE
1945 else if (PyUnicode_Check(subobj))
1946 return PyUnicode_Find(
1947 (PyObject *)self, subobj, start, end, dir);
1948#endif
1949 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1950 /* XXX - the "expected a character buffer object" is pretty
1951 confusing for a non-expert. remap to something else ? */
1952 return -2;
1953
1954 if (dir > 0)
1955 return stringlib_find_slice(
1956 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1957 sub, sub_len, start, end);
1958 else
1959 return stringlib_rfind_slice(
1960 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1961 sub, sub_len, start, end);
1962}
1963
1964
1965PyDoc_STRVAR(find__doc__,
1966"S.find(sub [,start [,end]]) -> int\n\
1967\n\
1968Return the lowest index in S where substring sub is found,\n\
1969such that sub is contained within s[start:end]. Optional\n\
1970arguments start and end are interpreted as in slice notation.\n\
1971\n\
1972Return -1 on failure.");
1973
1974static PyObject *
1975string_find(PyBytesObject *self, PyObject *args)
1976{
1977 Py_ssize_t result = string_find_internal(self, args, +1);
1978 if (result == -2)
1979 return NULL;
1980 return PyInt_FromSsize_t(result);
1981}
1982
1983
1984PyDoc_STRVAR(index__doc__,
1985"S.index(sub [,start [,end]]) -> int\n\
1986\n\
1987Like S.find() but raise ValueError when the substring is not found.");
1988
1989static PyObject *
1990string_index(PyBytesObject *self, PyObject *args)
1991{
1992 Py_ssize_t result = string_find_internal(self, args, +1);
1993 if (result == -2)
1994 return NULL;
1995 if (result == -1) {
1996 PyErr_SetString(PyExc_ValueError,
1997 "substring not found");
1998 return NULL;
1999 }
2000 return PyInt_FromSsize_t(result);
2001}
2002
2003
2004PyDoc_STRVAR(rfind__doc__,
2005"S.rfind(sub [,start [,end]]) -> int\n\
2006\n\
2007Return the highest index in S where substring sub is found,\n\
2008such that sub is contained within s[start:end]. Optional\n\
2009arguments start and end are interpreted as in slice notation.\n\
2010\n\
2011Return -1 on failure.");
2012
2013static PyObject *
2014string_rfind(PyBytesObject *self, PyObject *args)
2015{
2016 Py_ssize_t result = string_find_internal(self, args, -1);
2017 if (result == -2)
2018 return NULL;
2019 return PyInt_FromSsize_t(result);
2020}
2021
2022
2023PyDoc_STRVAR(rindex__doc__,
2024"S.rindex(sub [,start [,end]]) -> int\n\
2025\n\
2026Like S.rfind() but raise ValueError when the substring is not found.");
2027
2028static PyObject *
2029string_rindex(PyBytesObject *self, PyObject *args)
2030{
2031 Py_ssize_t result = string_find_internal(self, args, -1);
2032 if (result == -2)
2033 return NULL;
2034 if (result == -1) {
2035 PyErr_SetString(PyExc_ValueError,
2036 "substring not found");
2037 return NULL;
2038 }
2039 return PyInt_FromSsize_t(result);
2040}
2041
2042
2043Py_LOCAL_INLINE(PyObject *)
2044do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
2045{
2046 char *s = PyBytes_AS_STRING(self);
2047 Py_ssize_t len = PyBytes_GET_SIZE(self);
2048 char *sep = PyBytes_AS_STRING(sepobj);
2049 Py_ssize_t seplen = PyBytes_GET_SIZE(sepobj);
2050 Py_ssize_t i, j;
2051
2052 i = 0;
2053 if (striptype != RIGHTSTRIP) {
2054 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2055 i++;
2056 }
2057 }
2058
2059 j = len;
2060 if (striptype != LEFTSTRIP) {
2061 do {
2062 j--;
2063 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2064 j++;
2065 }
2066
2067 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2068 Py_INCREF(self);
2069 return (PyObject*)self;
2070 }
2071 else
2072 return PyBytes_FromStringAndSize(s+i, j-i);
2073}
2074
2075
2076Py_LOCAL_INLINE(PyObject *)
2077do_strip(PyBytesObject *self, int striptype)
2078{
2079 char *s = PyBytes_AS_STRING(self);
2080 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
2081
2082 i = 0;
2083 if (striptype != RIGHTSTRIP) {
2084 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2085 i++;
2086 }
2087 }
2088
2089 j = len;
2090 if (striptype != LEFTSTRIP) {
2091 do {
2092 j--;
2093 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2094 j++;
2095 }
2096
2097 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2098 Py_INCREF(self);
2099 return (PyObject*)self;
2100 }
2101 else
2102 return PyBytes_FromStringAndSize(s+i, j-i);
2103}
2104
2105
2106Py_LOCAL_INLINE(PyObject *)
2107do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
2108{
2109 PyObject *sep = NULL;
2110
2111 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2112 return NULL;
2113
2114 if (sep != NULL && sep != Py_None) {
2115 if (PyBytes_Check(sep))
2116 return do_xstrip(self, striptype, sep);
2117#ifdef Py_USING_UNICODE
2118 else if (PyUnicode_Check(sep)) {
2119 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2120 PyObject *res;
2121 if (uniself==NULL)
2122 return NULL;
2123 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2124 striptype, sep);
2125 Py_DECREF(uniself);
2126 return res;
2127 }
2128#endif
2129 PyErr_Format(PyExc_TypeError,
2130#ifdef Py_USING_UNICODE
2131 "%s arg must be None, str or unicode",
2132#else
2133 "%s arg must be None or str",
2134#endif
2135 STRIPNAME(striptype));
2136 return NULL;
2137 }
2138
2139 return do_strip(self, striptype);
2140}
2141
2142
2143PyDoc_STRVAR(strip__doc__,
2144"S.strip([chars]) -> string or unicode\n\
2145\n\
2146Return a copy of the string S with leading and trailing\n\
2147whitespace removed.\n\
2148If chars is given and not None, remove characters in chars instead.\n\
2149If chars is unicode, S will be converted to unicode before stripping");
2150
2151static PyObject *
2152string_strip(PyBytesObject *self, PyObject *args)
2153{
2154 if (PyTuple_GET_SIZE(args) == 0)
2155 return do_strip(self, BOTHSTRIP); /* Common case */
2156 else
2157 return do_argstrip(self, BOTHSTRIP, args);
2158}
2159
2160
2161PyDoc_STRVAR(lstrip__doc__,
2162"S.lstrip([chars]) -> string or unicode\n\
2163\n\
2164Return a copy of the string S with leading whitespace removed.\n\
2165If chars is given and not None, remove characters in chars instead.\n\
2166If chars is unicode, S will be converted to unicode before stripping");
2167
2168static PyObject *
2169string_lstrip(PyBytesObject *self, PyObject *args)
2170{
2171 if (PyTuple_GET_SIZE(args) == 0)
2172 return do_strip(self, LEFTSTRIP); /* Common case */
2173 else
2174 return do_argstrip(self, LEFTSTRIP, args);
2175}
2176
2177
2178PyDoc_STRVAR(rstrip__doc__,
2179"S.rstrip([chars]) -> string or unicode\n\
2180\n\
2181Return a copy of the string S with trailing whitespace removed.\n\
2182If chars is given and not None, remove characters in chars instead.\n\
2183If chars is unicode, S will be converted to unicode before stripping");
2184
2185static PyObject *
2186string_rstrip(PyBytesObject *self, PyObject *args)
2187{
2188 if (PyTuple_GET_SIZE(args) == 0)
2189 return do_strip(self, RIGHTSTRIP); /* Common case */
2190 else
2191 return do_argstrip(self, RIGHTSTRIP, args);
2192}
2193
2194
2195PyDoc_STRVAR(lower__doc__,
2196"S.lower() -> string\n\
2197\n\
2198Return a copy of the string S converted to lowercase.");
2199
2200/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2201#ifndef _tolower
2202#define _tolower tolower
2203#endif
2204
2205static PyObject *
2206string_lower(PyBytesObject *self)
2207{
2208 char *s;
2209 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2210 PyObject *newobj;
2211
2212 newobj = PyBytes_FromStringAndSize(NULL, n);
2213 if (!newobj)
2214 return NULL;
2215
2216 s = PyBytes_AS_STRING(newobj);
2217
2218 Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2219
2220 for (i = 0; i < n; i++) {
2221 int c = Py_CHARMASK(s[i]);
2222 if (isupper(c))
2223 s[i] = _tolower(c);
2224 }
2225
2226 return newobj;
2227}
2228
2229PyDoc_STRVAR(upper__doc__,
2230"S.upper() -> string\n\
2231\n\
2232Return a copy of the string S converted to uppercase.");
2233
2234#ifndef _toupper
2235#define _toupper toupper
2236#endif
2237
2238static PyObject *
2239string_upper(PyBytesObject *self)
2240{
2241 char *s;
2242 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2243 PyObject *newobj;
2244
2245 newobj = PyBytes_FromStringAndSize(NULL, n);
2246 if (!newobj)
2247 return NULL;
2248
2249 s = PyBytes_AS_STRING(newobj);
2250
2251 Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2252
2253 for (i = 0; i < n; i++) {
2254 int c = Py_CHARMASK(s[i]);
2255 if (islower(c))
2256 s[i] = _toupper(c);
2257 }
2258
2259 return newobj;
2260}
2261
2262PyDoc_STRVAR(title__doc__,
2263"S.title() -> string\n\
2264\n\
2265Return a titlecased version of S, i.e. words start with uppercase\n\
2266characters, all remaining cased characters have lowercase.");
2267
2268static PyObject*
2269string_title(PyBytesObject *self)
2270{
2271 char *s = PyBytes_AS_STRING(self), *s_new;
2272 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2273 int previous_is_cased = 0;
2274 PyObject *newobj;
2275
2276 newobj = PyBytes_FromStringAndSize(NULL, n);
2277 if (newobj == NULL)
2278 return NULL;
2279 s_new = PyBytes_AsString(newobj);
2280 for (i = 0; i < n; i++) {
2281 int c = Py_CHARMASK(*s++);
2282 if (islower(c)) {
2283 if (!previous_is_cased)
2284 c = toupper(c);
2285 previous_is_cased = 1;
2286 } else if (isupper(c)) {
2287 if (previous_is_cased)
2288 c = tolower(c);
2289 previous_is_cased = 1;
2290 } else
2291 previous_is_cased = 0;
2292 *s_new++ = c;
2293 }
2294 return newobj;
2295}
2296
2297PyDoc_STRVAR(capitalize__doc__,
2298"S.capitalize() -> string\n\
2299\n\
2300Return a copy of the string S with only its first character\n\
2301capitalized.");
2302
2303static PyObject *
2304string_capitalize(PyBytesObject *self)
2305{
2306 char *s = PyBytes_AS_STRING(self), *s_new;
2307 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2308 PyObject *newobj;
2309
2310 newobj = PyBytes_FromStringAndSize(NULL, n);
2311 if (newobj == NULL)
2312 return NULL;
2313 s_new = PyBytes_AsString(newobj);
2314 if (0 < n) {
2315 int c = Py_CHARMASK(*s++);
2316 if (islower(c))
2317 *s_new = toupper(c);
2318 else
2319 *s_new = c;
2320 s_new++;
2321 }
2322 for (i = 1; i < n; i++) {
2323 int c = Py_CHARMASK(*s++);
2324 if (isupper(c))
2325 *s_new = tolower(c);
2326 else
2327 *s_new = c;
2328 s_new++;
2329 }
2330 return newobj;
2331}
2332
2333
2334PyDoc_STRVAR(count__doc__,
2335"S.count(sub[, start[, end]]) -> int\n\
2336\n\
2337Return the number of non-overlapping occurrences of substring sub in\n\
2338string S[start:end]. Optional arguments start and end are interpreted\n\
2339as in slice notation.");
2340
2341static PyObject *
2342string_count(PyBytesObject *self, PyObject *args)
2343{
2344 PyObject *sub_obj;
2345 const char *str = PyBytes_AS_STRING(self), *sub;
2346 Py_ssize_t sub_len;
2347 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2348
2349 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2350 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2351 return NULL;
2352
2353 if (PyBytes_Check(sub_obj)) {
2354 sub = PyBytes_AS_STRING(sub_obj);
2355 sub_len = PyBytes_GET_SIZE(sub_obj);
2356 }
2357#ifdef Py_USING_UNICODE
2358 else if (PyUnicode_Check(sub_obj)) {
2359 Py_ssize_t count;
2360 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2361 if (count == -1)
2362 return NULL;
2363 else
2364 return PyInt_FromSsize_t(count);
2365 }
2366#endif
2367 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2368 return NULL;
2369
2370 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
2371
2372 return PyInt_FromSsize_t(
2373 stringlib_count(str + start, end - start, sub, sub_len)
2374 );
2375}
2376
2377PyDoc_STRVAR(swapcase__doc__,
2378"S.swapcase() -> string\n\
2379\n\
2380Return a copy of the string S with uppercase characters\n\
2381converted to lowercase and vice versa.");
2382
2383static PyObject *
2384string_swapcase(PyBytesObject *self)
2385{
2386 char *s = PyBytes_AS_STRING(self), *s_new;
2387 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2388 PyObject *newobj;
2389
2390 newobj = PyBytes_FromStringAndSize(NULL, n);
2391 if (newobj == NULL)
2392 return NULL;
2393 s_new = PyBytes_AsString(newobj);
2394 for (i = 0; i < n; i++) {
2395 int c = Py_CHARMASK(*s++);
2396 if (islower(c)) {
2397 *s_new = toupper(c);
2398 }
2399 else if (isupper(c)) {
2400 *s_new = tolower(c);
2401 }
2402 else
2403 *s_new = c;
2404 s_new++;
2405 }
2406 return newobj;
2407}
2408
2409
2410PyDoc_STRVAR(translate__doc__,
2411"S.translate(table [,deletechars]) -> string\n\
2412\n\
2413Return a copy of the string S, where all characters occurring\n\
2414in the optional argument deletechars are removed, and the\n\
2415remaining characters have been mapped through the given\n\
2416translation table, which must be a string of length 256.");
2417
2418static PyObject *
2419string_translate(PyBytesObject *self, PyObject *args)
2420{
2421 register char *input, *output;
2422 const char *table;
2423 register Py_ssize_t i, c, changed = 0;
2424 PyObject *input_obj = (PyObject*)self;
2425 const char *output_start, *del_table=NULL;
2426 Py_ssize_t inlen, tablen, dellen = 0;
2427 PyObject *result;
2428 int trans_table[256];
2429 PyObject *tableobj, *delobj = NULL;
2430
2431 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2432 &tableobj, &delobj))
2433 return NULL;
2434
2435 if (PyBytes_Check(tableobj)) {
2436 table = PyBytes_AS_STRING(tableobj);
2437 tablen = PyBytes_GET_SIZE(tableobj);
2438 }
2439 else if (tableobj == Py_None) {
2440 table = NULL;
2441 tablen = 256;
2442 }
2443#ifdef Py_USING_UNICODE
2444 else if (PyUnicode_Check(tableobj)) {
2445 /* Unicode .translate() does not support the deletechars
2446 parameter; instead a mapping to None will cause characters
2447 to be deleted. */
2448 if (delobj != NULL) {
2449 PyErr_SetString(PyExc_TypeError,
2450 "deletions are implemented differently for unicode");
2451 return NULL;
2452 }
2453 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2454 }
2455#endif
2456 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2457 return NULL;
2458
2459 if (tablen != 256) {
2460 PyErr_SetString(PyExc_ValueError,
2461 "translation table must be 256 characters long");
2462 return NULL;
2463 }
2464
2465 if (delobj != NULL) {
2466 if (PyBytes_Check(delobj)) {
2467 del_table = PyBytes_AS_STRING(delobj);
2468 dellen = PyBytes_GET_SIZE(delobj);
2469 }
2470#ifdef Py_USING_UNICODE
2471 else if (PyUnicode_Check(delobj)) {
2472 PyErr_SetString(PyExc_TypeError,
2473 "deletions are implemented differently for unicode");
2474 return NULL;
2475 }
2476#endif
2477 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2478 return NULL;
2479 }
2480 else {
2481 del_table = NULL;
2482 dellen = 0;
2483 }
2484
2485 inlen = PyBytes_GET_SIZE(input_obj);
2486 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2487 if (result == NULL)
2488 return NULL;
2489 output_start = output = PyBytes_AsString(result);
2490 input = PyBytes_AS_STRING(input_obj);
2491
2492 if (dellen == 0 && table != NULL) {
2493 /* If no deletions are required, use faster code */
2494 for (i = inlen; --i >= 0; ) {
2495 c = Py_CHARMASK(*input++);
2496 if (Py_CHARMASK((*output++ = table[c])) != c)
2497 changed = 1;
2498 }
2499 if (changed || !PyBytes_CheckExact(input_obj))
2500 return result;
2501 Py_DECREF(result);
2502 Py_INCREF(input_obj);
2503 return input_obj;
2504 }
2505
2506 if (table == NULL) {
2507 for (i = 0; i < 256; i++)
2508 trans_table[i] = Py_CHARMASK(i);
2509 } else {
2510 for (i = 0; i < 256; i++)
2511 trans_table[i] = Py_CHARMASK(table[i]);
2512 }
2513
2514 for (i = 0; i < dellen; i++)
2515 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2516
2517 for (i = inlen; --i >= 0; ) {
2518 c = Py_CHARMASK(*input++);
2519 if (trans_table[c] != -1)
2520 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2521 continue;
2522 changed = 1;
2523 }
2524 if (!changed && PyBytes_CheckExact(input_obj)) {
2525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529 /* Fix the size of the resulting string */
2530 if (inlen > 0)
2531 _PyBytes_Resize(&result, output - output_start);
2532 return result;
2533}
2534
2535
/* Direction values passed to findstring() and countstring(). */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* findchar: pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL when there is none (memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2543
2544/* String ops must return a string. */
2545/* If the object is subclass of string, create a copy */
2546Py_LOCAL(PyBytesObject *)
2547return_self(PyBytesObject *self)
2548{
2549 if (PyBytes_CheckExact(self)) {
2550 Py_INCREF(self);
2551 return self;
2552 }
2553 return (PyBytesObject *)PyBytes_FromStringAndSize(
2554 PyBytes_AS_STRING(self),
2555 PyBytes_GET_SIZE(self));
2556}
2557
2558Py_LOCAL_INLINE(Py_ssize_t)
2559countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2560{
2561 Py_ssize_t count=0;
2562 const char *start=target;
2563 const char *end=target+target_len;
2564
2565 while ( (start=findchar(start, end-start, c)) != NULL ) {
2566 count++;
2567 if (count >= maxcount)
2568 break;
2569 start += 1;
2570 }
2571 return count;
2572}
2573
2574Py_LOCAL(Py_ssize_t)
2575findstring(const char *target, Py_ssize_t target_len,
2576 const char *pattern, Py_ssize_t pattern_len,
2577 Py_ssize_t start,
2578 Py_ssize_t end,
2579 int direction)
2580{
2581 if (start < 0) {
2582 start += target_len;
2583 if (start < 0)
2584 start = 0;
2585 }
2586 if (end > target_len) {
2587 end = target_len;
2588 } else if (end < 0) {
2589 end += target_len;
2590 if (end < 0)
2591 end = 0;
2592 }
2593
2594 /* zero-length substrings always match at the first attempt */
2595 if (pattern_len == 0)
2596 return (direction > 0) ? start : end;
2597
2598 end -= pattern_len;
2599
2600 if (direction < 0) {
2601 for (; end >= start; end--)
2602 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2603 return end;
2604 } else {
2605 for (; start <= end; start++)
2606 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2607 return start;
2608 }
2609 return -1;
2610}
2611
2612Py_LOCAL_INLINE(Py_ssize_t)
2613countstring(const char *target, Py_ssize_t target_len,
2614 const char *pattern, Py_ssize_t pattern_len,
2615 Py_ssize_t start,
2616 Py_ssize_t end,
2617 int direction, Py_ssize_t maxcount)
2618{
2619 Py_ssize_t count=0;
2620
2621 if (start < 0) {
2622 start += target_len;
2623 if (start < 0)
2624 start = 0;
2625 }
2626 if (end > target_len) {
2627 end = target_len;
2628 } else if (end < 0) {
2629 end += target_len;
2630 if (end < 0)
2631 end = 0;
2632 }
2633
2634 /* zero-length substrings match everywhere */
2635 if (pattern_len == 0 || maxcount == 0) {
2636 if (target_len+1 < maxcount)
2637 return target_len+1;
2638 return maxcount;
2639 }
2640
2641 end -= pattern_len;
2642 if (direction < 0) {
2643 for (; (end >= start); end--)
2644 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2645 count++;
2646 if (--maxcount <= 0) break;
2647 end -= pattern_len-1;
2648 }
2649 } else {
2650 for (; (start <= end); start++)
2651 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2652 count++;
2653 if (--maxcount <= 0)
2654 break;
2655 start += pattern_len-1;
2656 }
2657 }
2658 return count;
2659}
2660
2661
2662/* Algorithms for different cases of string replacement */
2663
2664/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2665Py_LOCAL(PyBytesObject *)
2666replace_interleave(PyBytesObject *self,
2667 const char *to_s, Py_ssize_t to_len,
2668 Py_ssize_t maxcount)
2669{
2670 char *self_s, *result_s;
2671 Py_ssize_t self_len, result_len;
2672 Py_ssize_t count, i, product;
2673 PyBytesObject *result;
2674
2675 self_len = PyBytes_GET_SIZE(self);
2676
2677 /* 1 at the end plus 1 after every character */
2678 count = self_len+1;
2679 if (maxcount < count)
2680 count = maxcount;
2681
2682 /* Check for overflow */
2683 /* result_len = count * to_len + self_len; */
2684 product = count * to_len;
2685 if (product / to_len != count) {
2686 PyErr_SetString(PyExc_OverflowError,
2687 "replace string is too long");
2688 return NULL;
2689 }
2690 result_len = product + self_len;
2691 if (result_len < 0) {
2692 PyErr_SetString(PyExc_OverflowError,
2693 "replace string is too long");
2694 return NULL;
2695 }
2696
2697 if (! (result = (PyBytesObject *)
2698 PyBytes_FromStringAndSize(NULL, result_len)) )
2699 return NULL;
2700
2701 self_s = PyBytes_AS_STRING(self);
2702 result_s = PyBytes_AS_STRING(result);
2703
2704 /* TODO: special case single character, which doesn't need memcpy */
2705
2706 /* Lay the first one down (guaranteed this will occur) */
2707 Py_MEMCPY(result_s, to_s, to_len);
2708 result_s += to_len;
2709 count -= 1;
2710
2711 for (i=0; i<count; i++) {
2712 *result_s++ = *self_s++;
2713 Py_MEMCPY(result_s, to_s, to_len);
2714 result_s += to_len;
2715 }
2716
2717 /* Copy the rest of the original string */
2718 Py_MEMCPY(result_s, self_s, self_len-i);
2719
2720 return result;
2721}
2722
2723/* Special case for deleting a single character */
2724/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2725Py_LOCAL(PyBytesObject *)
2726replace_delete_single_character(PyBytesObject *self,
2727 char from_c, Py_ssize_t maxcount)
2728{
2729 char *self_s, *result_s;
2730 char *start, *next, *end;
2731 Py_ssize_t self_len, result_len;
2732 Py_ssize_t count;
2733 PyBytesObject *result;
2734
2735 self_len = PyBytes_GET_SIZE(self);
2736 self_s = PyBytes_AS_STRING(self);
2737
2738 count = countchar(self_s, self_len, from_c, maxcount);
2739 if (count == 0) {
2740 return return_self(self);
2741 }
2742
2743 result_len = self_len - count; /* from_len == 1 */
2744 assert(result_len>=0);
2745
2746 if ( (result = (PyBytesObject *)
2747 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2748 return NULL;
2749 result_s = PyBytes_AS_STRING(result);
2750
2751 start = self_s;
2752 end = self_s + self_len;
2753 while (count-- > 0) {
2754 next = findchar(start, end-start, from_c);
2755 if (next == NULL)
2756 break;
2757 Py_MEMCPY(result_s, start, next-start);
2758 result_s += (next-start);
2759 start = next+1;
2760 }
2761 Py_MEMCPY(result_s, start, end-start);
2762
2763 return result;
2764}
2765
2766/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2767
2768Py_LOCAL(PyBytesObject *)
2769replace_delete_substring(PyBytesObject *self,
2770 const char *from_s, Py_ssize_t from_len,
2771 Py_ssize_t maxcount) {
2772 char *self_s, *result_s;
2773 char *start, *next, *end;
2774 Py_ssize_t self_len, result_len;
2775 Py_ssize_t count, offset;
2776 PyBytesObject *result;
2777
2778 self_len = PyBytes_GET_SIZE(self);
2779 self_s = PyBytes_AS_STRING(self);
2780
2781 count = countstring(self_s, self_len,
2782 from_s, from_len,
2783 0, self_len, 1,
2784 maxcount);
2785
2786 if (count == 0) {
2787 /* no matches */
2788 return return_self(self);
2789 }
2790
2791 result_len = self_len - (count * from_len);
2792 assert (result_len>=0);
2793
2794 if ( (result = (PyBytesObject *)
2795 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2796 return NULL;
2797
2798 result_s = PyBytes_AS_STRING(result);
2799
2800 start = self_s;
2801 end = self_s + self_len;
2802 while (count-- > 0) {
2803 offset = findstring(start, end-start,
2804 from_s, from_len,
2805 0, end-start, FORWARD);
2806 if (offset == -1)
2807 break;
2808 next = start + offset;
2809
2810 Py_MEMCPY(result_s, start, next-start);
2811
2812 result_s += (next-start);
2813 start = next+from_len;
2814 }
2815 Py_MEMCPY(result_s, start, end-start);
2816 return result;
2817}
2818
2819/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2820Py_LOCAL(PyBytesObject *)
2821replace_single_character_in_place(PyBytesObject *self,
2822 char from_c, char to_c,
2823 Py_ssize_t maxcount)
2824{
2825 char *self_s, *result_s, *start, *end, *next;
2826 Py_ssize_t self_len;
2827 PyBytesObject *result;
2828
2829 /* The result string will be the same size */
2830 self_s = PyBytes_AS_STRING(self);
2831 self_len = PyBytes_GET_SIZE(self);
2832
2833 next = findchar(self_s, self_len, from_c);
2834
2835 if (next == NULL) {
2836 /* No matches; return the original string */
2837 return return_self(self);
2838 }
2839
2840 /* Need to make a new string */
2841 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2842 if (result == NULL)
2843 return NULL;
2844 result_s = PyBytes_AS_STRING(result);
2845 Py_MEMCPY(result_s, self_s, self_len);
2846
2847 /* change everything in-place, starting with this one */
2848 start = result_s + (next-self_s);
2849 *start = to_c;
2850 start++;
2851 end = result_s + self_len;
2852
2853 while (--maxcount > 0) {
2854 next = findchar(start, end-start, from_c);
2855 if (next == NULL)
2856 break;
2857 *next = to_c;
2858 start = next+1;
2859 }
2860
2861 return result;
2862}
2863
2864/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2865Py_LOCAL(PyBytesObject *)
2866replace_substring_in_place(PyBytesObject *self,
2867 const char *from_s, Py_ssize_t from_len,
2868 const char *to_s, Py_ssize_t to_len,
2869 Py_ssize_t maxcount)
2870{
2871 char *result_s, *start, *end;
2872 char *self_s;
2873 Py_ssize_t self_len, offset;
2874 PyBytesObject *result;
2875
2876 /* The result string will be the same size */
2877
2878 self_s = PyBytes_AS_STRING(self);
2879 self_len = PyBytes_GET_SIZE(self);
2880
2881 offset = findstring(self_s, self_len,
2882 from_s, from_len,
2883 0, self_len, FORWARD);
2884 if (offset == -1) {
2885 /* No matches; return the original string */
2886 return return_self(self);
2887 }
2888
2889 /* Need to make a new string */
2890 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2891 if (result == NULL)
2892 return NULL;
2893 result_s = PyBytes_AS_STRING(result);
2894 Py_MEMCPY(result_s, self_s, self_len);
2895
2896 /* change everything in-place, starting with this one */
2897 start = result_s + offset;
2898 Py_MEMCPY(start, to_s, from_len);
2899 start += from_len;
2900 end = result_s + self_len;
2901
2902 while ( --maxcount > 0) {
2903 offset = findstring(start, end-start,
2904 from_s, from_len,
2905 0, end-start, FORWARD);
2906 if (offset==-1)
2907 break;
2908 Py_MEMCPY(start+offset, to_s, from_len);
2909 start += offset+from_len;
2910 }
2911
2912 return result;
2913}
2914
2915/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2916Py_LOCAL(PyBytesObject *)
2917replace_single_character(PyBytesObject *self,
2918 char from_c,
2919 const char *to_s, Py_ssize_t to_len,
2920 Py_ssize_t maxcount)
2921{
2922 char *self_s, *result_s;
2923 char *start, *next, *end;
2924 Py_ssize_t self_len, result_len;
2925 Py_ssize_t count, product;
2926 PyBytesObject *result;
2927
2928 self_s = PyBytes_AS_STRING(self);
2929 self_len = PyBytes_GET_SIZE(self);
2930
2931 count = countchar(self_s, self_len, from_c, maxcount);
2932 if (count == 0) {
2933 /* no matches, return unchanged */
2934 return return_self(self);
2935 }
2936
2937 /* use the difference between current and new, hence the "-1" */
2938 /* result_len = self_len + count * (to_len-1) */
2939 product = count * (to_len-1);
2940 if (product / (to_len-1) != count) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
2944 result_len = self_len + product;
2945 if (result_len < 0) {
2946 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2947 return NULL;
2948 }
2949
2950 if ( (result = (PyBytesObject *)
2951 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2952 return NULL;
2953 result_s = PyBytes_AS_STRING(result);
2954
2955 start = self_s;
2956 end = self_s + self_len;
2957 while (count-- > 0) {
2958 next = findchar(start, end-start, from_c);
2959 if (next == NULL)
2960 break;
2961
2962 if (next == start) {
2963 /* replace with the 'to' */
2964 Py_MEMCPY(result_s, to_s, to_len);
2965 result_s += to_len;
2966 start += 1;
2967 } else {
2968 /* copy the unchanged old then the 'to' */
2969 Py_MEMCPY(result_s, start, next-start);
2970 result_s += (next-start);
2971 Py_MEMCPY(result_s, to_s, to_len);
2972 result_s += to_len;
2973 start = next+1;
2974 }
2975 }
2976 /* Copy the remainder of the remaining string */
2977 Py_MEMCPY(result_s, start, end-start);
2978
2979 return result;
2980}
2981
2982/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2983Py_LOCAL(PyBytesObject *)
2984replace_substring(PyBytesObject *self,
2985 const char *from_s, Py_ssize_t from_len,
2986 const char *to_s, Py_ssize_t to_len,
2987 Py_ssize_t maxcount) {
2988 char *self_s, *result_s;
2989 char *start, *next, *end;
2990 Py_ssize_t self_len, result_len;
2991 Py_ssize_t count, offset, product;
2992 PyBytesObject *result;
2993
2994 self_s = PyBytes_AS_STRING(self);
2995 self_len = PyBytes_GET_SIZE(self);
2996
2997 count = countstring(self_s, self_len,
2998 from_s, from_len,
2999 0, self_len, FORWARD, maxcount);
3000 if (count == 0) {
3001 /* no matches, return unchanged */
3002 return return_self(self);
3003 }
3004
3005 /* Check for overflow */
3006 /* result_len = self_len + count * (to_len-from_len) */
3007 product = count * (to_len-from_len);
3008 if (product / (to_len-from_len) != count) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012 result_len = self_len + product;
3013 if (result_len < 0) {
3014 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015 return NULL;
3016 }
3017
3018 if ( (result = (PyBytesObject *)
3019 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
3020 return NULL;
3021 result_s = PyBytes_AS_STRING(result);
3022
3023 start = self_s;
3024 end = self_s + self_len;
3025 while (count-- > 0) {
3026 offset = findstring(start, end-start,
3027 from_s, from_len,
3028 0, end-start, FORWARD);
3029 if (offset == -1)
3030 break;
3031 next = start+offset;
3032 if (next == start) {
3033 /* replace with the 'to' */
3034 Py_MEMCPY(result_s, to_s, to_len);
3035 result_s += to_len;
3036 start += from_len;
3037 } else {
3038 /* copy the unchanged old then the 'to' */
3039 Py_MEMCPY(result_s, start, next-start);
3040 result_s += (next-start);
3041 Py_MEMCPY(result_s, to_s, to_len);
3042 result_s += to_len;
3043 start = next+from_len;
3044 }
3045 }
3046 /* Copy the remainder of the remaining string */
3047 Py_MEMCPY(result_s, start, end-start);
3048
3049 return result;
3050}
3051
3052
3053Py_LOCAL(PyBytesObject *)
3054replace(PyBytesObject *self,
3055 const char *from_s, Py_ssize_t from_len,
3056 const char *to_s, Py_ssize_t to_len,
3057 Py_ssize_t maxcount)
3058{
3059 if (maxcount < 0) {
3060 maxcount = PY_SSIZE_T_MAX;
3061 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3062 /* nothing to do; return the original string */
3063 return return_self(self);
3064 }
3065
3066 if (maxcount == 0 ||
3067 (from_len == 0 && to_len == 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self);
3070 }
3071
3072 /* Handle zero-length special cases */
3073
3074 if (from_len == 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self, to_s, to_len, maxcount);
3079 }
3080
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
3084 if (PyBytes_GET_SIZE(self) == 0) {
3085 return return_self(self);
3086 }
3087
3088 if (to_len == 0) {
3089 /* delete all occurances of 'from' string */
3090 if (from_len == 1) {
3091 return replace_delete_single_character(
3092 self, from_s[0], maxcount);
3093 } else {
3094 return replace_delete_substring(self, from_s, from_len, maxcount);
3095 }
3096 }
3097
3098 /* Handle special case where both strings have the same length */
3099
3100 if (from_len == to_len) {
3101 if (from_len == 1) {
3102 return replace_single_character_in_place(
3103 self,
3104 from_s[0],
3105 to_s[0],
3106 maxcount);
3107 } else {
3108 return replace_substring_in_place(
3109 self, from_s, from_len, to_s, to_len, maxcount);
3110 }
3111 }
3112
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len == 1) {
3115 return replace_single_character(self, from_s[0],
3116 to_s, to_len, maxcount);
3117 } else {
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3120 }
3121}
3122
3123PyDoc_STRVAR(replace__doc__,
3124"S.replace (old, new[, count]) -> string\n\
3125\n\
3126Return a copy of string S with all occurrences of substring\n\
3127old replaced by new. If the optional argument count is\n\
3128given, only the first count occurrences are replaced.");
3129
3130static PyObject *
3131string_replace(PyBytesObject *self, PyObject *args)
3132{
3133 Py_ssize_t count = -1;
3134 PyObject *from, *to;
3135 const char *from_s, *to_s;
3136 Py_ssize_t from_len, to_len;
3137
3138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3139 return NULL;
3140
3141 if (PyBytes_Check(from)) {
3142 from_s = PyBytes_AS_STRING(from);
3143 from_len = PyBytes_GET_SIZE(from);
3144 }
3145#ifdef Py_USING_UNICODE
3146 if (PyUnicode_Check(from))
3147 return PyUnicode_Replace((PyObject *)self,
3148 from, to, count);
3149#endif
3150 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3151 return NULL;
3152
3153 if (PyBytes_Check(to)) {
3154 to_s = PyBytes_AS_STRING(to);
3155 to_len = PyBytes_GET_SIZE(to);
3156 }
3157#ifdef Py_USING_UNICODE
3158 else if (PyUnicode_Check(to))
3159 return PyUnicode_Replace((PyObject *)self,
3160 from, to, count);
3161#endif
3162 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3163 return NULL;
3164
3165 return (PyObject *)replace((PyBytesObject *) self,
3166 from_s, from_len,
3167 to_s, to_len, count);
3168}
3169
3170/** End DALKE **/
3171
3172/* Matches the end (direction >= 0) or start (direction < 0) of self
3173 * against substr, using the start and end arguments. Returns
3174 * -1 on error, 0 if not found and 1 if found.
3175 */
3176Py_LOCAL(int)
3177_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
3178 Py_ssize_t end, int direction)
3179{
3180 Py_ssize_t len = PyBytes_GET_SIZE(self);
3181 Py_ssize_t slen;
3182 const char* sub;
3183 const char* str;
3184
3185 if (PyBytes_Check(substr)) {
3186 sub = PyBytes_AS_STRING(substr);
3187 slen = PyBytes_GET_SIZE(substr);
3188 }
3189#ifdef Py_USING_UNICODE
3190 else if (PyUnicode_Check(substr))
3191 return PyUnicode_Tailmatch((PyObject *)self,
3192 substr, start, end, direction);
3193#endif
3194 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3195 return -1;
3196 str = PyBytes_AS_STRING(self);
3197
3198 string_adjust_indices(&start, &end, len);
3199
3200 if (direction < 0) {
3201 /* startswith */
3202 if (start+slen > len)
3203 return 0;
3204 } else {
3205 /* endswith */
3206 if (end-start < slen || start > len)
3207 return 0;
3208
3209 if (end-slen > start)
3210 start = end - slen;
3211 }
3212 if (end-start >= slen)
3213 return ! memcmp(str+start, sub, slen);
3214 return 0;
3215}
3216
3217
3218PyDoc_STRVAR(startswith__doc__,
3219"S.startswith(prefix[, start[, end]]) -> bool\n\
3220\n\
3221Return True if S starts with the specified prefix, False otherwise.\n\
3222With optional start, test S beginning at that position.\n\
3223With optional end, stop comparing S at that position.\n\
3224prefix can also be a tuple of strings to try.");
3225
3226static PyObject *
3227string_startswith(PyBytesObject *self, PyObject *args)
3228{
3229 Py_ssize_t start = 0;
3230 Py_ssize_t end = PY_SSIZE_T_MAX;
3231 PyObject *subobj;
3232 int result;
3233
3234 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3236 return NULL;
3237 if (PyTuple_Check(subobj)) {
3238 Py_ssize_t i;
3239 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3240 result = _string_tailmatch(self,
3241 PyTuple_GET_ITEM(subobj, i),
3242 start, end, -1);
3243 if (result == -1)
3244 return NULL;
3245 else if (result) {
3246 Py_RETURN_TRUE;
3247 }
3248 }
3249 Py_RETURN_FALSE;
3250 }
3251 result = _string_tailmatch(self, subobj, start, end, -1);
3252 if (result == -1)
3253 return NULL;
3254 else
3255 return PyBool_FromLong(result);
3256}
3257
3258
3259PyDoc_STRVAR(endswith__doc__,
3260"S.endswith(suffix[, start[, end]]) -> bool\n\
3261\n\
3262Return True if S ends with the specified suffix, False otherwise.\n\
3263With optional start, test S beginning at that position.\n\
3264With optional end, stop comparing S at that position.\n\
3265suffix can also be a tuple of strings to try.");
3266
3267static PyObject *
3268string_endswith(PyBytesObject *self, PyObject *args)
3269{
3270 Py_ssize_t start = 0;
3271 Py_ssize_t end = PY_SSIZE_T_MAX;
3272 PyObject *subobj;
3273 int result;
3274
3275 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3276 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3277 return NULL;
3278 if (PyTuple_Check(subobj)) {
3279 Py_ssize_t i;
3280 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3281 result = _string_tailmatch(self,
3282 PyTuple_GET_ITEM(subobj, i),
3283 start, end, +1);
3284 if (result == -1)
3285 return NULL;
3286 else if (result) {
3287 Py_RETURN_TRUE;
3288 }
3289 }
3290 Py_RETURN_FALSE;
3291 }
3292 result = _string_tailmatch(self, subobj, start, end, +1);
3293 if (result == -1)
3294 return NULL;
3295 else
3296 return PyBool_FromLong(result);
3297}
3298
3299
3300PyDoc_STRVAR(encode__doc__,
3301"S.encode([encoding[,errors]]) -> object\n\
3302\n\
3303Encodes S using the codec registered for encoding. encoding defaults\n\
3304to the default encoding. errors may be given to set a different error\n\
3305handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307'xmlcharrefreplace' as well as any other name registered with\n\
3308codecs.register_error that is able to handle UnicodeEncodeErrors.");
3309
3310static PyObject *
3311string_encode(PyBytesObject *self, PyObject *args)
3312{
3313 char *encoding = NULL;
3314 char *errors = NULL;
3315 PyObject *v;
3316
3317 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3318 return NULL;
3319 v = PyBytes_AsEncodedObject((PyObject *)self, encoding, errors);
3320 if (v == NULL)
3321 goto onError;
3322 if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3323 PyErr_Format(PyExc_TypeError,
3324 "encoder did not return a string/unicode object "
3325 "(type=%.400s)",
3326 Py_TYPE(v)->tp_name);
3327 Py_DECREF(v);
3328 return NULL;
3329 }
3330 return v;
3331
3332 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003333 return NULL;
3334}
3335
Christian Heimes44720832008-05-26 13:01:01 +00003336
3337PyDoc_STRVAR(decode__doc__,
3338"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339\n\
Christian Heimes44720832008-05-26 13:01:01 +00003340Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003341to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003342handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344as well as any other name registerd with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003345able to handle UnicodeDecodeErrors.");
3346
3347static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003348string_decode(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349{
Christian Heimes44720832008-05-26 13:01:01 +00003350 char *encoding = NULL;
3351 char *errors = NULL;
3352 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353
3354 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3355 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003356 v = PyBytes_AsDecodedObject((PyObject *)self, encoding, errors);
3357 if (v == NULL)
3358 goto onError;
3359 if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3360 PyErr_Format(PyExc_TypeError,
3361 "decoder did not return a string/unicode object "
3362 "(type=%.400s)",
3363 Py_TYPE(v)->tp_name);
3364 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003366 }
3367 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368
Christian Heimes44720832008-05-26 13:01:01 +00003369 onError:
3370 return NULL;
3371}
3372
3373
3374PyDoc_STRVAR(expandtabs__doc__,
3375"S.expandtabs([tabsize]) -> string\n\
3376\n\
3377Return a copy of S where all tab characters are expanded using spaces.\n\
3378If tabsize is not given, a tab size of 8 characters is assumed.");
3379
3380static PyObject*
3381string_expandtabs(PyBytesObject *self, PyObject *args)
3382{
3383 const char *e, *p, *qe;
3384 char *q;
3385 Py_ssize_t i, j, incr;
3386 PyObject *u;
3387 int tabsize = 8;
3388
3389 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3390 return NULL;
3391
3392 /* First pass: determine size of output string */
3393 i = 0; /* chars up to and including most recent \n or \r */
3394 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3395 e = PyBytes_AS_STRING(self) + PyBytes_GET_SIZE(self); /* end of input */
3396 for (p = PyBytes_AS_STRING(self); p < e; p++)
3397 if (*p == '\t') {
3398 if (tabsize > 0) {
3399 incr = tabsize - (j % tabsize);
3400 if (j > PY_SSIZE_T_MAX - incr)
3401 goto overflow1;
3402 j += incr;
3403 }
3404 }
3405 else {
3406 if (j > PY_SSIZE_T_MAX - 1)
3407 goto overflow1;
3408 j++;
3409 if (*p == '\n' || *p == '\r') {
3410 if (i > PY_SSIZE_T_MAX - j)
3411 goto overflow1;
3412 i += j;
3413 j = 0;
3414 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003415 }
Christian Heimes44720832008-05-26 13:01:01 +00003416
3417 if (i > PY_SSIZE_T_MAX - j)
3418 goto overflow1;
3419
3420 /* Second pass: create output string and fill it */
3421 u = PyBytes_FromStringAndSize(NULL, i + j);
3422 if (!u)
3423 return NULL;
3424
3425 j = 0; /* same as in first pass */
3426 q = PyBytes_AS_STRING(u); /* next output char */
3427 qe = PyBytes_AS_STRING(u) + PyBytes_GET_SIZE(u); /* end of output */
3428
3429 for (p = PyBytes_AS_STRING(self); p < e; p++)
3430 if (*p == '\t') {
3431 if (tabsize > 0) {
3432 i = tabsize - (j % tabsize);
3433 j += i;
3434 while (i--) {
3435 if (q >= qe)
3436 goto overflow2;
3437 *q++ = ' ';
3438 }
3439 }
3440 }
3441 else {
3442 if (q >= qe)
3443 goto overflow2;
3444 *q++ = *p;
3445 j++;
3446 if (*p == '\n' || *p == '\r')
3447 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003448 }
Christian Heimes44720832008-05-26 13:01:01 +00003449
3450 return u;
3451
3452 overflow2:
3453 Py_DECREF(u);
3454 overflow1:
3455 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3456 return NULL;
3457}
3458
3459Py_LOCAL_INLINE(PyObject *)
3460pad(PyBytesObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3461{
3462 PyObject *u;
3463
3464 if (left < 0)
3465 left = 0;
3466 if (right < 0)
3467 right = 0;
3468
3469 if (left == 0 && right == 0 && PyBytes_CheckExact(self)) {
3470 Py_INCREF(self);
3471 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003472 }
3473
Christian Heimes44720832008-05-26 13:01:01 +00003474 u = PyBytes_FromStringAndSize(NULL,
3475 left + PyBytes_GET_SIZE(self) + right);
3476 if (u) {
3477 if (left)
3478 memset(PyBytes_AS_STRING(u), fill, left);
3479 Py_MEMCPY(PyBytes_AS_STRING(u) + left,
3480 PyBytes_AS_STRING(self),
3481 PyBytes_GET_SIZE(self));
3482 if (right)
3483 memset(PyBytes_AS_STRING(u) + left + PyBytes_GET_SIZE(self),
3484 fill, right);
3485 }
3486
3487 return u;
3488}
3489
3490PyDoc_STRVAR(ljust__doc__,
3491"S.ljust(width[, fillchar]) -> string\n"
3492"\n"
3493"Return S left justified in a string of length width. Padding is\n"
3494"done using the specified fill character (default is a space).");
3495
3496static PyObject *
3497string_ljust(PyBytesObject *self, PyObject *args)
3498{
3499 Py_ssize_t width;
3500 char fillchar = ' ';
3501
3502 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3503 return NULL;
3504
3505 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3506 Py_INCREF(self);
3507 return (PyObject*) self;
3508 }
3509
3510 return pad(self, 0, width - PyBytes_GET_SIZE(self), fillchar);
3511}
3512
3513
3514PyDoc_STRVAR(rjust__doc__,
3515"S.rjust(width[, fillchar]) -> string\n"
3516"\n"
3517"Return S right justified in a string of length width. Padding is\n"
3518"done using the specified fill character (default is a space)");
3519
3520static PyObject *
3521string_rjust(PyBytesObject *self, PyObject *args)
3522{
3523 Py_ssize_t width;
3524 char fillchar = ' ';
3525
3526 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3527 return NULL;
3528
3529 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3530 Py_INCREF(self);
3531 return (PyObject*) self;
3532 }
3533
3534 return pad(self, width - PyBytes_GET_SIZE(self), 0, fillchar);
3535}
3536
3537
3538PyDoc_STRVAR(center__doc__,
3539"S.center(width[, fillchar]) -> string\n"
3540"\n"
3541"Return S centered in a string of length width. Padding is\n"
3542"done using the specified fill character (default is a space)");
3543
3544static PyObject *
3545string_center(PyBytesObject *self, PyObject *args)
3546{
3547 Py_ssize_t marg, left;
3548 Py_ssize_t width;
3549 char fillchar = ' ';
3550
3551 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3552 return NULL;
3553
3554 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3555 Py_INCREF(self);
3556 return (PyObject*) self;
3557 }
3558
3559 marg = width - PyBytes_GET_SIZE(self);
3560 left = marg / 2 + (marg & width & 1);
3561
3562 return pad(self, left, marg - left, fillchar);
3563}
3564
3565PyDoc_STRVAR(zfill__doc__,
3566"S.zfill(width) -> string\n"
3567"\n"
3568"Pad a numeric string S with zeros on the left, to fill a field\n"
3569"of the specified width. The string S is never truncated.");
3570
3571static PyObject *
3572string_zfill(PyBytesObject *self, PyObject *args)
3573{
3574 Py_ssize_t fill;
3575 PyObject *s;
3576 char *p;
3577 Py_ssize_t width;
3578
3579 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3580 return NULL;
3581
3582 if (PyBytes_GET_SIZE(self) >= width) {
3583 if (PyBytes_CheckExact(self)) {
3584 Py_INCREF(self);
3585 return (PyObject*) self;
3586 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587 else
Christian Heimes44720832008-05-26 13:01:01 +00003588 return PyBytes_FromStringAndSize(
3589 PyBytes_AS_STRING(self),
3590 PyBytes_GET_SIZE(self)
3591 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003592 }
3593
Christian Heimes44720832008-05-26 13:01:01 +00003594 fill = width - PyBytes_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003595
Christian Heimes44720832008-05-26 13:01:01 +00003596 s = pad(self, fill, 0, '0');
3597
3598 if (s == NULL)
3599 return NULL;
3600
3601 p = PyBytes_AS_STRING(s);
3602 if (p[fill] == '+' || p[fill] == '-') {
3603 /* move sign to beginning of string */
3604 p[0] = p[fill];
3605 p[fill] = '0';
3606 }
3607
3608 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003609}
3610
Christian Heimes44720832008-05-26 13:01:01 +00003611PyDoc_STRVAR(isspace__doc__,
3612"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003613\n\
Christian Heimes44720832008-05-26 13:01:01 +00003614Return True if all characters in S are whitespace\n\
3615and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616
Christian Heimes44720832008-05-26 13:01:01 +00003617static PyObject*
3618string_isspace(PyBytesObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619{
Christian Heimes44720832008-05-26 13:01:01 +00003620 register const unsigned char *p
3621 = (unsigned char *) PyBytes_AS_STRING(self);
3622 register const unsigned char *e;
3623
3624 /* Shortcut for single character strings */
3625 if (PyBytes_GET_SIZE(self) == 1 &&
3626 isspace(*p))
3627 return PyBool_FromLong(1);
3628
3629 /* Special case for empty strings */
3630 if (PyBytes_GET_SIZE(self) == 0)
3631 return PyBool_FromLong(0);
3632
3633 e = p + PyBytes_GET_SIZE(self);
3634 for (; p < e; p++) {
3635 if (!isspace(*p))
3636 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637 }
Christian Heimes44720832008-05-26 13:01:01 +00003638 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003639}
3640
Christian Heimes44720832008-05-26 13:01:01 +00003641
3642PyDoc_STRVAR(isalpha__doc__,
3643"S.isalpha() -> bool\n\
3644\n\
3645Return True if all characters in S are alphabetic\n\
3646and there is at least one character in S, False otherwise.");
3647
3648static PyObject*
3649string_isalpha(PyBytesObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003650{
Christian Heimes44720832008-05-26 13:01:01 +00003651 register const unsigned char *p
3652 = (unsigned char *) PyBytes_AS_STRING(self);
3653 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003654
Christian Heimes44720832008-05-26 13:01:01 +00003655 /* Shortcut for single character strings */
3656 if (PyBytes_GET_SIZE(self) == 1 &&
3657 isalpha(*p))
3658 return PyBool_FromLong(1);
3659
3660 /* Special case for empty strings */
3661 if (PyBytes_GET_SIZE(self) == 0)
3662 return PyBool_FromLong(0);
3663
3664 e = p + PyBytes_GET_SIZE(self);
3665 for (; p < e; p++) {
3666 if (!isalpha(*p))
3667 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003668 }
Christian Heimes44720832008-05-26 13:01:01 +00003669 return PyBool_FromLong(1);
3670}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003671
Christian Heimes44720832008-05-26 13:01:01 +00003672
3673PyDoc_STRVAR(isalnum__doc__,
3674"S.isalnum() -> bool\n\
3675\n\
3676Return True if all characters in S are alphanumeric\n\
3677and there is at least one character in S, False otherwise.");
3678
3679static PyObject*
3680string_isalnum(PyBytesObject *self)
3681{
3682 register const unsigned char *p
3683 = (unsigned char *) PyBytes_AS_STRING(self);
3684 register const unsigned char *e;
3685
3686 /* Shortcut for single character strings */
3687 if (PyBytes_GET_SIZE(self) == 1 &&
3688 isalnum(*p))
3689 return PyBool_FromLong(1);
3690
3691 /* Special case for empty strings */
3692 if (PyBytes_GET_SIZE(self) == 0)
3693 return PyBool_FromLong(0);
3694
3695 e = p + PyBytes_GET_SIZE(self);
3696 for (; p < e; p++) {
3697 if (!isalnum(*p))
3698 return PyBool_FromLong(0);
3699 }
3700 return PyBool_FromLong(1);
3701}
3702
3703
3704PyDoc_STRVAR(isdigit__doc__,
3705"S.isdigit() -> bool\n\
3706\n\
3707Return True if all characters in S are digits\n\
3708and there is at least one character in S, False otherwise.");
3709
3710static PyObject*
3711string_isdigit(PyBytesObject *self)
3712{
3713 register const unsigned char *p
3714 = (unsigned char *) PyBytes_AS_STRING(self);
3715 register const unsigned char *e;
3716
3717 /* Shortcut for single character strings */
3718 if (PyBytes_GET_SIZE(self) == 1 &&
3719 isdigit(*p))
3720 return PyBool_FromLong(1);
3721
3722 /* Special case for empty strings */
3723 if (PyBytes_GET_SIZE(self) == 0)
3724 return PyBool_FromLong(0);
3725
3726 e = p + PyBytes_GET_SIZE(self);
3727 for (; p < e; p++) {
3728 if (!isdigit(*p))
3729 return PyBool_FromLong(0);
3730 }
3731 return PyBool_FromLong(1);
3732}
3733
3734
3735PyDoc_STRVAR(islower__doc__,
3736"S.islower() -> bool\n\
3737\n\
3738Return True if all cased characters in S are lowercase and there is\n\
3739at least one cased character in S, False otherwise.");
3740
3741static PyObject*
3742string_islower(PyBytesObject *self)
3743{
3744 register const unsigned char *p
3745 = (unsigned char *) PyBytes_AS_STRING(self);
3746 register const unsigned char *e;
3747 int cased;
3748
3749 /* Shortcut for single character strings */
3750 if (PyBytes_GET_SIZE(self) == 1)
3751 return PyBool_FromLong(islower(*p) != 0);
3752
3753 /* Special case for empty strings */
3754 if (PyBytes_GET_SIZE(self) == 0)
3755 return PyBool_FromLong(0);
3756
3757 e = p + PyBytes_GET_SIZE(self);
3758 cased = 0;
3759 for (; p < e; p++) {
3760 if (isupper(*p))
3761 return PyBool_FromLong(0);
3762 else if (!cased && islower(*p))
3763 cased = 1;
3764 }
3765 return PyBool_FromLong(cased);
3766}
3767
3768
3769PyDoc_STRVAR(isupper__doc__,
3770"S.isupper() -> bool\n\
3771\n\
3772Return True if all cased characters in S are uppercase and there is\n\
3773at least one cased character in S, False otherwise.");
3774
3775static PyObject*
3776string_isupper(PyBytesObject *self)
3777{
3778 register const unsigned char *p
3779 = (unsigned char *) PyBytes_AS_STRING(self);
3780 register const unsigned char *e;
3781 int cased;
3782
3783 /* Shortcut for single character strings */
3784 if (PyBytes_GET_SIZE(self) == 1)
3785 return PyBool_FromLong(isupper(*p) != 0);
3786
3787 /* Special case for empty strings */
3788 if (PyBytes_GET_SIZE(self) == 0)
3789 return PyBool_FromLong(0);
3790
3791 e = p + PyBytes_GET_SIZE(self);
3792 cased = 0;
3793 for (; p < e; p++) {
3794 if (islower(*p))
3795 return PyBool_FromLong(0);
3796 else if (!cased && isupper(*p))
3797 cased = 1;
3798 }
3799 return PyBool_FromLong(cased);
3800}
3801
3802
3803PyDoc_STRVAR(istitle__doc__,
3804"S.istitle() -> bool\n\
3805\n\
3806Return True if S is a titlecased string and there is at least one\n\
3807character in S, i.e. uppercase characters may only follow uncased\n\
3808characters and lowercase characters only cased ones. Return False\n\
3809otherwise.");
3810
3811static PyObject*
3812string_istitle(PyBytesObject *self, PyObject *uncased)
3813{
3814 register const unsigned char *p
3815 = (unsigned char *) PyBytes_AS_STRING(self);
3816 register const unsigned char *e;
3817 int cased, previous_is_cased;
3818
3819 /* Shortcut for single character strings */
3820 if (PyBytes_GET_SIZE(self) == 1)
3821 return PyBool_FromLong(isupper(*p) != 0);
3822
3823 /* Special case for empty strings */
3824 if (PyBytes_GET_SIZE(self) == 0)
3825 return PyBool_FromLong(0);
3826
3827 e = p + PyBytes_GET_SIZE(self);
3828 cased = 0;
3829 previous_is_cased = 0;
3830 for (; p < e; p++) {
3831 register const unsigned char ch = *p;
3832
3833 if (isupper(ch)) {
3834 if (previous_is_cased)
3835 return PyBool_FromLong(0);
3836 previous_is_cased = 1;
3837 cased = 1;
3838 }
3839 else if (islower(ch)) {
3840 if (!previous_is_cased)
3841 return PyBool_FromLong(0);
3842 previous_is_cased = 1;
3843 cased = 1;
3844 }
3845 else
3846 previous_is_cased = 0;
3847 }
3848 return PyBool_FromLong(cased);
3849}
3850
3851
3852PyDoc_STRVAR(splitlines__doc__,
3853"S.splitlines([keepends]) -> list of strings\n\
3854\n\
3855Return a list of the lines in S, breaking at line boundaries.\n\
3856Line breaks are not included in the resulting list unless keepends\n\
3857is given and true.");
3858
3859static PyObject*
3860string_splitlines(PyBytesObject *self, PyObject *args)
3861{
3862 register Py_ssize_t i;
3863 register Py_ssize_t j;
3864 Py_ssize_t len;
3865 int keepends = 0;
3866 PyObject *list;
3867 PyObject *str;
3868 char *data;
3869
3870 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3871 return NULL;
3872
3873 data = PyBytes_AS_STRING(self);
3874 len = PyBytes_GET_SIZE(self);
3875
3876 /* This does not use the preallocated list because splitlines is
3877 usually run with hundreds of newlines. The overhead of
3878 switching between PyList_SET_ITEM and append causes about a
3879 2-3% slowdown for that common case. A smarter implementation
3880 could move the if check out, so the SET_ITEMs are done first
3881 and the appends only done when the prealloc buffer is full.
3882 That's too much work for little gain.*/
3883
3884 list = PyList_New(0);
3885 if (!list)
3886 goto onError;
3887
3888 for (i = j = 0; i < len; ) {
3889 Py_ssize_t eol;
3890
3891 /* Find a line and append it */
3892 while (i < len && data[i] != '\n' && data[i] != '\r')
3893 i++;
3894
3895 /* Skip the line break reading CRLF as one line break */
3896 eol = i;
3897 if (i < len) {
3898 if (data[i] == '\r' && i + 1 < len &&
3899 data[i+1] == '\n')
3900 i += 2;
3901 else
3902 i++;
3903 if (keepends)
3904 eol = i;
3905 }
3906 SPLIT_APPEND(data, j, eol);
3907 j = i;
3908 }
3909 if (j < len) {
3910 SPLIT_APPEND(data, j, len);
3911 }
3912
3913 return list;
3914
3915 onError:
3916 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003917 return NULL;
3918}
3919
Christian Heimes44720832008-05-26 13:01:01 +00003920#undef SPLIT_APPEND
3921#undef SPLIT_ADD
3922#undef MAX_PREALLOC
3923#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003924
3925static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003926string_getnewargs(PyBytesObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003927{
Christian Heimes44720832008-05-26 13:01:01 +00003928 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003929}
3930
Christian Heimes1a6387e2008-03-26 12:49:49 +00003931
Christian Heimes44720832008-05-26 13:01:01 +00003932#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003933
Christian Heimes44720832008-05-26 13:01:01 +00003934PyDoc_STRVAR(format__doc__,
3935"S.format(*args, **kwargs) -> unicode\n\
3936\n\
3937");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003938
Eric Smithdc13b792008-05-30 18:10:04 +00003939static PyObject *
3940string__format__(PyObject* self, PyObject* args)
3941{
3942 PyObject *format_spec;
3943 PyObject *result = NULL;
3944 PyObject *tmp = NULL;
3945
3946 /* If 2.x, convert format_spec to the same type as value */
3947 /* This is to allow things like u''.format('') */
3948 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3949 goto done;
3950 if (!(PyBytes_Check(format_spec) || PyUnicode_Check(format_spec))) {
3951 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3952 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3953 goto done;
3954 }
3955 tmp = PyObject_Str(format_spec);
3956 if (tmp == NULL)
3957 goto done;
3958 format_spec = tmp;
3959
3960 result = _PyBytes_FormatAdvanced(self,
3961 PyBytes_AS_STRING(format_spec),
3962 PyBytes_GET_SIZE(format_spec));
3963done:
3964 Py_XDECREF(tmp);
3965 return result;
3966}
3967
Christian Heimes44720832008-05-26 13:01:01 +00003968PyDoc_STRVAR(p_format__doc__,
3969"S.__format__(format_spec) -> unicode\n\
3970\n\
3971");
3972
3973
/* Method table for the str type (installed as tp_methods of
 * PyBytes_Type).  Each entry gives the Python-level name, the C
 * implementation, the calling convention flags and, where one exists,
 * the docstring.  Entries with only three initializers have no
 * docstring.  The {NULL, NULL} entry terminates the table.
 */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* NOTE(review): do_string_format, formatter_field_name_split and
       formatter_parser are not defined in the visible part of this
       file; presumably they come from stringlib/string_format.h --
       confirm. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
4030
4031static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004032str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004033
Christian Heimes44720832008-05-26 13:01:01 +00004034static PyObject *
4035string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4036{
4037 PyObject *x = NULL;
4038 static char *kwlist[] = {"object", 0};
4039
4040 if (type != &PyBytes_Type)
4041 return str_subtype_new(type, args, kwds);
4042 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4043 return NULL;
4044 if (x == NULL)
4045 return PyBytes_FromString("");
4046 return PyObject_Str(x);
4047}
4048
4049static PyObject *
4050str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4051{
4052 PyObject *tmp, *pnew;
4053 Py_ssize_t n;
4054
4055 assert(PyType_IsSubtype(type, &PyBytes_Type));
4056 tmp = string_new(&PyBytes_Type, args, kwds);
4057 if (tmp == NULL)
4058 return NULL;
4059 assert(PyBytes_CheckExact(tmp));
4060 n = PyBytes_GET_SIZE(tmp);
4061 pnew = type->tp_alloc(type, n);
4062 if (pnew != NULL) {
4063 Py_MEMCPY(PyBytes_AS_STRING(pnew), PyBytes_AS_STRING(tmp), n+1);
4064 ((PyBytesObject *)pnew)->ob_shash =
4065 ((PyBytesObject *)tmp)->ob_shash;
4066 ((PyBytesObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4067 }
4068 Py_DECREF(tmp);
4069 return pnew;
4070}
4071
/* tp_new for basestring: always fails with TypeError, so the type can be
 * used as a base but never instantiated.  All three arguments are
 * ignored. */
static PyObject *
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyErr_SetString(PyExc_TypeError,
                    "The basestring type cannot be instantiated");
    return NULL;
}
4079
4080static PyObject *
4081string_mod(PyObject *v, PyObject *w)
4082{
4083 if (!PyBytes_Check(v)) {
4084 Py_INCREF(Py_NotImplemented);
4085 return Py_NotImplemented;
4086 }
4087 return PyBytes_Format(v, w);
4088}
4089
/* Docstring for the basestring type (tp_doc of PyBaseString_Type). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Number protocol for str: only the remainder slot is filled in, which
 * gives `%' formatting through string_mod().  The slots after
 * nb_remainder have no initializer and are therefore zero. */
static PyNumberMethods string_as_number = {
    0,                      /*nb_add*/
    0,                      /*nb_subtract*/
    0,                      /*nb_multiply*/
    0,                      /*nb_divide*/
    string_mod,             /*nb_remainder*/
};
4100
4101
/* Type object for basestring.  It is declared as a base type
 * (Py_TPFLAGS_BASETYPE), derives from object, and its tp_new
 * (basestring_new) always raises, so it cannot be instantiated.
 * Nearly every other slot is left at 0. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4143
/* Docstring for the str type (tp_doc of PyBytes_Type). */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It is a variable-size type: one PyBytesObject
 * header plus sizeof(char) per item.  It derives from basestring, uses
 * string_methods as its method table and string_new as its
 * constructor. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyBytesObject),                      /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4193
4194void
4195PyBytes_Concat(register PyObject **pv, register PyObject *w)
4196{
4197 register PyObject *v;
4198 if (*pv == NULL)
4199 return;
4200 if (w == NULL || !PyBytes_Check(*pv)) {
4201 Py_DECREF(*pv);
4202 *pv = NULL;
4203 return;
4204 }
4205 v = string_concat((PyBytesObject *) *pv, w);
4206 Py_DECREF(*pv);
4207 *pv = v;
4208}
4209
4210void
4211PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
4212{
4213 PyBytes_Concat(pv, w);
4214 Py_XDECREF(w);
4215}
4216
4217
4218/* The following function breaks the notion that strings are immutable:
4219 it changes the size of a string. We get away with this only if there
4220 is only one module referencing the object. You can also think of it
4221 as creating a new string object and destroying the old one, only
4222 more efficiently. In any case, don't use this if the string may
4223 already be known to some other part of the code...
4224 Note that if there's not enough memory to resize the string, the original
4225 string object at *pv is deallocated, *pv is set to NULL, an "out of
4226 memory" exception is set, and -1 is returned. Else (on success) 0 is
4227 returned, and the value in *pv may or may not be the same as on input.
4228 As always, an extra byte is allocated for a trailing \0 byte (newsize
4229 does *not* include that), and a trailing \0 byte is stored.
4230*/
4231
4232int
4233_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
4234{
4235 register PyObject *v;
4236 register PyBytesObject *sv;
4237 v = *pv;
4238 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4239 PyBytes_CHECK_INTERNED(v)) {
4240 *pv = 0;
4241 Py_DECREF(v);
4242 PyErr_BadInternalCall();
4243 return -1;
4244 }
4245 /* XXX UNREF/NEWREF interface should be more symmetrical */
4246 _Py_DEC_REFTOTAL;
4247 _Py_ForgetReference(v);
4248 *pv = (PyObject *)
4249 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
4250 if (*pv == NULL) {
4251 PyObject_Del(v);
4252 PyErr_NoMemory();
4253 return -1;
4254 }
4255 _Py_NewReference(*pv);
4256 sv = (PyBytesObject *) *pv;
4257 Py_SIZE(sv) = newsize;
4258 sv->ob_sval[newsize] = '\0';
4259 sv->ob_shash = -1; /* invalidate cached hash value */
4260 return 0;
4261}
4262
4263/* Helpers for formatstring */
4264
4265Py_LOCAL_INLINE(PyObject *)
4266getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4267{
4268 Py_ssize_t argidx = *p_argidx;
4269 if (argidx < arglen) {
4270 (*p_argidx)++;
4271 if (arglen < 0)
4272 return args;
4273 else
4274 return PyTuple_GetItem(args, argidx);
4275 }
4276 PyErr_SetString(PyExc_TypeError,
4277 "not enough arguments for format string");
4278 return NULL;
4279}
4280
/* Bit flags recording which flag characters appeared in a conversion
 * specification:
 *
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4293
4294Py_LOCAL_INLINE(int)
4295formatfloat(char *buf, size_t buflen, int flags,
4296 int prec, int type, PyObject *v)
4297{
4298 /* fmt = '%#.' + `prec` + `type`
4299 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4300 char fmt[20];
4301 double x;
4302 x = PyFloat_AsDouble(v);
4303 if (x == -1.0 && PyErr_Occurred()) {
4304 PyErr_Format(PyExc_TypeError, "float argument required, "
4305 "not %.200s", Py_TYPE(v)->tp_name);
4306 return -1;
4307 }
4308 if (prec < 0)
4309 prec = 6;
4310 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4311 type = 'g';
4312 /* Worst case length calc to ensure no buffer overrun:
4313
4314 'g' formats:
4315 fmt = %#.<prec>g
4316 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4317 for any double rep.)
4318 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4319
4320 'f' formats:
4321 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4322 len = 1 + 50 + 1 + prec = 52 + prec
4323
4324 If prec=0 the effective precision is 1 (the leading digit is
4325 always given), therefore increase the length by one.
4326
4327 */
4328 if (((type == 'g' || type == 'G') &&
4329 buflen <= (size_t)10 + (size_t)prec) ||
4330 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4331 PyErr_SetString(PyExc_OverflowError,
4332 "formatted float is too long (precision too large?)");
4333 return -1;
4334 }
4335 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4336 (flags&F_ALT) ? "#" : "",
4337 prec, type);
4338 PyOS_ascii_formatd(buf, buflen, fmt, x);
4339 return (int)strlen(buf);
4340}
4341
4342/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
4343 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4344 * Python's regular ints.
4345 * Return value: a new PyString*, or NULL if error.
4346 * . *pbuf is set to point into it,
4347 * *plen set to the # of chars following that.
4348 * Caller must decref it when done using pbuf.
4349 * The string starting at *pbuf is of the form
4350 * "-"? ("0x" | "0X")? digit+
4351 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4352 * set in flags. The case of hex digits will be correct,
4353 * There will be at least prec digits, zero-filled on the left if
4354 * necessary to get that many.
4355 * val object to be converted
4356 * flags bitmask of format flags; only F_ALT is looked at
4357 * prec minimum number of digits; 0-fill on left if needed
4358 * type a character in [duoxX]; u acts the same as d
4359 *
4360 * CAUTION: o, x and X conversions on regular ints can never
4361 * produce a '-' sign, but can for Python's unbounded ints.
4362 */
4363PyObject*
4364_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
4365 char **pbuf, int *plen)
4366{
4367 PyObject *result = NULL;
4368 char *buf;
4369 Py_ssize_t i;
4370 int sign; /* 1 if '-', else 0 */
4371 int len; /* number of characters */
4372 Py_ssize_t llen;
4373 int numdigits; /* len == numnondigits + numdigits */
4374 int numnondigits = 0;
4375
4376 switch (type) {
4377 case 'd':
4378 case 'u':
4379 result = Py_TYPE(val)->tp_str(val);
4380 break;
4381 case 'o':
4382 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4383 break;
4384 case 'x':
4385 case 'X':
4386 numnondigits = 2;
4387 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4388 break;
4389 default:
4390 assert(!"'type' not in [duoxX]");
4391 }
4392 if (!result)
4393 return NULL;
4394
4395 buf = PyBytes_AsString(result);
4396 if (!buf) {
4397 Py_DECREF(result);
4398 return NULL;
4399 }
4400
4401 /* To modify the string in-place, there can only be one reference. */
4402 if (Py_REFCNT(result) != 1) {
4403 PyErr_BadInternalCall();
4404 return NULL;
4405 }
4406 llen = PyBytes_Size(result);
4407 if (llen > INT_MAX) {
4408 PyErr_SetString(PyExc_ValueError, "string too large in _PyBytes_FormatLong");
4409 return NULL;
4410 }
4411 len = (int)llen;
4412 if (buf[len-1] == 'L') {
4413 --len;
4414 buf[len] = '\0';
4415 }
4416 sign = buf[0] == '-';
4417 numnondigits += sign;
4418 numdigits = len - numnondigits;
4419 assert(numdigits > 0);
4420
4421 /* Get rid of base marker unless F_ALT */
4422 if ((flags & F_ALT) == 0) {
4423 /* Need to skip 0x, 0X or 0. */
4424 int skipped = 0;
4425 switch (type) {
4426 case 'o':
4427 assert(buf[sign] == '0');
4428 /* If 0 is only digit, leave it alone. */
4429 if (numdigits > 1) {
4430 skipped = 1;
4431 --numdigits;
4432 }
4433 break;
4434 case 'x':
4435 case 'X':
4436 assert(buf[sign] == '0');
4437 assert(buf[sign + 1] == 'x');
4438 skipped = 2;
4439 numnondigits -= 2;
4440 break;
4441 }
4442 if (skipped) {
4443 buf += skipped;
4444 len -= skipped;
4445 if (sign)
4446 buf[0] = '-';
4447 }
4448 assert(len == numnondigits + numdigits);
4449 assert(numdigits > 0);
4450 }
4451
4452 /* Fill with leading zeroes to meet minimum width. */
4453 if (prec > numdigits) {
4454 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
4455 numnondigits + prec);
4456 char *b1;
4457 if (!r1) {
4458 Py_DECREF(result);
4459 return NULL;
4460 }
4461 b1 = PyBytes_AS_STRING(r1);
4462 for (i = 0; i < numnondigits; ++i)
4463 *b1++ = *buf++;
4464 for (i = 0; i < prec - numdigits; i++)
4465 *b1++ = '0';
4466 for (i = 0; i < numdigits; i++)
4467 *b1++ = *buf++;
4468 *b1 = '\0';
4469 Py_DECREF(result);
4470 result = r1;
4471 buf = PyBytes_AS_STRING(result);
4472 len = numnondigits + prec;
4473 }
4474
4475 /* Fix up case for hex conversions. */
4476 if (type == 'X') {
4477 /* Need to convert all lower case letters to upper case.
4478 and need to convert 0x to 0X (and -0x to -0X). */
4479 for (i = 0; i < len; i++)
4480 if (buf[i] >= 'a' && buf[i] <= 'x')
4481 buf[i] -= 'a'-'A';
4482 }
4483 *pbuf = buf;
4484 *plen = len;
4485 return result;
4486}
4487
4488Py_LOCAL_INLINE(int)
4489formatint(char *buf, size_t buflen, int flags,
4490 int prec, int type, PyObject *v)
4491{
4492 /* fmt = '%#.' + `prec` + 'l' + `type`
4493 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4494 + 1 + 1 = 24 */
4495 char fmt[64]; /* plenty big enough! */
4496 char *sign;
4497 long x;
4498
4499 x = PyInt_AsLong(v);
4500 if (x == -1 && PyErr_Occurred()) {
4501 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4502 Py_TYPE(v)->tp_name);
4503 return -1;
4504 }
4505 if (x < 0 && type == 'u') {
4506 type = 'd';
4507 }
4508 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4509 sign = "-";
4510 else
4511 sign = "";
4512 if (prec < 0)
4513 prec = 1;
4514
4515 if ((flags & F_ALT) &&
4516 (type == 'x' || type == 'X')) {
4517 /* When converting under %#x or %#X, there are a number
4518 * of issues that cause pain:
4519 * - when 0 is being converted, the C standard leaves off
4520 * the '0x' or '0X', which is inconsistent with other
4521 * %#x/%#X conversions and inconsistent with Python's
4522 * hex() function
4523 * - there are platforms that violate the standard and
4524 * convert 0 with the '0x' or '0X'
4525 * (Metrowerks, Compaq Tru64)
4526 * - there are platforms that give '0x' when converting
4527 * under %#X, but convert 0 in accordance with the
4528 * standard (OS/2 EMX)
4529 *
4530 * We can achieve the desired consistency by inserting our
4531 * own '0x' or '0X' prefix, and substituting %x/%X in place
4532 * of %#x/%#X.
4533 *
4534 * Note that this is the same approach as used in
4535 * formatint() in unicodeobject.c
4536 */
4537 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4538 sign, type, prec, type);
4539 }
4540 else {
4541 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4542 sign, (flags&F_ALT) ? "#" : "",
4543 prec, type);
4544 }
4545
4546 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4547 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4548 */
4549 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4550 PyErr_SetString(PyExc_OverflowError,
4551 "formatted integer is too long (precision too large?)");
4552 return -1;
4553 }
4554 if (sign[0])
4555 PyOS_snprintf(buf, buflen, fmt, -x);
4556 else
4557 PyOS_snprintf(buf, buflen, fmt, x);
4558 return (int)strlen(buf);
4559}
4560
4561Py_LOCAL_INLINE(int)
4562formatchar(char *buf, size_t buflen, PyObject *v)
4563{
4564 /* presume that the buffer is at least 2 characters long */
4565 if (PyBytes_Check(v)) {
4566 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4567 return -1;
4568 }
4569 else {
4570 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4571 return -1;
4572 }
4573 buf[1] = '\0';
4574 return 1;
4575}
4576
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
4586
4587PyObject *
4588PyBytes_Format(PyObject *format, PyObject *args)
4589{
4590 char *fmt, *res;
4591 Py_ssize_t arglen, argidx;
4592 Py_ssize_t reslen, rescnt, fmtcnt;
4593 int args_owned = 0;
4594 PyObject *result, *orig_args;
4595#ifdef Py_USING_UNICODE
4596 PyObject *v, *w;
4597#endif
4598 PyObject *dict = NULL;
4599 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
4600 PyErr_BadInternalCall();
4601 return NULL;
4602 }
4603 orig_args = args;
4604 fmt = PyBytes_AS_STRING(format);
4605 fmtcnt = PyBytes_GET_SIZE(format);
4606 reslen = rescnt = fmtcnt + 100;
4607 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
4608 if (result == NULL)
4609 return NULL;
4610 res = PyBytes_AsString(result);
4611 if (PyTuple_Check(args)) {
4612 arglen = PyTuple_GET_SIZE(args);
4613 argidx = 0;
4614 }
4615 else {
4616 arglen = -1;
4617 argidx = -2;
4618 }
4619 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4620 !PyObject_TypeCheck(args, &PyBaseString_Type))
4621 dict = args;
4622 while (--fmtcnt >= 0) {
4623 if (*fmt != '%') {
4624 if (--rescnt < 0) {
4625 rescnt = fmtcnt + 100;
4626 reslen += rescnt;
4627 if (_PyBytes_Resize(&result, reslen) < 0)
4628 return NULL;
4629 res = PyBytes_AS_STRING(result)
4630 + reslen - rescnt;
4631 --rescnt;
4632 }
4633 *res++ = *fmt++;
4634 }
4635 else {
4636 /* Got a format specifier */
4637 int flags = 0;
4638 Py_ssize_t width = -1;
4639 int prec = -1;
4640 int c = '\0';
4641 int fill;
4642 int isnumok;
4643 PyObject *v = NULL;
4644 PyObject *temp = NULL;
4645 char *pbuf;
4646 int sign;
4647 Py_ssize_t len;
4648 char formatbuf[FORMATBUFLEN];
4649 /* For format{float,int,char}() */
4650#ifdef Py_USING_UNICODE
4651 char *fmt_start = fmt;
4652 Py_ssize_t argidx_start = argidx;
4653#endif
4654
4655 fmt++;
4656 if (*fmt == '(') {
4657 char *keystart;
4658 Py_ssize_t keylen;
4659 PyObject *key;
4660 int pcount = 1;
4661
4662 if (dict == NULL) {
4663 PyErr_SetString(PyExc_TypeError,
4664 "format requires a mapping");
4665 goto error;
4666 }
4667 ++fmt;
4668 --fmtcnt;
4669 keystart = fmt;
4670 /* Skip over balanced parentheses */
4671 while (pcount > 0 && --fmtcnt >= 0) {
4672 if (*fmt == ')')
4673 --pcount;
4674 else if (*fmt == '(')
4675 ++pcount;
4676 fmt++;
4677 }
4678 keylen = fmt - keystart - 1;
4679 if (fmtcnt < 0 || pcount > 0) {
4680 PyErr_SetString(PyExc_ValueError,
4681 "incomplete format key");
4682 goto error;
4683 }
4684 key = PyBytes_FromStringAndSize(keystart,
4685 keylen);
4686 if (key == NULL)
4687 goto error;
4688 if (args_owned) {
4689 Py_DECREF(args);
4690 args_owned = 0;
4691 }
4692 args = PyObject_GetItem(dict, key);
4693 Py_DECREF(key);
4694 if (args == NULL) {
4695 goto error;
4696 }
4697 args_owned = 1;
4698 arglen = -1;
4699 argidx = -2;
4700 }
4701 while (--fmtcnt >= 0) {
4702 switch (c = *fmt++) {
4703 case '-': flags |= F_LJUST; continue;
4704 case '+': flags |= F_SIGN; continue;
4705 case ' ': flags |= F_BLANK; continue;
4706 case '#': flags |= F_ALT; continue;
4707 case '0': flags |= F_ZERO; continue;
4708 }
4709 break;
4710 }
4711 if (c == '*') {
4712 v = getnextarg(args, arglen, &argidx);
4713 if (v == NULL)
4714 goto error;
4715 if (!PyInt_Check(v)) {
4716 PyErr_SetString(PyExc_TypeError,
4717 "* wants int");
4718 goto error;
4719 }
4720 width = PyInt_AsLong(v);
4721 if (width < 0) {
4722 flags |= F_LJUST;
4723 width = -width;
4724 }
4725 if (--fmtcnt >= 0)
4726 c = *fmt++;
4727 }
4728 else if (c >= 0 && isdigit(c)) {
4729 width = c - '0';
4730 while (--fmtcnt >= 0) {
4731 c = Py_CHARMASK(*fmt++);
4732 if (!isdigit(c))
4733 break;
4734 if ((width*10) / 10 != width) {
4735 PyErr_SetString(
4736 PyExc_ValueError,
4737 "width too big");
4738 goto error;
4739 }
4740 width = width*10 + (c - '0');
4741 }
4742 }
4743 if (c == '.') {
4744 prec = 0;
4745 if (--fmtcnt >= 0)
4746 c = *fmt++;
4747 if (c == '*') {
4748 v = getnextarg(args, arglen, &argidx);
4749 if (v == NULL)
4750 goto error;
4751 if (!PyInt_Check(v)) {
4752 PyErr_SetString(
4753 PyExc_TypeError,
4754 "* wants int");
4755 goto error;
4756 }
4757 prec = PyInt_AsLong(v);
4758 if (prec < 0)
4759 prec = 0;
4760 if (--fmtcnt >= 0)
4761 c = *fmt++;
4762 }
4763 else if (c >= 0 && isdigit(c)) {
4764 prec = c - '0';
4765 while (--fmtcnt >= 0) {
4766 c = Py_CHARMASK(*fmt++);
4767 if (!isdigit(c))
4768 break;
4769 if ((prec*10) / 10 != prec) {
4770 PyErr_SetString(
4771 PyExc_ValueError,
4772 "prec too big");
4773 goto error;
4774 }
4775 prec = prec*10 + (c - '0');
4776 }
4777 }
4778 } /* prec */
4779 if (fmtcnt >= 0) {
4780 if (c == 'h' || c == 'l' || c == 'L') {
4781 if (--fmtcnt >= 0)
4782 c = *fmt++;
4783 }
4784 }
4785 if (fmtcnt < 0) {
4786 PyErr_SetString(PyExc_ValueError,
4787 "incomplete format");
4788 goto error;
4789 }
4790 if (c != '%') {
4791 v = getnextarg(args, arglen, &argidx);
4792 if (v == NULL)
4793 goto error;
4794 }
4795 sign = 0;
4796 fill = ' ';
4797 switch (c) {
4798 case '%':
4799 pbuf = "%";
4800 len = 1;
4801 break;
4802 case 's':
4803#ifdef Py_USING_UNICODE
4804 if (PyUnicode_Check(v)) {
4805 fmt = fmt_start;
4806 argidx = argidx_start;
4807 goto unicode;
4808 }
4809#endif
4810 temp = _PyObject_Str(v);
4811#ifdef Py_USING_UNICODE
4812 if (temp != NULL && PyUnicode_Check(temp)) {
4813 Py_DECREF(temp);
4814 fmt = fmt_start;
4815 argidx = argidx_start;
4816 goto unicode;
4817 }
4818#endif
4819 /* Fall through */
4820 case 'r':
4821 if (c == 'r')
4822 temp = PyObject_Repr(v);
4823 if (temp == NULL)
4824 goto error;
4825 if (!PyBytes_Check(temp)) {
4826 PyErr_SetString(PyExc_TypeError,
4827 "%s argument has non-string str()");
4828 Py_DECREF(temp);
4829 goto error;
4830 }
4831 pbuf = PyBytes_AS_STRING(temp);
4832 len = PyBytes_GET_SIZE(temp);
4833 if (prec >= 0 && len > prec)
4834 len = prec;
4835 break;
4836 case 'i':
4837 case 'd':
4838 case 'u':
4839 case 'o':
4840 case 'x':
4841 case 'X':
4842 if (c == 'i')
4843 c = 'd';
4844 isnumok = 0;
4845 if (PyNumber_Check(v)) {
4846 PyObject *iobj=NULL;
4847
4848 if (PyInt_Check(v) || (PyLong_Check(v))) {
4849 iobj = v;
4850 Py_INCREF(iobj);
4851 }
4852 else {
4853 iobj = PyNumber_Int(v);
4854 if (iobj==NULL) iobj = PyNumber_Long(v);
4855 }
4856 if (iobj!=NULL) {
4857 if (PyInt_Check(iobj)) {
4858 isnumok = 1;
4859 pbuf = formatbuf;
4860 len = formatint(pbuf,
4861 sizeof(formatbuf),
4862 flags, prec, c, iobj);
4863 Py_DECREF(iobj);
4864 if (len < 0)
4865 goto error;
4866 sign = 1;
4867 }
4868 else if (PyLong_Check(iobj)) {
4869 int ilen;
4870
4871 isnumok = 1;
4872 temp = _PyBytes_FormatLong(iobj, flags,
4873 prec, c, &pbuf, &ilen);
4874 Py_DECREF(iobj);
4875 len = ilen;
4876 if (!temp)
4877 goto error;
4878 sign = 1;
4879 }
4880 else {
4881 Py_DECREF(iobj);
4882 }
4883 }
4884 }
4885 if (!isnumok) {
4886 PyErr_Format(PyExc_TypeError,
4887 "%%%c format: a number is required, "
4888 "not %.200s", c, Py_TYPE(v)->tp_name);
4889 goto error;
4890 }
4891 if (flags & F_ZERO)
4892 fill = '0';
4893 break;
4894 case 'e':
4895 case 'E':
4896 case 'f':
4897 case 'F':
4898 case 'g':
4899 case 'G':
4900 if (c == 'F')
4901 c = 'f';
4902 pbuf = formatbuf;
4903 len = formatfloat(pbuf, sizeof(formatbuf),
4904 flags, prec, c, v);
4905 if (len < 0)
4906 goto error;
4907 sign = 1;
4908 if (flags & F_ZERO)
4909 fill = '0';
4910 break;
4911 case 'c':
4912#ifdef Py_USING_UNICODE
4913 if (PyUnicode_Check(v)) {
4914 fmt = fmt_start;
4915 argidx = argidx_start;
4916 goto unicode;
4917 }
4918#endif
4919 pbuf = formatbuf;
4920 len = formatchar(pbuf, sizeof(formatbuf), v);
4921 if (len < 0)
4922 goto error;
4923 break;
4924 default:
4925 PyErr_Format(PyExc_ValueError,
4926 "unsupported format character '%c' (0x%x) "
4927 "at index %zd",
4928 c, c,
4929 (Py_ssize_t)(fmt - 1 -
4930 PyBytes_AsString(format)));
4931 goto error;
4932 }
4933 if (sign) {
4934 if (*pbuf == '-' || *pbuf == '+') {
4935 sign = *pbuf++;
4936 len--;
4937 }
4938 else if (flags & F_SIGN)
4939 sign = '+';
4940 else if (flags & F_BLANK)
4941 sign = ' ';
4942 else
4943 sign = 0;
4944 }
4945 if (width < len)
4946 width = len;
4947 if (rescnt - (sign != 0) < width) {
4948 reslen -= rescnt;
4949 rescnt = width + fmtcnt + 100;
4950 reslen += rescnt;
4951 if (reslen < 0) {
4952 Py_DECREF(result);
4953 Py_XDECREF(temp);
4954 return PyErr_NoMemory();
4955 }
4956 if (_PyBytes_Resize(&result, reslen) < 0) {
4957 Py_XDECREF(temp);
4958 return NULL;
4959 }
4960 res = PyBytes_AS_STRING(result)
4961 + reslen - rescnt;
4962 }
4963 if (sign) {
4964 if (fill != ' ')
4965 *res++ = sign;
4966 rescnt--;
4967 if (width > len)
4968 width--;
4969 }
4970 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4971 assert(pbuf[0] == '0');
4972 assert(pbuf[1] == c);
4973 if (fill != ' ') {
4974 *res++ = *pbuf++;
4975 *res++ = *pbuf++;
4976 }
4977 rescnt -= 2;
4978 width -= 2;
4979 if (width < 0)
4980 width = 0;
4981 len -= 2;
4982 }
4983 if (width > len && !(flags & F_LJUST)) {
4984 do {
4985 --rescnt;
4986 *res++ = fill;
4987 } while (--width > len);
4988 }
4989 if (fill == ' ') {
4990 if (sign)
4991 *res++ = sign;
4992 if ((flags & F_ALT) &&
4993 (c == 'x' || c == 'X')) {
4994 assert(pbuf[0] == '0');
4995 assert(pbuf[1] == c);
4996 *res++ = *pbuf++;
4997 *res++ = *pbuf++;
4998 }
4999 }
5000 Py_MEMCPY(res, pbuf, len);
5001 res += len;
5002 rescnt -= len;
5003 while (--width >= len) {
5004 --rescnt;
5005 *res++ = ' ';
5006 }
5007 if (dict && (argidx < arglen) && c != '%') {
5008 PyErr_SetString(PyExc_TypeError,
5009 "not all arguments converted during string formatting");
5010 Py_XDECREF(temp);
5011 goto error;
5012 }
5013 Py_XDECREF(temp);
5014 } /* '%' */
5015 } /* until end */
5016 if (argidx < arglen && !dict) {
5017 PyErr_SetString(PyExc_TypeError,
5018 "not all arguments converted during string formatting");
5019 goto error;
5020 }
5021 if (args_owned) {
5022 Py_DECREF(args);
5023 }
5024 _PyBytes_Resize(&result, reslen - rescnt);
5025 return result;
5026
5027#ifdef Py_USING_UNICODE
5028 unicode:
5029 if (args_owned) {
5030 Py_DECREF(args);
5031 args_owned = 0;
5032 }
5033 /* Fiddle args right (remove the first argidx arguments) */
5034 if (PyTuple_Check(orig_args) && argidx > 0) {
5035 PyObject *v;
5036 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5037 v = PyTuple_New(n);
5038 if (v == NULL)
5039 goto error;
5040 while (--n >= 0) {
5041 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5042 Py_INCREF(w);
5043 PyTuple_SET_ITEM(v, n, w);
5044 }
5045 args = v;
5046 } else {
5047 Py_INCREF(orig_args);
5048 args = orig_args;
5049 }
5050 args_owned = 1;
5051 /* Take what we have of the result and let the Unicode formatting
5052 function format the rest of the input. */
5053 rescnt = res - PyBytes_AS_STRING(result);
5054 if (_PyBytes_Resize(&result, rescnt))
5055 goto error;
5056 fmtcnt = PyBytes_GET_SIZE(format) - \
5057 (fmt - PyBytes_AS_STRING(format));
5058 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5059 if (format == NULL)
5060 goto error;
5061 v = PyUnicode_Format(format, args);
5062 Py_DECREF(format);
5063 if (v == NULL)
5064 goto error;
5065 /* Paste what we have (result) to what the Unicode formatting
5066 function returned (v) and return the result (or error) */
5067 w = PyUnicode_Concat(result, v);
5068 Py_DECREF(result);
5069 Py_DECREF(v);
5070 Py_DECREF(args);
5071 return w;
5072#endif /* Py_USING_UNICODE */
5073
5074 error:
5075 Py_DECREF(result);
5076 if (args_owned) {
5077 Py_DECREF(args);
5078 }
5079 return NULL;
5080}
5081
5082void
5083PyBytes_InternInPlace(PyObject **p)
5084{
5085 register PyBytesObject *s = (PyBytesObject *)(*p);
5086 PyObject *t;
5087 if (s == NULL || !PyBytes_Check(s))
5088 Py_FatalError("PyBytes_InternInPlace: strings only please!");
5089 /* If it's a string subclass, we don't really know what putting
5090 it in the interned dict might do. */
5091 if (!PyBytes_CheckExact(s))
5092 return;
5093 if (PyBytes_CHECK_INTERNED(s))
5094 return;
5095 if (interned == NULL) {
5096 interned = PyDict_New();
5097 if (interned == NULL) {
5098 PyErr_Clear(); /* Don't leave an exception */
5099 return;
5100 }
5101 }
5102 t = PyDict_GetItem(interned, (PyObject *)s);
5103 if (t) {
5104 Py_INCREF(t);
5105 Py_DECREF(*p);
5106 *p = t;
5107 return;
5108 }
5109
5110 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5111 PyErr_Clear();
5112 return;
5113 }
5114 /* The two references in interned are not counted by refcnt.
5115 The string deallocator will take care of this */
5116 Py_REFCNT(s) -= 2;
5117 PyBytes_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5118}
5119
5120void
5121PyBytes_InternImmortal(PyObject **p)
5122{
5123 PyBytes_InternInPlace(p);
5124 if (PyBytes_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5125 PyBytes_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5126 Py_INCREF(*p);
5127 }
5128}
5129
5130
5131PyObject *
5132PyBytes_InternFromString(const char *cp)
5133{
5134 PyObject *s = PyBytes_FromString(cp);
5135 if (s == NULL)
5136 return NULL;
5137 PyBytes_InternInPlace(&s);
5138 return s;
5139}
5140
5141void
5142PyBytes_Fini(void)
5143{
5144 int i;
5145 for (i = 0; i < UCHAR_MAX + 1; i++) {
5146 Py_XDECREF(characters[i]);
5147 characters[i] = NULL;
5148 }
5149 Py_XDECREF(nullstring);
5150 nullstring = NULL;
5151}
5152
5153void _Py_ReleaseInternedStrings(void)
5154{
5155 PyObject *keys;
5156 PyBytesObject *s;
5157 Py_ssize_t i, n;
5158 Py_ssize_t immortal_size = 0, mortal_size = 0;
5159
5160 if (interned == NULL || !PyDict_Check(interned))
5161 return;
5162 keys = PyDict_Keys(interned);
5163 if (keys == NULL || !PyList_Check(keys)) {
5164 PyErr_Clear();
5165 return;
5166 }
5167
5168 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5169 detector, interned strings are not forcibly deallocated; rather, we
5170 give them their stolen references back, and then clear and DECREF
5171 the interned dict. */
5172
5173 n = PyList_GET_SIZE(keys);
5174 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5175 n);
5176 for (i = 0; i < n; i++) {
5177 s = (PyBytesObject *) PyList_GET_ITEM(keys, i);
5178 switch (s->ob_sstate) {
5179 case SSTATE_NOT_INTERNED:
5180 /* XXX Shouldn't happen */
5181 break;
5182 case SSTATE_INTERNED_IMMORTAL:
5183 Py_REFCNT(s) += 1;
5184 immortal_size += Py_SIZE(s);
5185 break;
5186 case SSTATE_INTERNED_MORTAL:
5187 Py_REFCNT(s) += 2;
5188 mortal_size += Py_SIZE(s);
5189 break;
5190 default:
5191 Py_FatalError("Inconsistent interned string state.");
5192 }
5193 s->ob_sstate = SSTATE_NOT_INTERNED;
5194 }
5195 fprintf(stderr, "total size of all interned strings: "
5196 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5197 "mortal/immortal\n", mortal_size, immortal_size);
5198 Py_DECREF(keys);
5199 PyDict_Clear(interned);
5200 Py_DECREF(interned);
5201 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005202}