blob: 0f4d4c3be6669722c219cc54301e7d8d8df46d1c [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* String object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00006
Christian Heimes44720832008-05-26 13:01:01 +00007#include "formatter_string.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00008
Christian Heimes44720832008-05-26 13:01:01 +00009#include <ctype.h>
10
11#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
15static PyBytesObject *characters[UCHAR_MAX + 1];
16static PyBytesObject *nullstring;
17
18/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
54PyObject *
55PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000056{
Christian Heimes44720832008-05-26 13:01:01 +000057 register PyBytesObject *op;
58 if (size < 0) {
59 PyErr_SetString(PyExc_SystemError,
60 "Negative size passed to PyBytes_FromStringAndSize");
61 return NULL;
62 }
63 if (size == 0 && (op = nullstring) != NULL) {
64#ifdef COUNT_ALLOCS
65 null_strings++;
66#endif
67 Py_INCREF(op);
68 return (PyObject *)op;
69 }
70 if (size == 1 && str != NULL &&
71 (op = characters[*str & UCHAR_MAX]) != NULL)
72 {
73#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
76 Py_INCREF(op);
77 return (PyObject *)op;
78 }
79
80 /* Inline PyObject_NewVar */
81 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
82 if (op == NULL)
83 return PyErr_NoMemory();
84 PyObject_INIT_VAR(op, &PyBytes_Type, size);
85 op->ob_shash = -1;
86 op->ob_sstate = SSTATE_NOT_INTERNED;
87 if (str != NULL)
88 Py_MEMCPY(op->ob_sval, str, size);
89 op->ob_sval[size] = '\0';
90 /* share short strings */
91 if (size == 0) {
92 PyObject *t = (PyObject *)op;
93 PyBytes_InternInPlace(&t);
94 op = (PyBytesObject *)t;
95 nullstring = op;
96 Py_INCREF(op);
97 } else if (size == 1 && str != NULL) {
98 PyObject *t = (PyObject *)op;
99 PyBytes_InternInPlace(&t);
100 op = (PyBytesObject *)t;
101 characters[*str & UCHAR_MAX] = op;
102 Py_INCREF(op);
103 }
104 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000105}
106
Christian Heimes44720832008-05-26 13:01:01 +0000107PyObject *
108PyBytes_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000109{
Christian Heimes44720832008-05-26 13:01:01 +0000110 register size_t size;
111 register PyBytesObject *op;
112
113 assert(str != NULL);
114 size = strlen(str);
115 if (size > PY_SSIZE_T_MAX) {
116 PyErr_SetString(PyExc_OverflowError,
117 "string is too long for a Python string");
118 return NULL;
119 }
120 if (size == 0 && (op = nullstring) != NULL) {
121#ifdef COUNT_ALLOCS
122 null_strings++;
123#endif
124 Py_INCREF(op);
125 return (PyObject *)op;
126 }
127 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
128#ifdef COUNT_ALLOCS
129 one_strings++;
130#endif
131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134
135 /* Inline PyObject_NewVar */
136 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
137 if (op == NULL)
138 return PyErr_NoMemory();
139 PyObject_INIT_VAR(op, &PyBytes_Type, size);
140 op->ob_shash = -1;
141 op->ob_sstate = SSTATE_NOT_INTERNED;
142 Py_MEMCPY(op->ob_sval, str, size+1);
143 /* share short strings */
144 if (size == 0) {
145 PyObject *t = (PyObject *)op;
146 PyBytes_InternInPlace(&t);
147 op = (PyBytesObject *)t;
148 nullstring = op;
149 Py_INCREF(op);
150 } else if (size == 1) {
151 PyObject *t = (PyObject *)op;
152 PyBytes_InternInPlace(&t);
153 op = (PyBytesObject *)t;
154 characters[*str & UCHAR_MAX] = op;
155 Py_INCREF(op);
156 }
157 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000158}
159
Christian Heimes44720832008-05-26 13:01:01 +0000160PyObject *
161PyBytes_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000162{
Christian Heimes44720832008-05-26 13:01:01 +0000163 va_list count;
164 Py_ssize_t n = 0;
165 const char* f;
166 char *s;
167 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000168
Christian Heimes44720832008-05-26 13:01:01 +0000169#ifdef VA_LIST_IS_ARRAY
170 Py_MEMCPY(count, vargs, sizeof(va_list));
171#else
172#ifdef __va_copy
173 __va_copy(count, vargs);
174#else
175 count = vargs;
176#endif
177#endif
178 /* step 1: figure out how large a buffer we need */
179 for (f = format; *f; f++) {
180 if (*f == '%') {
181 const char* p = f;
182 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
183 ;
184
185 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
186 * they don't affect the amount of space we reserve.
187 */
188 if ((*f == 'l' || *f == 'z') &&
189 (f[1] == 'd' || f[1] == 'u'))
190 ++f;
191
192 switch (*f) {
193 case 'c':
194 (void)va_arg(count, int);
195 /* fall through... */
196 case '%':
197 n++;
198 break;
199 case 'd': case 'u': case 'i': case 'x':
200 (void) va_arg(count, int);
201 /* 20 bytes is enough to hold a 64-bit
202 integer. Decimal takes the most space.
203 This isn't enough for octal. */
204 n += 20;
205 break;
206 case 's':
207 s = va_arg(count, char*);
208 n += strlen(s);
209 break;
210 case 'p':
211 (void) va_arg(count, int);
212 /* maximum 64-bit pointer representation:
213 * 0xffffffffffffffff
214 * so 19 characters is enough.
215 * XXX I count 18 -- what's the extra for?
216 */
217 n += 19;
218 break;
219 default:
220 /* if we stumble upon an unknown
221 formatting code, copy the rest of
222 the format string to the output
223 string. (we cannot just skip the
224 code, since there's no way to know
225 what's in the argument list) */
226 n += strlen(p);
227 goto expand;
228 }
229 } else
230 n++;
231 }
232 expand:
233 /* step 2: fill the buffer */
234 /* Since we've analyzed how much space we need for the worst case,
235 use sprintf directly instead of the slower PyOS_snprintf. */
236 string = PyBytes_FromStringAndSize(NULL, n);
237 if (!string)
238 return NULL;
239
240 s = PyBytes_AsString(string);
241
242 for (f = format; *f; f++) {
243 if (*f == '%') {
244 const char* p = f++;
245 Py_ssize_t i;
246 int longflag = 0;
247 int size_tflag = 0;
248 /* parse the width.precision part (we're only
249 interested in the precision value, if any) */
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 if (*f == '.') {
254 f++;
255 n = 0;
256 while (isdigit(Py_CHARMASK(*f)))
257 n = (n*10) + *f++ - '0';
258 }
259 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
260 f++;
261 /* handle the long flag, but only for %ld and %lu.
262 others can be added when necessary. */
263 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
264 longflag = 1;
265 ++f;
266 }
267 /* handle the size_t flag. */
268 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
269 size_tflag = 1;
270 ++f;
271 }
272
273 switch (*f) {
274 case 'c':
275 *s++ = va_arg(vargs, int);
276 break;
277 case 'd':
278 if (longflag)
279 sprintf(s, "%ld", va_arg(vargs, long));
280 else if (size_tflag)
281 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
282 va_arg(vargs, Py_ssize_t));
283 else
284 sprintf(s, "%d", va_arg(vargs, int));
285 s += strlen(s);
286 break;
287 case 'u':
288 if (longflag)
289 sprintf(s, "%lu",
290 va_arg(vargs, unsigned long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
293 va_arg(vargs, size_t));
294 else
295 sprintf(s, "%u",
296 va_arg(vargs, unsigned int));
297 s += strlen(s);
298 break;
299 case 'i':
300 sprintf(s, "%i", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'x':
304 sprintf(s, "%x", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 's':
308 p = va_arg(vargs, char*);
309 i = strlen(p);
310 if (n > 0 && i > n)
311 i = n;
312 Py_MEMCPY(s, p, i);
313 s += i;
314 break;
315 case 'p':
316 sprintf(s, "%p", va_arg(vargs, void*));
317 /* %p is ill-defined: ensure leading 0x. */
318 if (s[1] == 'X')
319 s[1] = 'x';
320 else if (s[1] != 'x') {
321 memmove(s+2, s, strlen(s)+1);
322 s[0] = '0';
323 s[1] = 'x';
324 }
325 s += strlen(s);
326 break;
327 case '%':
328 *s++ = '%';
329 break;
330 default:
331 strcpy(s, p);
332 s += strlen(s);
333 goto end;
334 }
335 } else
336 *s++ = *f;
337 }
338
339 end:
340 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
341 return string;
342}
343
344PyObject *
345PyBytes_FromFormat(const char *format, ...)
346{
347 PyObject* ret;
348 va_list vargs;
349
350#ifdef HAVE_STDARG_PROTOTYPES
351 va_start(vargs, format);
352#else
353 va_start(vargs);
354#endif
355 ret = PyBytes_FromFormatV(format, vargs);
356 va_end(vargs);
357 return ret;
358}
359
360
361PyObject *PyBytes_Decode(const char *s,
362 Py_ssize_t size,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v, *str;
367
368 str = PyBytes_FromStringAndSize(s, size);
369 if (str == NULL)
370 return NULL;
371 v = PyBytes_AsDecodedString(str, encoding, errors);
372 Py_DECREF(str);
373 return v;
374}
375
376PyObject *PyBytes_AsDecodedObject(PyObject *str,
377 const char *encoding,
378 const char *errors)
379{
380 PyObject *v;
381
382 if (!PyBytes_Check(str)) {
383 PyErr_BadArgument();
384 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000385 }
386
Christian Heimes44720832008-05-26 13:01:01 +0000387 if (encoding == NULL) {
388#ifdef Py_USING_UNICODE
389 encoding = PyUnicode_GetDefaultEncoding();
390#else
391 PyErr_SetString(PyExc_ValueError, "no encoding specified");
392 goto onError;
393#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000394 }
Christian Heimes44720832008-05-26 13:01:01 +0000395
396 /* Decode via the codec registry */
397 v = PyCodec_Decode(str, encoding, errors);
398 if (v == NULL)
399 goto onError;
400
401 return v;
402
403 onError:
404 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000405}
406
Christian Heimes44720832008-05-26 13:01:01 +0000407PyObject *PyBytes_AsDecodedString(PyObject *str,
408 const char *encoding,
409 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000410{
Christian Heimes44720832008-05-26 13:01:01 +0000411 PyObject *v;
412
413 v = PyBytes_AsDecodedObject(str, encoding, errors);
414 if (v == NULL)
415 goto onError;
416
417#ifdef Py_USING_UNICODE
418 /* Convert Unicode to a string using the default encoding */
419 if (PyUnicode_Check(v)) {
420 PyObject *temp = v;
421 v = PyUnicode_AsEncodedString(v, NULL, NULL);
422 Py_DECREF(temp);
423 if (v == NULL)
424 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000425 }
Christian Heimes44720832008-05-26 13:01:01 +0000426#endif
427 if (!PyBytes_Check(v)) {
428 PyErr_Format(PyExc_TypeError,
429 "decoder did not return a string object (type=%.400s)",
430 Py_TYPE(v)->tp_name);
431 Py_DECREF(v);
432 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000433 }
Christian Heimes44720832008-05-26 13:01:01 +0000434
435 return v;
436
437 onError:
438 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000439}
440
Christian Heimes44720832008-05-26 13:01:01 +0000441PyObject *PyBytes_Encode(const char *s,
442 Py_ssize_t size,
443 const char *encoding,
444 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000445{
Christian Heimes44720832008-05-26 13:01:01 +0000446 PyObject *v, *str;
447
448 str = PyBytes_FromStringAndSize(s, size);
449 if (str == NULL)
450 return NULL;
451 v = PyBytes_AsEncodedString(str, encoding, errors);
452 Py_DECREF(str);
453 return v;
454}
455
456PyObject *PyBytes_AsEncodedObject(PyObject *str,
457 const char *encoding,
458 const char *errors)
459{
460 PyObject *v;
461
462 if (!PyBytes_Check(str)) {
463 PyErr_BadArgument();
464 goto onError;
465 }
466
467 if (encoding == NULL) {
468#ifdef Py_USING_UNICODE
469 encoding = PyUnicode_GetDefaultEncoding();
470#else
471 PyErr_SetString(PyExc_ValueError, "no encoding specified");
472 goto onError;
473#endif
474 }
475
476 /* Encode via the codec registry */
477 v = PyCodec_Encode(str, encoding, errors);
478 if (v == NULL)
479 goto onError;
480
481 return v;
482
483 onError:
484 return NULL;
485}
486
487PyObject *PyBytes_AsEncodedString(PyObject *str,
488 const char *encoding,
489 const char *errors)
490{
491 PyObject *v;
492
493 v = PyBytes_AsEncodedObject(str, encoding, errors);
494 if (v == NULL)
495 goto onError;
496
497#ifdef Py_USING_UNICODE
498 /* Convert Unicode to a string using the default encoding */
499 if (PyUnicode_Check(v)) {
500 PyObject *temp = v;
501 v = PyUnicode_AsEncodedString(v, NULL, NULL);
502 Py_DECREF(temp);
503 if (v == NULL)
504 goto onError;
505 }
506#endif
507 if (!PyBytes_Check(v)) {
508 PyErr_Format(PyExc_TypeError,
509 "encoder did not return a string object (type=%.400s)",
510 Py_TYPE(v)->tp_name);
511 Py_DECREF(v);
512 goto onError;
513 }
514
515 return v;
516
517 onError:
518 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000519}
520
521static void
Christian Heimes44720832008-05-26 13:01:01 +0000522string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000523{
Christian Heimes44720832008-05-26 13:01:01 +0000524 switch (PyBytes_CHECK_INTERNED(op)) {
525 case SSTATE_NOT_INTERNED:
526 break;
527
528 case SSTATE_INTERNED_MORTAL:
529 /* revive dead object temporarily for DelItem */
530 Py_REFCNT(op) = 3;
531 if (PyDict_DelItem(interned, op) != 0)
532 Py_FatalError(
533 "deletion of interned string failed");
534 break;
535
536 case SSTATE_INTERNED_IMMORTAL:
537 Py_FatalError("Immortal interned string died.");
538
539 default:
540 Py_FatalError("Inconsistent interned string state.");
541 }
542 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000543}
544
Christian Heimes44720832008-05-26 13:01:01 +0000545/* Unescape a backslash-escaped string. If unicode is non-zero,
546 the string is a u-literal. If recode_encoding is non-zero,
547 the string is UTF-8 encoded and should be re-encoded in the
548 specified encoding. */
549
550PyObject *PyBytes_DecodeEscape(const char *s,
551 Py_ssize_t len,
552 const char *errors,
553 Py_ssize_t unicode,
554 const char *recode_encoding)
555{
556 int c;
557 char *p, *buf;
558 const char *end;
559 PyObject *v;
560 Py_ssize_t newlen = recode_encoding ? 4*len:len;
561 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
562 if (v == NULL)
563 return NULL;
564 p = buf = PyBytes_AsString(v);
565 end = s + len;
566 while (s < end) {
567 if (*s != '\\') {
568 non_esc:
569#ifdef Py_USING_UNICODE
570 if (recode_encoding && (*s & 0x80)) {
571 PyObject *u, *w;
572 char *r;
573 const char* t;
574 Py_ssize_t rn;
575 t = s;
576 /* Decode non-ASCII bytes as UTF-8. */
577 while (t < end && (*t & 0x80)) t++;
578 u = PyUnicode_DecodeUTF8(s, t - s, errors);
579 if(!u) goto failed;
580
581 /* Recode them in target encoding. */
582 w = PyUnicode_AsEncodedString(
583 u, recode_encoding, errors);
584 Py_DECREF(u);
585 if (!w) goto failed;
586
587 /* Append bytes to output buffer. */
588 assert(PyBytes_Check(w));
589 r = PyBytes_AS_STRING(w);
590 rn = PyBytes_GET_SIZE(w);
591 Py_MEMCPY(p, r, rn);
592 p += rn;
593 Py_DECREF(w);
594 s = t;
595 } else {
596 *p++ = *s++;
597 }
598#else
599 *p++ = *s++;
600#endif
601 continue;
602 }
603 s++;
604 if (s==end) {
605 PyErr_SetString(PyExc_ValueError,
606 "Trailing \\ in string");
607 goto failed;
608 }
609 switch (*s++) {
610 /* XXX This assumes ASCII! */
611 case '\n': break;
612 case '\\': *p++ = '\\'; break;
613 case '\'': *p++ = '\''; break;
614 case '\"': *p++ = '\"'; break;
615 case 'b': *p++ = '\b'; break;
616 case 'f': *p++ = '\014'; break; /* FF */
617 case 't': *p++ = '\t'; break;
618 case 'n': *p++ = '\n'; break;
619 case 'r': *p++ = '\r'; break;
620 case 'v': *p++ = '\013'; break; /* VT */
621 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
622 case '0': case '1': case '2': case '3':
623 case '4': case '5': case '6': case '7':
624 c = s[-1] - '0';
625 if (s < end && '0' <= *s && *s <= '7') {
626 c = (c<<3) + *s++ - '0';
627 if (s < end && '0' <= *s && *s <= '7')
628 c = (c<<3) + *s++ - '0';
629 }
630 *p++ = c;
631 break;
632 case 'x':
633 if (s+1 < end &&
634 isxdigit(Py_CHARMASK(s[0])) &&
635 isxdigit(Py_CHARMASK(s[1])))
636 {
637 unsigned int x = 0;
638 c = Py_CHARMASK(*s);
639 s++;
640 if (isdigit(c))
641 x = c - '0';
642 else if (islower(c))
643 x = 10 + c - 'a';
644 else
645 x = 10 + c - 'A';
646 x = x << 4;
647 c = Py_CHARMASK(*s);
648 s++;
649 if (isdigit(c))
650 x += c - '0';
651 else if (islower(c))
652 x += 10 + c - 'a';
653 else
654 x += 10 + c - 'A';
655 *p++ = x;
656 break;
657 }
658 if (!errors || strcmp(errors, "strict") == 0) {
659 PyErr_SetString(PyExc_ValueError,
660 "invalid \\x escape");
661 goto failed;
662 }
663 if (strcmp(errors, "replace") == 0) {
664 *p++ = '?';
665 } else if (strcmp(errors, "ignore") == 0)
666 /* do nothing */;
667 else {
668 PyErr_Format(PyExc_ValueError,
669 "decoding error; "
670 "unknown error handling code: %.400s",
671 errors);
672 goto failed;
673 }
674#ifndef Py_USING_UNICODE
675 case 'u':
676 case 'U':
677 case 'N':
678 if (unicode) {
679 PyErr_SetString(PyExc_ValueError,
680 "Unicode escapes not legal "
681 "when Unicode disabled");
682 goto failed;
683 }
684#endif
685 default:
686 *p++ = '\\';
687 s--;
688 goto non_esc; /* an arbitry number of unescaped
689 UTF-8 bytes may follow. */
690 }
691 }
692 if (p-buf < newlen)
693 _PyBytes_Resize(&v, p - buf);
694 return v;
695 failed:
696 Py_DECREF(v);
697 return NULL;
698}
699
700/* -------------------------------------------------------------------- */
701/* object api */
702
Christian Heimes1a6387e2008-03-26 12:49:49 +0000703static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000704string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000705{
Christian Heimes44720832008-05-26 13:01:01 +0000706 char *s;
707 Py_ssize_t len;
708 if (PyBytes_AsStringAndSize(op, &s, &len))
709 return -1;
710 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000711}
712
Christian Heimes44720832008-05-26 13:01:01 +0000713static /*const*/ char *
714string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000715{
Christian Heimes44720832008-05-26 13:01:01 +0000716 char *s;
717 Py_ssize_t len;
718 if (PyBytes_AsStringAndSize(op, &s, &len))
719 return NULL;
720 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000721}
722
723Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000724PyBytes_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000725{
Christian Heimes44720832008-05-26 13:01:01 +0000726 if (!PyBytes_Check(op))
727 return string_getsize(op);
728 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000729}
730
Christian Heimes44720832008-05-26 13:01:01 +0000731/*const*/ char *
732PyBytes_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000733{
Christian Heimes44720832008-05-26 13:01:01 +0000734 if (!PyBytes_Check(op))
735 return string_getbuffer(op);
736 return ((PyBytesObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000737}
738
739int
Christian Heimes44720832008-05-26 13:01:01 +0000740PyBytes_AsStringAndSize(register PyObject *obj,
741 register char **s,
742 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000743{
Christian Heimes44720832008-05-26 13:01:01 +0000744 if (s == NULL) {
745 PyErr_BadInternalCall();
746 return -1;
747 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000748
Christian Heimes44720832008-05-26 13:01:01 +0000749 if (!PyBytes_Check(obj)) {
750#ifdef Py_USING_UNICODE
751 if (PyUnicode_Check(obj)) {
752 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
753 if (obj == NULL)
754 return -1;
755 }
756 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000757#endif
Christian Heimes44720832008-05-26 13:01:01 +0000758 {
759 PyErr_Format(PyExc_TypeError,
760 "expected string or Unicode object, "
761 "%.200s found", Py_TYPE(obj)->tp_name);
762 return -1;
763 }
764 }
765
766 *s = PyBytes_AS_STRING(obj);
767 if (len != NULL)
768 *len = PyBytes_GET_SIZE(obj);
769 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
770 PyErr_SetString(PyExc_TypeError,
771 "expected string without null bytes");
772 return -1;
773 }
774 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000775}
776
Christian Heimes1a6387e2008-03-26 12:49:49 +0000777/* -------------------------------------------------------------------- */
778/* Methods */
779
Christian Heimes44720832008-05-26 13:01:01 +0000780#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000781#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000782
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783#include "stringlib/count.h"
784#include "stringlib/find.h"
785#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786
Christian Heimes44720832008-05-26 13:01:01 +0000787#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
788#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000789
Christian Heimes1a6387e2008-03-26 12:49:49 +0000790
791
792static int
Christian Heimes44720832008-05-26 13:01:01 +0000793string_print(PyBytesObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000794{
Christian Heimes44720832008-05-26 13:01:01 +0000795 Py_ssize_t i, str_len;
796 char c;
797 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798
Christian Heimes44720832008-05-26 13:01:01 +0000799 /* XXX Ought to check for interrupts when writing long strings */
800 if (! PyBytes_CheckExact(op)) {
801 int ret;
802 /* A str subclass may have its own __str__ method. */
803 op = (PyBytesObject *) PyObject_Str((PyObject *)op);
804 if (op == NULL)
805 return -1;
806 ret = string_print(op, fp, flags);
807 Py_DECREF(op);
808 return ret;
809 }
810 if (flags & Py_PRINT_RAW) {
811 char *data = op->ob_sval;
812 Py_ssize_t size = Py_SIZE(op);
813 Py_BEGIN_ALLOW_THREADS
814 while (size > INT_MAX) {
815 /* Very long strings cannot be written atomically.
816 * But don't write exactly INT_MAX bytes at a time
817 * to avoid memory aligment issues.
818 */
819 const int chunk_size = INT_MAX & ~0x3FFF;
820 fwrite(data, 1, chunk_size, fp);
821 data += chunk_size;
822 size -= chunk_size;
823 }
824#ifdef __VMS
825 if (size) fwrite(data, (int)size, 1, fp);
826#else
827 fwrite(data, 1, (int)size, fp);
828#endif
829 Py_END_ALLOW_THREADS
830 return 0;
831 }
832
833 /* figure out which quote to use; single is preferred */
834 quote = '\'';
835 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
836 !memchr(op->ob_sval, '"', Py_SIZE(op)))
837 quote = '"';
838
839 str_len = Py_SIZE(op);
840 Py_BEGIN_ALLOW_THREADS
841 fputc(quote, fp);
842 for (i = 0; i < str_len; i++) {
843 /* Since strings are immutable and the caller should have a
844 reference, accessing the interal buffer should not be an issue
845 with the GIL released. */
846 c = op->ob_sval[i];
847 if (c == quote || c == '\\')
848 fprintf(fp, "\\%c", c);
849 else if (c == '\t')
850 fprintf(fp, "\\t");
851 else if (c == '\n')
852 fprintf(fp, "\\n");
853 else if (c == '\r')
854 fprintf(fp, "\\r");
855 else if (c < ' ' || c >= 0x7f)
856 fprintf(fp, "\\x%02x", c & 0xff);
857 else
858 fputc(c, fp);
859 }
860 fputc(quote, fp);
861 Py_END_ALLOW_THREADS
862 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000863}
864
Christian Heimes44720832008-05-26 13:01:01 +0000865PyObject *
866PyBytes_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000867{
Christian Heimes44720832008-05-26 13:01:01 +0000868 register PyBytesObject* op = (PyBytesObject*) obj;
869 size_t newsize = 2 + 4 * Py_SIZE(op);
870 PyObject *v;
871 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
872 PyErr_SetString(PyExc_OverflowError,
873 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000874 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000875 }
876 v = PyBytes_FromStringAndSize((char *)NULL, newsize);
877 if (v == NULL) {
878 return NULL;
879 }
880 else {
881 register Py_ssize_t i;
882 register char c;
883 register char *p;
884 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000885
Christian Heimes44720832008-05-26 13:01:01 +0000886 /* figure out which quote to use; single is preferred */
887 quote = '\'';
888 if (smartquotes &&
889 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
890 !memchr(op->ob_sval, '"', Py_SIZE(op)))
891 quote = '"';
892
893 p = PyBytes_AS_STRING(v);
894 *p++ = quote;
895 for (i = 0; i < Py_SIZE(op); i++) {
896 /* There's at least enough room for a hex escape
897 and a closing quote. */
898 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 5);
899 c = op->ob_sval[i];
900 if (c == quote || c == '\\')
901 *p++ = '\\', *p++ = c;
902 else if (c == '\t')
903 *p++ = '\\', *p++ = 't';
904 else if (c == '\n')
905 *p++ = '\\', *p++ = 'n';
906 else if (c == '\r')
907 *p++ = '\\', *p++ = 'r';
908 else if (c < ' ' || c >= 0x7f) {
909 /* For performance, we don't want to call
910 PyOS_snprintf here (extra layers of
911 function call). */
912 sprintf(p, "\\x%02x", c & 0xff);
913 p += 4;
914 }
915 else
916 *p++ = c;
917 }
918 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 1);
919 *p++ = quote;
920 *p = '\0';
921 _PyBytes_Resize(
922 &v, (p - PyBytes_AS_STRING(v)));
923 return v;
924 }
925}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000926
927static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000928string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000929{
Christian Heimes44720832008-05-26 13:01:01 +0000930 return PyBytes_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000931}
932
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000934string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000935{
Christian Heimes44720832008-05-26 13:01:01 +0000936 assert(PyBytes_Check(s));
937 if (PyBytes_CheckExact(s)) {
938 Py_INCREF(s);
939 return s;
940 }
941 else {
942 /* Subtype -- return genuine string with the same value. */
943 PyBytesObject *t = (PyBytesObject *) s;
944 return PyBytes_FromStringAndSize(t->ob_sval, Py_SIZE(t));
945 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000946}
947
Christian Heimes44720832008-05-26 13:01:01 +0000948static Py_ssize_t
949string_length(PyBytesObject *a)
950{
951 return Py_SIZE(a);
952}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000953
Christian Heimes44720832008-05-26 13:01:01 +0000954static PyObject *
955string_concat(register PyBytesObject *a, register PyObject *bb)
956{
957 register Py_ssize_t size;
958 register PyBytesObject *op;
959 if (!PyBytes_Check(bb)) {
960#ifdef Py_USING_UNICODE
961 if (PyUnicode_Check(bb))
962 return PyUnicode_Concat((PyObject *)a, bb);
963#endif
964 if (PyByteArray_Check(bb))
965 return PyByteArray_Concat((PyObject *)a, bb);
966 PyErr_Format(PyExc_TypeError,
967 "cannot concatenate 'str' and '%.200s' objects",
968 Py_TYPE(bb)->tp_name);
969 return NULL;
970 }
971#define b ((PyBytesObject *)bb)
972 /* Optimize cases with empty left or right operand */
973 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
974 PyBytes_CheckExact(a) && PyBytes_CheckExact(b)) {
975 if (Py_SIZE(a) == 0) {
976 Py_INCREF(bb);
977 return bb;
978 }
979 Py_INCREF(a);
980 return (PyObject *)a;
981 }
982 size = Py_SIZE(a) + Py_SIZE(b);
983 if (size < 0) {
984 PyErr_SetString(PyExc_OverflowError,
985 "strings are too large to concat");
986 return NULL;
987 }
988
989 /* Inline PyObject_NewVar */
990 op = (PyBytesObject *)PyObject_MALLOC(sizeof(PyBytesObject) + size);
991 if (op == NULL)
992 return PyErr_NoMemory();
993 PyObject_INIT_VAR(op, &PyBytes_Type, size);
994 op->ob_shash = -1;
995 op->ob_sstate = SSTATE_NOT_INTERNED;
996 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
997 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
998 op->ob_sval[size] = '\0';
999 return (PyObject *) op;
1000#undef b
1001}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002
Christian Heimes44720832008-05-26 13:01:01 +00001003static PyObject *
1004string_repeat(register PyBytesObject *a, register Py_ssize_t n)
1005{
1006 register Py_ssize_t i;
1007 register Py_ssize_t j;
1008 register Py_ssize_t size;
1009 register PyBytesObject *op;
1010 size_t nbytes;
1011 if (n < 0)
1012 n = 0;
1013 /* watch out for overflows: the size can overflow int,
1014 * and the # of bytes needed can overflow size_t
1015 */
1016 size = Py_SIZE(a) * n;
1017 if (n && size / n != Py_SIZE(a)) {
1018 PyErr_SetString(PyExc_OverflowError,
1019 "repeated string is too long");
1020 return NULL;
1021 }
1022 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1023 Py_INCREF(a);
1024 return (PyObject *)a;
1025 }
1026 nbytes = (size_t)size;
1027 if (nbytes + sizeof(PyBytesObject) <= nbytes) {
1028 PyErr_SetString(PyExc_OverflowError,
1029 "repeated string is too long");
1030 return NULL;
1031 }
1032 op = (PyBytesObject *)
1033 PyObject_MALLOC(sizeof(PyBytesObject) + nbytes);
1034 if (op == NULL)
1035 return PyErr_NoMemory();
1036 PyObject_INIT_VAR(op, &PyBytes_Type, size);
1037 op->ob_shash = -1;
1038 op->ob_sstate = SSTATE_NOT_INTERNED;
1039 op->ob_sval[size] = '\0';
1040 if (Py_SIZE(a) == 1 && n > 0) {
1041 memset(op->ob_sval, a->ob_sval[0] , n);
1042 return (PyObject *) op;
1043 }
1044 i = 0;
1045 if (i < size) {
1046 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1047 i = Py_SIZE(a);
1048 }
1049 while (i < size) {
1050 j = (i <= size-i) ? i : size-i;
1051 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1052 i += j;
1053 }
1054 return (PyObject *) op;
1055}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001056
Christian Heimes44720832008-05-26 13:01:01 +00001057/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1058
1059static PyObject *
1060string_slice(register PyBytesObject *a, register Py_ssize_t i,
1061 register Py_ssize_t j)
1062 /* j -- may be negative! */
1063{
1064 if (i < 0)
1065 i = 0;
1066 if (j < 0)
1067 j = 0; /* Avoid signed/unsigned bug in next line */
1068 if (j > Py_SIZE(a))
1069 j = Py_SIZE(a);
1070 if (i == 0 && j == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1071 /* It's the same as a */
1072 Py_INCREF(a);
1073 return (PyObject *)a;
1074 }
1075 if (j < i)
1076 j = i;
1077 return PyBytes_FromStringAndSize(a->ob_sval + i, j-i);
1078}
1079
1080static int
1081string_contains(PyObject *str_obj, PyObject *sub_obj)
1082{
1083 if (!PyBytes_CheckExact(sub_obj)) {
1084#ifdef Py_USING_UNICODE
1085 if (PyUnicode_Check(sub_obj))
1086 return PyUnicode_Contains(str_obj, sub_obj);
1087#endif
1088 if (!PyBytes_Check(sub_obj)) {
1089 PyErr_Format(PyExc_TypeError,
1090 "'in <string>' requires string as left operand, "
1091 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1092 return -1;
1093 }
1094 }
1095
1096 return stringlib_contains_obj(str_obj, sub_obj);
1097}
1098
1099static PyObject *
1100string_item(PyBytesObject *a, register Py_ssize_t i)
1101{
1102 char pchar;
1103 PyObject *v;
1104 if (i < 0 || i >= Py_SIZE(a)) {
1105 PyErr_SetString(PyExc_IndexError, "string index out of range");
1106 return NULL;
1107 }
1108 pchar = a->ob_sval[i];
1109 v = (PyObject *)characters[pchar & UCHAR_MAX];
1110 if (v == NULL)
1111 v = PyBytes_FromStringAndSize(&pchar, 1);
1112 else {
1113#ifdef COUNT_ALLOCS
1114 one_strings++;
1115#endif
1116 Py_INCREF(v);
1117 }
1118 return v;
1119}
1120
1121static PyObject*
1122string_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1123{
1124 int c;
1125 Py_ssize_t len_a, len_b;
1126 Py_ssize_t min_len;
1127 PyObject *result;
1128
1129 /* Make sure both arguments are strings. */
1130 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1131 result = Py_NotImplemented;
1132 goto out;
1133 }
1134 if (a == b) {
1135 switch (op) {
1136 case Py_EQ:case Py_LE:case Py_GE:
1137 result = Py_True;
1138 goto out;
1139 case Py_NE:case Py_LT:case Py_GT:
1140 result = Py_False;
1141 goto out;
1142 }
1143 }
1144 if (op == Py_EQ) {
1145 /* Supporting Py_NE here as well does not save
1146 much time, since Py_NE is rarely used. */
1147 if (Py_SIZE(a) == Py_SIZE(b)
1148 && (a->ob_sval[0] == b->ob_sval[0]
1149 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1150 result = Py_True;
1151 } else {
1152 result = Py_False;
1153 }
1154 goto out;
1155 }
1156 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1157 min_len = (len_a < len_b) ? len_a : len_b;
1158 if (min_len > 0) {
1159 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1160 if (c==0)
1161 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1162 } else
1163 c = 0;
1164 if (c == 0)
1165 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1166 switch (op) {
1167 case Py_LT: c = c < 0; break;
1168 case Py_LE: c = c <= 0; break;
1169 case Py_EQ: assert(0); break; /* unreachable */
1170 case Py_NE: c = c != 0; break;
1171 case Py_GT: c = c > 0; break;
1172 case Py_GE: c = c >= 0; break;
1173 default:
1174 result = Py_NotImplemented;
1175 goto out;
1176 }
1177 result = c ? Py_True : Py_False;
1178 out:
1179 Py_INCREF(result);
1180 return result;
1181}
1182
1183int
1184_PyBytes_Eq(PyObject *o1, PyObject *o2)
1185{
1186 PyBytesObject *a = (PyBytesObject*) o1;
1187 PyBytesObject *b = (PyBytesObject*) o2;
1188 return Py_SIZE(a) == Py_SIZE(b)
1189 && *a->ob_sval == *b->ob_sval
1190 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1191}
1192
1193static long
1194string_hash(PyBytesObject *a)
1195{
1196 register Py_ssize_t len;
1197 register unsigned char *p;
1198 register long x;
1199
1200 if (a->ob_shash != -1)
1201 return a->ob_shash;
1202 len = Py_SIZE(a);
1203 p = (unsigned char *) a->ob_sval;
1204 x = *p << 7;
1205 while (--len >= 0)
1206 x = (1000003*x) ^ *p++;
1207 x ^= Py_SIZE(a);
1208 if (x == -1)
1209 x = -2;
1210 a->ob_shash = x;
1211 return x;
1212}
1213
1214static PyObject*
1215string_subscript(PyBytesObject* self, PyObject* item)
1216{
1217 if (PyIndex_Check(item)) {
1218 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1219 if (i == -1 && PyErr_Occurred())
1220 return NULL;
1221 if (i < 0)
1222 i += PyBytes_GET_SIZE(self);
1223 return string_item(self, i);
1224 }
1225 else if (PySlice_Check(item)) {
1226 Py_ssize_t start, stop, step, slicelength, cur, i;
1227 char* source_buf;
1228 char* result_buf;
1229 PyObject* result;
1230
1231 if (PySlice_GetIndicesEx((PySliceObject*)item,
1232 PyBytes_GET_SIZE(self),
1233 &start, &stop, &step, &slicelength) < 0) {
1234 return NULL;
1235 }
1236
1237 if (slicelength <= 0) {
1238 return PyBytes_FromStringAndSize("", 0);
1239 }
1240 else if (start == 0 && step == 1 &&
1241 slicelength == PyBytes_GET_SIZE(self) &&
1242 PyBytes_CheckExact(self)) {
1243 Py_INCREF(self);
1244 return (PyObject *)self;
1245 }
1246 else if (step == 1) {
1247 return PyBytes_FromStringAndSize(
1248 PyBytes_AS_STRING(self) + start,
1249 slicelength);
1250 }
1251 else {
1252 source_buf = PyBytes_AsString((PyObject*)self);
1253 result_buf = (char *)PyMem_Malloc(slicelength);
1254 if (result_buf == NULL)
1255 return PyErr_NoMemory();
1256
1257 for (cur = start, i = 0; i < slicelength;
1258 cur += step, i++) {
1259 result_buf[i] = source_buf[cur];
1260 }
1261
1262 result = PyBytes_FromStringAndSize(result_buf,
1263 slicelength);
1264 PyMem_Free(result_buf);
1265 return result;
1266 }
1267 }
1268 else {
1269 PyErr_Format(PyExc_TypeError,
1270 "string indices must be integers, not %.200s",
1271 Py_TYPE(item)->tp_name);
1272 return NULL;
1273 }
1274}
1275
1276static Py_ssize_t
1277string_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1278{
1279 if ( index != 0 ) {
1280 PyErr_SetString(PyExc_SystemError,
1281 "accessing non-existent string segment");
1282 return -1;
1283 }
1284 *ptr = (void *)self->ob_sval;
1285 return Py_SIZE(self);
1286}
1287
1288static Py_ssize_t
1289string_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
1290{
1291 PyErr_SetString(PyExc_TypeError,
1292 "Cannot use string as modifiable buffer");
1293 return -1;
1294}
1295
1296static Py_ssize_t
1297string_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
1298{
1299 if ( lenp )
1300 *lenp = Py_SIZE(self);
1301 return 1;
1302}
1303
1304static Py_ssize_t
1305string_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
1306{
1307 if ( index != 0 ) {
1308 PyErr_SetString(PyExc_SystemError,
1309 "accessing non-existent string segment");
1310 return -1;
1311 }
1312 *ptr = self->ob_sval;
1313 return Py_SIZE(self);
1314}
1315
1316static int
1317string_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1318{
1319 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1320 0, flags);
1321}
1322
/* Sequence-protocol slot table for the string type.  The two assignment
   slots are 0: item and slice assignment are not provided. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1333
/* Mapping-protocol slot table for the string type: length and subscript
   lookup are provided; the third slot (subscript assignment in the
   Python 2 PyMappingMethods layout) is left 0. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1339
/* Buffer-protocol slot table for the string type: the four old-style
   accessors followed by the new-style getbuffer function. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */ /* NOTE(review): presumably the release-buffer slot, left unset -- confirm against PyBufferProcs */
};
1348
1349
1350
/* Selectors for which end(s) of the string a strip operation trims. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Format strings, one per selector (presumably for argument parsing;
   their use lies outside this part of the file). */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1359
Christian Heimes1a6387e2008-03-26 12:49:49 +00001360
/* Nonzero when `length' bytes of `pattern' occur in `target' starting at
   `offset'.  The first and last bytes are compared directly before the
   interior is handed to memcmp().

   Don't call if length < 2: the interior length passed to memcmp() is
   length-2.

   Every argument is parenthesized so that expression arguments (e.g. a
   conditional) expand correctly; arguments are still evaluated more than
   once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                           \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&       \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1366
1367
Christian Heimes1a6387e2008-03-26 12:49:49 +00001368/* Overallocate the initial list to reduce the number of reallocs for small
1369 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1370 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1371 text (roughly 11 words per line) and field delimited data (usually 1-10
1372 fields). For large strings the split algorithms are bandwidth limited
1373 so increasing the preallocation likely will not improve things.*/
1374
/* Upper bound on the number of list slots reserved before splitting. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits: one slot
   per potential piece (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand correctly; it is evaluated twice, so it must be free
   of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001380
/* Build a string from data[left:right] and append it to `list'.
   Expands to bare statements (no enclosing braces) and relies on names
   from the expansion site: the variables `str' and `list', and an
   `onError' label that is jumped to when string creation or the append
   fails.  The temporary reference in `str' is dropped on both paths. */
#define SPLIT_APPEND(data, left, right) \
    str = PyBytes_FromStringAndSize((data) + (left), \
                                    (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but for a list created with preallocated slots.
   While `count' is below MAX_PREALLOC the new string is stored directly
   into slot `count' (no DECREF follows, so the reference stays with the
   list); afterwards it is appended.  `count' is incremented on success.
   Relies on `str', `list', `count' and the `onError' label from the
   expansion site. */
#define SPLIT_ADD(data, left, right) { \
    str = PyBytes_FromStringAndSize((data) + (left), \
                                    (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }

/* Always force the list to the expected size. */
/* Sets the list's size field to `count', the number of items actually
   stored by SPLIT_ADD. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1412
/* Cursor-advancing helpers for whitespace splitting.  `i' must be a
   modifiable index variable; it is moved in place.
     SKIP_SPACE / SKIP_NONSPACE   advance i forward while i < len and
                                  s[i] is / is not whitespace.
     RSKIP_SPACE / RSKIP_NONSPACE move i backward while i >= 0 and
                                  s[i] is / is not whitespace.
   Arguments are parenthesized so that expression arguments for `s' and
   `len' expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001417
1418Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001419split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001420{
Christian Heimes44720832008-05-26 13:01:01 +00001421 const char *s = PyBytes_AS_STRING(self);
1422 Py_ssize_t i, j, count=0;
1423 PyObject *str;
1424 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001425
Christian Heimes44720832008-05-26 13:01:01 +00001426 if (list == NULL)
1427 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428
Christian Heimes44720832008-05-26 13:01:01 +00001429 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430
Christian Heimes44720832008-05-26 13:01:01 +00001431 while (maxsplit-- > 0) {
1432 SKIP_SPACE(s, i, len);
1433 if (i==len) break;
1434 j = i; i++;
1435 SKIP_NONSPACE(s, i, len);
1436 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1437 /* No whitespace in self, so just use it as list[0] */
1438 Py_INCREF(self);
1439 PyList_SET_ITEM(list, 0, (PyObject *)self);
1440 count++;
1441 break;
1442 }
1443 SPLIT_ADD(s, j, i);
1444 }
1445
1446 if (i < len) {
1447 /* Only occurs when maxsplit was reached */
1448 /* Skip any remaining whitespace and copy to end of string */
1449 SKIP_SPACE(s, i, len);
1450 if (i != len)
1451 SPLIT_ADD(s, i, len);
1452 }
1453 FIX_PREALLOC_SIZE(list);
1454 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001455 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001456 Py_DECREF(list);
1457 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001458}
1459
Christian Heimes1a6387e2008-03-26 12:49:49 +00001460Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001461split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001462{
Christian Heimes44720832008-05-26 13:01:01 +00001463 const char *s = PyBytes_AS_STRING(self);
1464 register Py_ssize_t i, j, count=0;
1465 PyObject *str;
1466 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467
Christian Heimes44720832008-05-26 13:01:01 +00001468 if (list == NULL)
1469 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001470
Christian Heimes44720832008-05-26 13:01:01 +00001471 i = j = 0;
1472 while ((j < len) && (maxcount-- > 0)) {
1473 for(; j<len; j++) {
1474 /* I found that using memchr makes no difference */
1475 if (s[j] == ch) {
1476 SPLIT_ADD(s, i, j);
1477 i = j = j + 1;
1478 break;
1479 }
1480 }
1481 }
1482 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1483 /* ch not in self, so just use self as list[0] */
1484 Py_INCREF(self);
1485 PyList_SET_ITEM(list, 0, (PyObject *)self);
1486 count++;
1487 }
1488 else if (i <= len) {
1489 SPLIT_ADD(s, i, len);
1490 }
1491 FIX_PREALLOC_SIZE(list);
1492 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001493
1494 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001495 Py_DECREF(list);
1496 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001497}
1498
1499PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001500"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001501\n\
Christian Heimes44720832008-05-26 13:01:01 +00001502Return a list of the words in the string S, using sep as the\n\
1503delimiter string. If maxsplit is given, at most maxsplit\n\
1504splits are done. If sep is not specified or is None, any\n\
1505whitespace string is a separator and empty strings are removed\n\
1506from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507
1508static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001509string_split(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510{
Christian Heimes44720832008-05-26 13:01:01 +00001511 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1512 Py_ssize_t maxsplit = -1, count=0;
1513 const char *s = PyBytes_AS_STRING(self), *sub;
1514 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001515#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001516 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517#endif
1518
Christian Heimes44720832008-05-26 13:01:01 +00001519 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1520 return NULL;
1521 if (maxsplit < 0)
1522 maxsplit = PY_SSIZE_T_MAX;
1523 if (subobj == Py_None)
1524 return split_whitespace(self, len, maxsplit);
1525 if (PyBytes_Check(subobj)) {
1526 sub = PyBytes_AS_STRING(subobj);
1527 n = PyBytes_GET_SIZE(subobj);
1528 }
1529#ifdef Py_USING_UNICODE
1530 else if (PyUnicode_Check(subobj))
1531 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1532#endif
1533 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1534 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001535
Christian Heimes44720832008-05-26 13:01:01 +00001536 if (n == 0) {
1537 PyErr_SetString(PyExc_ValueError, "empty separator");
1538 return NULL;
1539 }
1540 else if (n == 1)
1541 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001542
Christian Heimes44720832008-05-26 13:01:01 +00001543 list = PyList_New(PREALLOC_SIZE(maxsplit));
1544 if (list == NULL)
1545 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001546
1547#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001548 i = j = 0;
1549 while (maxsplit-- > 0) {
1550 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1551 if (pos < 0)
1552 break;
1553 j = i+pos;
1554 SPLIT_ADD(s, i, j);
1555 i = j + n;
1556 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001557#else
Christian Heimes44720832008-05-26 13:01:01 +00001558 i = j = 0;
1559 while ((j+n <= len) && (maxsplit-- > 0)) {
1560 for (; j+n <= len; j++) {
1561 if (Py_STRING_MATCH(s, j, sub, n)) {
1562 SPLIT_ADD(s, i, j);
1563 i = j = j + n;
1564 break;
1565 }
1566 }
1567 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001568#endif
Christian Heimes44720832008-05-26 13:01:01 +00001569 SPLIT_ADD(s, i, len);
1570 FIX_PREALLOC_SIZE(list);
1571 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001572
Christian Heimes44720832008-05-26 13:01:01 +00001573 onError:
1574 Py_DECREF(list);
1575 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001576}
1577
1578PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001579"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001580\n\
Christian Heimes44720832008-05-26 13:01:01 +00001581Searches for the separator sep in S, and returns the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001582the separator itself, and the part after it. If the separator is not\n\
Christian Heimes44720832008-05-26 13:01:01 +00001583found, returns S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001584
1585static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001586string_partition(PyBytesObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001587{
Christian Heimes44720832008-05-26 13:01:01 +00001588 const char *sep;
1589 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001590
Christian Heimes44720832008-05-26 13:01:01 +00001591 if (PyBytes_Check(sep_obj)) {
1592 sep = PyBytes_AS_STRING(sep_obj);
1593 sep_len = PyBytes_GET_SIZE(sep_obj);
1594 }
1595#ifdef Py_USING_UNICODE
1596 else if (PyUnicode_Check(sep_obj))
1597 return PyUnicode_Partition((PyObject *) self, sep_obj);
1598#endif
1599 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1600 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001601
Christian Heimes44720832008-05-26 13:01:01 +00001602 return stringlib_partition(
1603 (PyObject*) self,
1604 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1605 sep_obj, sep, sep_len
1606 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001607}
1608
1609PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001610"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611\n\
Christian Heimes44720832008-05-26 13:01:01 +00001612Searches for the separator sep in S, starting at the end of S, and returns\n\
1613the part before it, the separator itself, and the part after it. If the\n\
1614separator is not found, returns two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001615
1616static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001617string_rpartition(PyBytesObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001618{
Christian Heimes44720832008-05-26 13:01:01 +00001619 const char *sep;
1620 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001621
Christian Heimes44720832008-05-26 13:01:01 +00001622 if (PyBytes_Check(sep_obj)) {
1623 sep = PyBytes_AS_STRING(sep_obj);
1624 sep_len = PyBytes_GET_SIZE(sep_obj);
1625 }
1626#ifdef Py_USING_UNICODE
1627 else if (PyUnicode_Check(sep_obj))
1628 return PyUnicode_Partition((PyObject *) self, sep_obj);
1629#endif
1630 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1631 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001632
Christian Heimes44720832008-05-26 13:01:01 +00001633 return stringlib_rpartition(
1634 (PyObject*) self,
1635 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1636 sep_obj, sep, sep_len
1637 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001638}
1639
1640Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001641rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642{
Christian Heimes44720832008-05-26 13:01:01 +00001643 const char *s = PyBytes_AS_STRING(self);
1644 Py_ssize_t i, j, count=0;
1645 PyObject *str;
1646 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001647
Christian Heimes44720832008-05-26 13:01:01 +00001648 if (list == NULL)
1649 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001650
Christian Heimes44720832008-05-26 13:01:01 +00001651 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001652
Christian Heimes44720832008-05-26 13:01:01 +00001653 while (maxsplit-- > 0) {
1654 RSKIP_SPACE(s, i);
1655 if (i<0) break;
1656 j = i; i--;
1657 RSKIP_NONSPACE(s, i);
1658 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1659 /* No whitespace in self, so just use it as list[0] */
1660 Py_INCREF(self);
1661 PyList_SET_ITEM(list, 0, (PyObject *)self);
1662 count++;
1663 break;
1664 }
1665 SPLIT_ADD(s, i + 1, j + 1);
1666 }
1667 if (i >= 0) {
1668 /* Only occurs when maxsplit was reached */
1669 /* Skip any remaining whitespace and copy to beginning of string */
1670 RSKIP_SPACE(s, i);
1671 if (i >= 0)
1672 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Christian Heimes44720832008-05-26 13:01:01 +00001674 }
1675 FIX_PREALLOC_SIZE(list);
1676 if (PyList_Reverse(list) < 0)
1677 goto onError;
1678 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001679 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001680 Py_DECREF(list);
1681 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001682}
1683
1684Py_LOCAL_INLINE(PyObject *)
Christian Heimes44720832008-05-26 13:01:01 +00001685rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001686{
Christian Heimes44720832008-05-26 13:01:01 +00001687 const char *s = PyBytes_AS_STRING(self);
1688 register Py_ssize_t i, j, count=0;
1689 PyObject *str;
1690 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001691
Christian Heimes44720832008-05-26 13:01:01 +00001692 if (list == NULL)
1693 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694
Christian Heimes44720832008-05-26 13:01:01 +00001695 i = j = len - 1;
1696 while ((i >= 0) && (maxcount-- > 0)) {
1697 for (; i >= 0; i--) {
1698 if (s[i] == ch) {
1699 SPLIT_ADD(s, i + 1, j + 1);
1700 j = i = i - 1;
1701 break;
1702 }
1703 }
1704 }
1705 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1706 /* ch not in self, so just use self as list[0] */
1707 Py_INCREF(self);
1708 PyList_SET_ITEM(list, 0, (PyObject *)self);
1709 count++;
1710 }
1711 else if (j >= -1) {
1712 SPLIT_ADD(s, 0, j + 1);
1713 }
1714 FIX_PREALLOC_SIZE(list);
1715 if (PyList_Reverse(list) < 0)
1716 goto onError;
1717 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001718
Christian Heimes44720832008-05-26 13:01:01 +00001719 onError:
1720 Py_DECREF(list);
1721 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001722}
1723
1724PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001725"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001726\n\
Christian Heimes44720832008-05-26 13:01:01 +00001727Return a list of the words in the string S, using sep as the\n\
1728delimiter string, starting at the end of the string and working\n\
1729to the front. If maxsplit is given, at most maxsplit splits are\n\
1730done. If sep is not specified or is None, any whitespace string\n\
1731is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001732
1733static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001734string_rsplit(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001735{
Christian Heimes44720832008-05-26 13:01:01 +00001736 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1737 Py_ssize_t maxsplit = -1, count=0;
1738 const char *s, *sub;
1739 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001740
Christian Heimes44720832008-05-26 13:01:01 +00001741 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1742 return NULL;
1743 if (maxsplit < 0)
1744 maxsplit = PY_SSIZE_T_MAX;
1745 if (subobj == Py_None)
1746 return rsplit_whitespace(self, len, maxsplit);
1747 if (PyBytes_Check(subobj)) {
1748 sub = PyBytes_AS_STRING(subobj);
1749 n = PyBytes_GET_SIZE(subobj);
1750 }
1751#ifdef Py_USING_UNICODE
1752 else if (PyUnicode_Check(subobj))
1753 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1754#endif
1755 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1756 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001757
Christian Heimes44720832008-05-26 13:01:01 +00001758 if (n == 0) {
1759 PyErr_SetString(PyExc_ValueError, "empty separator");
1760 return NULL;
1761 }
1762 else if (n == 1)
1763 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001764
Christian Heimes44720832008-05-26 13:01:01 +00001765 list = PyList_New(PREALLOC_SIZE(maxsplit));
1766 if (list == NULL)
1767 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001768
Christian Heimes44720832008-05-26 13:01:01 +00001769 j = len;
1770 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001771
Christian Heimes44720832008-05-26 13:01:01 +00001772 s = PyBytes_AS_STRING(self);
1773 while ( (i >= 0) && (maxsplit-- > 0) ) {
1774 for (; i>=0; i--) {
1775 if (Py_STRING_MATCH(s, i, sub, n)) {
1776 SPLIT_ADD(s, i + n, j);
1777 j = i;
1778 i -= n;
1779 break;
1780 }
1781 }
1782 }
1783 SPLIT_ADD(s, 0, j);
1784 FIX_PREALLOC_SIZE(list);
1785 if (PyList_Reverse(list) < 0)
1786 goto onError;
1787 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001788
1789onError:
Christian Heimes44720832008-05-26 13:01:01 +00001790 Py_DECREF(list);
1791 return NULL;
1792}
1793
1794
1795PyDoc_STRVAR(join__doc__,
1796"S.join(sequence) -> string\n\
1797\n\
1798Return a string which is the concatenation of the strings in the\n\
1799sequence. The separator between elements is S.");
1800
1801static PyObject *
1802string_join(PyBytesObject *self, PyObject *orig)
1803{
1804 char *sep = PyBytes_AS_STRING(self);
1805 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1806 PyObject *res = NULL;
1807 char *p;
1808 Py_ssize_t seqlen = 0;
1809 size_t sz = 0;
1810 Py_ssize_t i;
1811 PyObject *seq, *item;
1812
1813 seq = PySequence_Fast(orig, "");
1814 if (seq == NULL) {
1815 return NULL;
1816 }
1817
1818 seqlen = PySequence_Size(seq);
1819 if (seqlen == 0) {
1820 Py_DECREF(seq);
1821 return PyBytes_FromString("");
1822 }
1823 if (seqlen == 1) {
1824 item = PySequence_Fast_GET_ITEM(seq, 0);
1825 if (PyBytes_CheckExact(item) || PyUnicode_CheckExact(item)) {
1826 Py_INCREF(item);
1827 Py_DECREF(seq);
1828 return item;
1829 }
1830 }
1831
1832 /* There are at least two things to join, or else we have a subclass
1833 * of the builtin types in the sequence.
1834 * Do a pre-pass to figure out the total amount of space we'll
1835 * need (sz), see whether any argument is absurd, and defer to
1836 * the Unicode join if appropriate.
1837 */
1838 for (i = 0; i < seqlen; i++) {
1839 const size_t old_sz = sz;
1840 item = PySequence_Fast_GET_ITEM(seq, i);
1841 if (!PyBytes_Check(item)){
1842#ifdef Py_USING_UNICODE
1843 if (PyUnicode_Check(item)) {
1844 /* Defer to Unicode join.
1845 * CAUTION: There's no gurantee that the
1846 * original sequence can be iterated over
1847 * again, so we must pass seq here.
1848 */
1849 PyObject *result;
1850 result = PyUnicode_Join((PyObject *)self, seq);
1851 Py_DECREF(seq);
1852 return result;
1853 }
1854#endif
1855 PyErr_Format(PyExc_TypeError,
1856 "sequence item %zd: expected string,"
1857 " %.80s found",
1858 i, Py_TYPE(item)->tp_name);
1859 Py_DECREF(seq);
1860 return NULL;
1861 }
1862 sz += PyBytes_GET_SIZE(item);
1863 if (i != 0)
1864 sz += seplen;
1865 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1866 PyErr_SetString(PyExc_OverflowError,
1867 "join() result is too long for a Python string");
1868 Py_DECREF(seq);
1869 return NULL;
1870 }
1871 }
1872
1873 /* Allocate result space. */
1874 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1875 if (res == NULL) {
1876 Py_DECREF(seq);
1877 return NULL;
1878 }
1879
1880 /* Catenate everything. */
1881 p = PyBytes_AS_STRING(res);
1882 for (i = 0; i < seqlen; ++i) {
1883 size_t n;
1884 item = PySequence_Fast_GET_ITEM(seq, i);
1885 n = PyBytes_GET_SIZE(item);
1886 Py_MEMCPY(p, PyBytes_AS_STRING(item), n);
1887 p += n;
1888 if (i < seqlen - 1) {
1889 Py_MEMCPY(p, sep, seplen);
1890 p += seplen;
1891 }
1892 }
1893
1894 Py_DECREF(seq);
1895 return res;
1896}
1897
1898PyObject *
1899_PyBytes_Join(PyObject *sep, PyObject *x)
1900{
1901 assert(sep != NULL && PyBytes_Check(sep));
1902 assert(x != NULL);
1903 return string_join((PyBytesObject *)sep, x);
1904}
1905
1906Py_LOCAL_INLINE(void)
1907string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1908{
1909 if (*end > len)
1910 *end = len;
1911 else if (*end < 0)
1912 *end += len;
1913 if (*end < 0)
1914 *end = 0;
1915 if (*start < 0)
1916 *start += len;
1917 if (*start < 0)
1918 *start = 0;
1919}
1920
1921Py_LOCAL_INLINE(Py_ssize_t)
1922string_find_internal(PyBytesObject *self, PyObject *args, int dir)
1923{
1924 PyObject *subobj;
1925 const char *sub;
1926 Py_ssize_t sub_len;
1927 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1928 PyObject *obj_start=Py_None, *obj_end=Py_None;
1929
1930 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1931 &obj_start, &obj_end))
1932 return -2;
1933 /* To support None in "start" and "end" arguments, meaning
1934 the same as if they were not passed.
1935 */
1936 if (obj_start != Py_None)
1937 if (!_PyEval_SliceIndex(obj_start, &start))
1938 return -2;
1939 if (obj_end != Py_None)
1940 if (!_PyEval_SliceIndex(obj_end, &end))
1941 return -2;
1942
1943 if (PyBytes_Check(subobj)) {
1944 sub = PyBytes_AS_STRING(subobj);
1945 sub_len = PyBytes_GET_SIZE(subobj);
1946 }
1947#ifdef Py_USING_UNICODE
1948 else if (PyUnicode_Check(subobj))
1949 return PyUnicode_Find(
1950 (PyObject *)self, subobj, start, end, dir);
1951#endif
1952 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1953 /* XXX - the "expected a character buffer object" is pretty
1954 confusing for a non-expert. remap to something else ? */
1955 return -2;
1956
1957 if (dir > 0)
1958 return stringlib_find_slice(
1959 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1960 sub, sub_len, start, end);
1961 else
1962 return stringlib_rfind_slice(
1963 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1964 sub, sub_len, start, end);
1965}
1966
1967
1968PyDoc_STRVAR(find__doc__,
1969"S.find(sub [,start [,end]]) -> int\n\
1970\n\
1971Return the lowest index in S where substring sub is found,\n\
1972such that sub is contained within s[start:end]. Optional\n\
1973arguments start and end are interpreted as in slice notation.\n\
1974\n\
1975Return -1 on failure.");
1976
1977static PyObject *
1978string_find(PyBytesObject *self, PyObject *args)
1979{
1980 Py_ssize_t result = string_find_internal(self, args, +1);
1981 if (result == -2)
1982 return NULL;
1983 return PyInt_FromSsize_t(result);
1984}
1985
1986
1987PyDoc_STRVAR(index__doc__,
1988"S.index(sub [,start [,end]]) -> int\n\
1989\n\
1990Like S.find() but raise ValueError when the substring is not found.");
1991
1992static PyObject *
1993string_index(PyBytesObject *self, PyObject *args)
1994{
1995 Py_ssize_t result = string_find_internal(self, args, +1);
1996 if (result == -2)
1997 return NULL;
1998 if (result == -1) {
1999 PyErr_SetString(PyExc_ValueError,
2000 "substring not found");
2001 return NULL;
2002 }
2003 return PyInt_FromSsize_t(result);
2004}
2005
2006
2007PyDoc_STRVAR(rfind__doc__,
2008"S.rfind(sub [,start [,end]]) -> int\n\
2009\n\
2010Return the highest index in S where substring sub is found,\n\
2011such that sub is contained within s[start:end]. Optional\n\
2012arguments start and end are interpreted as in slice notation.\n\
2013\n\
2014Return -1 on failure.");
2015
2016static PyObject *
2017string_rfind(PyBytesObject *self, PyObject *args)
2018{
2019 Py_ssize_t result = string_find_internal(self, args, -1);
2020 if (result == -2)
2021 return NULL;
2022 return PyInt_FromSsize_t(result);
2023}
2024
2025
2026PyDoc_STRVAR(rindex__doc__,
2027"S.rindex(sub [,start [,end]]) -> int\n\
2028\n\
2029Like S.rfind() but raise ValueError when the substring is not found.");
2030
2031static PyObject *
2032string_rindex(PyBytesObject *self, PyObject *args)
2033{
2034 Py_ssize_t result = string_find_internal(self, args, -1);
2035 if (result == -2)
2036 return NULL;
2037 if (result == -1) {
2038 PyErr_SetString(PyExc_ValueError,
2039 "substring not found");
2040 return NULL;
2041 }
2042 return PyInt_FromSsize_t(result);
2043}
2044
2045
2046Py_LOCAL_INLINE(PyObject *)
2047do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
2048{
2049 char *s = PyBytes_AS_STRING(self);
2050 Py_ssize_t len = PyBytes_GET_SIZE(self);
2051 char *sep = PyBytes_AS_STRING(sepobj);
2052 Py_ssize_t seplen = PyBytes_GET_SIZE(sepobj);
2053 Py_ssize_t i, j;
2054
2055 i = 0;
2056 if (striptype != RIGHTSTRIP) {
2057 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2058 i++;
2059 }
2060 }
2061
2062 j = len;
2063 if (striptype != LEFTSTRIP) {
2064 do {
2065 j--;
2066 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2067 j++;
2068 }
2069
2070 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2071 Py_INCREF(self);
2072 return (PyObject*)self;
2073 }
2074 else
2075 return PyBytes_FromStringAndSize(s+i, j-i);
2076}
2077
2078
2079Py_LOCAL_INLINE(PyObject *)
2080do_strip(PyBytesObject *self, int striptype)
2081{
2082 char *s = PyBytes_AS_STRING(self);
2083 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
2084
2085 i = 0;
2086 if (striptype != RIGHTSTRIP) {
2087 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2088 i++;
2089 }
2090 }
2091
2092 j = len;
2093 if (striptype != LEFTSTRIP) {
2094 do {
2095 j--;
2096 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2097 j++;
2098 }
2099
2100 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2101 Py_INCREF(self);
2102 return (PyObject*)self;
2103 }
2104 else
2105 return PyBytes_FromStringAndSize(s+i, j-i);
2106}
2107
2108
2109Py_LOCAL_INLINE(PyObject *)
2110do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
2111{
2112 PyObject *sep = NULL;
2113
2114 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2115 return NULL;
2116
2117 if (sep != NULL && sep != Py_None) {
2118 if (PyBytes_Check(sep))
2119 return do_xstrip(self, striptype, sep);
2120#ifdef Py_USING_UNICODE
2121 else if (PyUnicode_Check(sep)) {
2122 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2123 PyObject *res;
2124 if (uniself==NULL)
2125 return NULL;
2126 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2127 striptype, sep);
2128 Py_DECREF(uniself);
2129 return res;
2130 }
2131#endif
2132 PyErr_Format(PyExc_TypeError,
2133#ifdef Py_USING_UNICODE
2134 "%s arg must be None, str or unicode",
2135#else
2136 "%s arg must be None or str",
2137#endif
2138 STRIPNAME(striptype));
2139 return NULL;
2140 }
2141
2142 return do_strip(self, striptype);
2143}
2144
2145
2146PyDoc_STRVAR(strip__doc__,
2147"S.strip([chars]) -> string or unicode\n\
2148\n\
2149Return a copy of the string S with leading and trailing\n\
2150whitespace removed.\n\
2151If chars is given and not None, remove characters in chars instead.\n\
2152If chars is unicode, S will be converted to unicode before stripping");
2153
2154static PyObject *
2155string_strip(PyBytesObject *self, PyObject *args)
2156{
2157 if (PyTuple_GET_SIZE(args) == 0)
2158 return do_strip(self, BOTHSTRIP); /* Common case */
2159 else
2160 return do_argstrip(self, BOTHSTRIP, args);
2161}
2162
2163
2164PyDoc_STRVAR(lstrip__doc__,
2165"S.lstrip([chars]) -> string or unicode\n\
2166\n\
2167Return a copy of the string S with leading whitespace removed.\n\
2168If chars is given and not None, remove characters in chars instead.\n\
2169If chars is unicode, S will be converted to unicode before stripping");
2170
2171static PyObject *
2172string_lstrip(PyBytesObject *self, PyObject *args)
2173{
2174 if (PyTuple_GET_SIZE(args) == 0)
2175 return do_strip(self, LEFTSTRIP); /* Common case */
2176 else
2177 return do_argstrip(self, LEFTSTRIP, args);
2178}
2179
2180
2181PyDoc_STRVAR(rstrip__doc__,
2182"S.rstrip([chars]) -> string or unicode\n\
2183\n\
2184Return a copy of the string S with trailing whitespace removed.\n\
2185If chars is given and not None, remove characters in chars instead.\n\
2186If chars is unicode, S will be converted to unicode before stripping");
2187
2188static PyObject *
2189string_rstrip(PyBytesObject *self, PyObject *args)
2190{
2191 if (PyTuple_GET_SIZE(args) == 0)
2192 return do_strip(self, RIGHTSTRIP); /* Common case */
2193 else
2194 return do_argstrip(self, RIGHTSTRIP, args);
2195}
2196
2197
2198PyDoc_STRVAR(lower__doc__,
2199"S.lower() -> string\n\
2200\n\
2201Return a copy of the string S converted to lowercase.");
2202
2203/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2204#ifndef _tolower
2205#define _tolower tolower
2206#endif
2207
2208static PyObject *
2209string_lower(PyBytesObject *self)
2210{
2211 char *s;
2212 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2213 PyObject *newobj;
2214
2215 newobj = PyBytes_FromStringAndSize(NULL, n);
2216 if (!newobj)
2217 return NULL;
2218
2219 s = PyBytes_AS_STRING(newobj);
2220
2221 Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2222
2223 for (i = 0; i < n; i++) {
2224 int c = Py_CHARMASK(s[i]);
2225 if (isupper(c))
2226 s[i] = _tolower(c);
2227 }
2228
2229 return newobj;
2230}
2231
2232PyDoc_STRVAR(upper__doc__,
2233"S.upper() -> string\n\
2234\n\
2235Return a copy of the string S converted to uppercase.");
2236
2237#ifndef _toupper
2238#define _toupper toupper
2239#endif
2240
2241static PyObject *
2242string_upper(PyBytesObject *self)
2243{
2244 char *s;
2245 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2246 PyObject *newobj;
2247
2248 newobj = PyBytes_FromStringAndSize(NULL, n);
2249 if (!newobj)
2250 return NULL;
2251
2252 s = PyBytes_AS_STRING(newobj);
2253
2254 Py_MEMCPY(s, PyBytes_AS_STRING(self), n);
2255
2256 for (i = 0; i < n; i++) {
2257 int c = Py_CHARMASK(s[i]);
2258 if (islower(c))
2259 s[i] = _toupper(c);
2260 }
2261
2262 return newobj;
2263}
2264
2265PyDoc_STRVAR(title__doc__,
2266"S.title() -> string\n\
2267\n\
2268Return a titlecased version of S, i.e. words start with uppercase\n\
2269characters, all remaining cased characters have lowercase.");
2270
2271static PyObject*
2272string_title(PyBytesObject *self)
2273{
2274 char *s = PyBytes_AS_STRING(self), *s_new;
2275 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2276 int previous_is_cased = 0;
2277 PyObject *newobj;
2278
2279 newobj = PyBytes_FromStringAndSize(NULL, n);
2280 if (newobj == NULL)
2281 return NULL;
2282 s_new = PyBytes_AsString(newobj);
2283 for (i = 0; i < n; i++) {
2284 int c = Py_CHARMASK(*s++);
2285 if (islower(c)) {
2286 if (!previous_is_cased)
2287 c = toupper(c);
2288 previous_is_cased = 1;
2289 } else if (isupper(c)) {
2290 if (previous_is_cased)
2291 c = tolower(c);
2292 previous_is_cased = 1;
2293 } else
2294 previous_is_cased = 0;
2295 *s_new++ = c;
2296 }
2297 return newobj;
2298}
2299
2300PyDoc_STRVAR(capitalize__doc__,
2301"S.capitalize() -> string\n\
2302\n\
2303Return a copy of the string S with only its first character\n\
2304capitalized.");
2305
2306static PyObject *
2307string_capitalize(PyBytesObject *self)
2308{
2309 char *s = PyBytes_AS_STRING(self), *s_new;
2310 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2311 PyObject *newobj;
2312
2313 newobj = PyBytes_FromStringAndSize(NULL, n);
2314 if (newobj == NULL)
2315 return NULL;
2316 s_new = PyBytes_AsString(newobj);
2317 if (0 < n) {
2318 int c = Py_CHARMASK(*s++);
2319 if (islower(c))
2320 *s_new = toupper(c);
2321 else
2322 *s_new = c;
2323 s_new++;
2324 }
2325 for (i = 1; i < n; i++) {
2326 int c = Py_CHARMASK(*s++);
2327 if (isupper(c))
2328 *s_new = tolower(c);
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
2333 return newobj;
2334}
2335
2336
2337PyDoc_STRVAR(count__doc__,
2338"S.count(sub[, start[, end]]) -> int\n\
2339\n\
2340Return the number of non-overlapping occurrences of substring sub in\n\
2341string S[start:end]. Optional arguments start and end are interpreted\n\
2342as in slice notation.");
2343
2344static PyObject *
2345string_count(PyBytesObject *self, PyObject *args)
2346{
2347 PyObject *sub_obj;
2348 const char *str = PyBytes_AS_STRING(self), *sub;
2349 Py_ssize_t sub_len;
2350 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2351
2352 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2353 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2354 return NULL;
2355
2356 if (PyBytes_Check(sub_obj)) {
2357 sub = PyBytes_AS_STRING(sub_obj);
2358 sub_len = PyBytes_GET_SIZE(sub_obj);
2359 }
2360#ifdef Py_USING_UNICODE
2361 else if (PyUnicode_Check(sub_obj)) {
2362 Py_ssize_t count;
2363 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2364 if (count == -1)
2365 return NULL;
2366 else
2367 return PyInt_FromSsize_t(count);
2368 }
2369#endif
2370 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2371 return NULL;
2372
2373 string_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
2374
2375 return PyInt_FromSsize_t(
2376 stringlib_count(str + start, end - start, sub, sub_len)
2377 );
2378}
2379
2380PyDoc_STRVAR(swapcase__doc__,
2381"S.swapcase() -> string\n\
2382\n\
2383Return a copy of the string S with uppercase characters\n\
2384converted to lowercase and vice versa.");
2385
2386static PyObject *
2387string_swapcase(PyBytesObject *self)
2388{
2389 char *s = PyBytes_AS_STRING(self), *s_new;
2390 Py_ssize_t i, n = PyBytes_GET_SIZE(self);
2391 PyObject *newobj;
2392
2393 newobj = PyBytes_FromStringAndSize(NULL, n);
2394 if (newobj == NULL)
2395 return NULL;
2396 s_new = PyBytes_AsString(newobj);
2397 for (i = 0; i < n; i++) {
2398 int c = Py_CHARMASK(*s++);
2399 if (islower(c)) {
2400 *s_new = toupper(c);
2401 }
2402 else if (isupper(c)) {
2403 *s_new = tolower(c);
2404 }
2405 else
2406 *s_new = c;
2407 s_new++;
2408 }
2409 return newobj;
2410}
2411
2412
2413PyDoc_STRVAR(translate__doc__,
2414"S.translate(table [,deletechars]) -> string\n\
2415\n\
2416Return a copy of the string S, where all characters occurring\n\
2417in the optional argument deletechars are removed, and the\n\
2418remaining characters have been mapped through the given\n\
2419translation table, which must be a string of length 256.");
2420
2421static PyObject *
2422string_translate(PyBytesObject *self, PyObject *args)
2423{
2424 register char *input, *output;
2425 const char *table;
2426 register Py_ssize_t i, c, changed = 0;
2427 PyObject *input_obj = (PyObject*)self;
2428 const char *output_start, *del_table=NULL;
2429 Py_ssize_t inlen, tablen, dellen = 0;
2430 PyObject *result;
2431 int trans_table[256];
2432 PyObject *tableobj, *delobj = NULL;
2433
2434 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2435 &tableobj, &delobj))
2436 return NULL;
2437
2438 if (PyBytes_Check(tableobj)) {
2439 table = PyBytes_AS_STRING(tableobj);
2440 tablen = PyBytes_GET_SIZE(tableobj);
2441 }
2442 else if (tableobj == Py_None) {
2443 table = NULL;
2444 tablen = 256;
2445 }
2446#ifdef Py_USING_UNICODE
2447 else if (PyUnicode_Check(tableobj)) {
2448 /* Unicode .translate() does not support the deletechars
2449 parameter; instead a mapping to None will cause characters
2450 to be deleted. */
2451 if (delobj != NULL) {
2452 PyErr_SetString(PyExc_TypeError,
2453 "deletions are implemented differently for unicode");
2454 return NULL;
2455 }
2456 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2457 }
2458#endif
2459 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2460 return NULL;
2461
2462 if (tablen != 256) {
2463 PyErr_SetString(PyExc_ValueError,
2464 "translation table must be 256 characters long");
2465 return NULL;
2466 }
2467
2468 if (delobj != NULL) {
2469 if (PyBytes_Check(delobj)) {
2470 del_table = PyBytes_AS_STRING(delobj);
2471 dellen = PyBytes_GET_SIZE(delobj);
2472 }
2473#ifdef Py_USING_UNICODE
2474 else if (PyUnicode_Check(delobj)) {
2475 PyErr_SetString(PyExc_TypeError,
2476 "deletions are implemented differently for unicode");
2477 return NULL;
2478 }
2479#endif
2480 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2481 return NULL;
2482 }
2483 else {
2484 del_table = NULL;
2485 dellen = 0;
2486 }
2487
2488 inlen = PyBytes_GET_SIZE(input_obj);
2489 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2490 if (result == NULL)
2491 return NULL;
2492 output_start = output = PyBytes_AsString(result);
2493 input = PyBytes_AS_STRING(input_obj);
2494
2495 if (dellen == 0 && table != NULL) {
2496 /* If no deletions are required, use faster code */
2497 for (i = inlen; --i >= 0; ) {
2498 c = Py_CHARMASK(*input++);
2499 if (Py_CHARMASK((*output++ = table[c])) != c)
2500 changed = 1;
2501 }
2502 if (changed || !PyBytes_CheckExact(input_obj))
2503 return result;
2504 Py_DECREF(result);
2505 Py_INCREF(input_obj);
2506 return input_obj;
2507 }
2508
2509 if (table == NULL) {
2510 for (i = 0; i < 256; i++)
2511 trans_table[i] = Py_CHARMASK(i);
2512 } else {
2513 for (i = 0; i < 256; i++)
2514 trans_table[i] = Py_CHARMASK(table[i]);
2515 }
2516
2517 for (i = 0; i < dellen; i++)
2518 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2519
2520 for (i = inlen; --i >= 0; ) {
2521 c = Py_CHARMASK(*input++);
2522 if (trans_table[c] != -1)
2523 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2524 continue;
2525 changed = 1;
2526 }
2527 if (!changed && PyBytes_CheckExact(input_obj)) {
2528 Py_DECREF(result);
2529 Py_INCREF(input_obj);
2530 return input_obj;
2531 }
2532 /* Fix the size of the resulting string */
2533 if (inlen > 0)
2534 _PyBytes_Resize(&result, output - output_start);
2535 return result;
2536}
2537
2538
/* Search directions passed as the `direction` argument of
   findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to
   c within the first target_len bytes of target, or NULL if there is
   none (thin wrapper around memchr() that yields a char *). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2546
2547/* String ops must return a string. */
2548/* If the object is subclass of string, create a copy */
2549Py_LOCAL(PyBytesObject *)
2550return_self(PyBytesObject *self)
2551{
2552 if (PyBytes_CheckExact(self)) {
2553 Py_INCREF(self);
2554 return self;
2555 }
2556 return (PyBytesObject *)PyBytes_FromStringAndSize(
2557 PyBytes_AS_STRING(self),
2558 PyBytes_GET_SIZE(self));
2559}
2560
2561Py_LOCAL_INLINE(Py_ssize_t)
2562countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2563{
2564 Py_ssize_t count=0;
2565 const char *start=target;
2566 const char *end=target+target_len;
2567
2568 while ( (start=findchar(start, end-start, c)) != NULL ) {
2569 count++;
2570 if (count >= maxcount)
2571 break;
2572 start += 1;
2573 }
2574 return count;
2575}
2576
2577Py_LOCAL(Py_ssize_t)
2578findstring(const char *target, Py_ssize_t target_len,
2579 const char *pattern, Py_ssize_t pattern_len,
2580 Py_ssize_t start,
2581 Py_ssize_t end,
2582 int direction)
2583{
2584 if (start < 0) {
2585 start += target_len;
2586 if (start < 0)
2587 start = 0;
2588 }
2589 if (end > target_len) {
2590 end = target_len;
2591 } else if (end < 0) {
2592 end += target_len;
2593 if (end < 0)
2594 end = 0;
2595 }
2596
2597 /* zero-length substrings always match at the first attempt */
2598 if (pattern_len == 0)
2599 return (direction > 0) ? start : end;
2600
2601 end -= pattern_len;
2602
2603 if (direction < 0) {
2604 for (; end >= start; end--)
2605 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2606 return end;
2607 } else {
2608 for (; start <= end; start++)
2609 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2610 return start;
2611 }
2612 return -1;
2613}
2614
2615Py_LOCAL_INLINE(Py_ssize_t)
2616countstring(const char *target, Py_ssize_t target_len,
2617 const char *pattern, Py_ssize_t pattern_len,
2618 Py_ssize_t start,
2619 Py_ssize_t end,
2620 int direction, Py_ssize_t maxcount)
2621{
2622 Py_ssize_t count=0;
2623
2624 if (start < 0) {
2625 start += target_len;
2626 if (start < 0)
2627 start = 0;
2628 }
2629 if (end > target_len) {
2630 end = target_len;
2631 } else if (end < 0) {
2632 end += target_len;
2633 if (end < 0)
2634 end = 0;
2635 }
2636
2637 /* zero-length substrings match everywhere */
2638 if (pattern_len == 0 || maxcount == 0) {
2639 if (target_len+1 < maxcount)
2640 return target_len+1;
2641 return maxcount;
2642 }
2643
2644 end -= pattern_len;
2645 if (direction < 0) {
2646 for (; (end >= start); end--)
2647 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2648 count++;
2649 if (--maxcount <= 0) break;
2650 end -= pattern_len-1;
2651 }
2652 } else {
2653 for (; (start <= end); start++)
2654 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2655 count++;
2656 if (--maxcount <= 0)
2657 break;
2658 start += pattern_len-1;
2659 }
2660 }
2661 return count;
2662}
2663
2664
2665/* Algorithms for different cases of string replacement */
2666
2667/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2668Py_LOCAL(PyBytesObject *)
2669replace_interleave(PyBytesObject *self,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount)
2672{
2673 char *self_s, *result_s;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, i, product;
2676 PyBytesObject *result;
2677
2678 self_len = PyBytes_GET_SIZE(self);
2679
2680 /* 1 at the end plus 1 after every character */
2681 count = self_len+1;
2682 if (maxcount < count)
2683 count = maxcount;
2684
2685 /* Check for overflow */
2686 /* result_len = count * to_len + self_len; */
2687 product = count * to_len;
2688 if (product / to_len != count) {
2689 PyErr_SetString(PyExc_OverflowError,
2690 "replace string is too long");
2691 return NULL;
2692 }
2693 result_len = product + self_len;
2694 if (result_len < 0) {
2695 PyErr_SetString(PyExc_OverflowError,
2696 "replace string is too long");
2697 return NULL;
2698 }
2699
2700 if (! (result = (PyBytesObject *)
2701 PyBytes_FromStringAndSize(NULL, result_len)) )
2702 return NULL;
2703
2704 self_s = PyBytes_AS_STRING(self);
2705 result_s = PyBytes_AS_STRING(result);
2706
2707 /* TODO: special case single character, which doesn't need memcpy */
2708
2709 /* Lay the first one down (guaranteed this will occur) */
2710 Py_MEMCPY(result_s, to_s, to_len);
2711 result_s += to_len;
2712 count -= 1;
2713
2714 for (i=0; i<count; i++) {
2715 *result_s++ = *self_s++;
2716 Py_MEMCPY(result_s, to_s, to_len);
2717 result_s += to_len;
2718 }
2719
2720 /* Copy the rest of the original string */
2721 Py_MEMCPY(result_s, self_s, self_len-i);
2722
2723 return result;
2724}
2725
2726/* Special case for deleting a single character */
2727/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2728Py_LOCAL(PyBytesObject *)
2729replace_delete_single_character(PyBytesObject *self,
2730 char from_c, Py_ssize_t maxcount)
2731{
2732 char *self_s, *result_s;
2733 char *start, *next, *end;
2734 Py_ssize_t self_len, result_len;
2735 Py_ssize_t count;
2736 PyBytesObject *result;
2737
2738 self_len = PyBytes_GET_SIZE(self);
2739 self_s = PyBytes_AS_STRING(self);
2740
2741 count = countchar(self_s, self_len, from_c, maxcount);
2742 if (count == 0) {
2743 return return_self(self);
2744 }
2745
2746 result_len = self_len - count; /* from_len == 1 */
2747 assert(result_len>=0);
2748
2749 if ( (result = (PyBytesObject *)
2750 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2751 return NULL;
2752 result_s = PyBytes_AS_STRING(result);
2753
2754 start = self_s;
2755 end = self_s + self_len;
2756 while (count-- > 0) {
2757 next = findchar(start, end-start, from_c);
2758 if (next == NULL)
2759 break;
2760 Py_MEMCPY(result_s, start, next-start);
2761 result_s += (next-start);
2762 start = next+1;
2763 }
2764 Py_MEMCPY(result_s, start, end-start);
2765
2766 return result;
2767}
2768
2769/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2770
2771Py_LOCAL(PyBytesObject *)
2772replace_delete_substring(PyBytesObject *self,
2773 const char *from_s, Py_ssize_t from_len,
2774 Py_ssize_t maxcount) {
2775 char *self_s, *result_s;
2776 char *start, *next, *end;
2777 Py_ssize_t self_len, result_len;
2778 Py_ssize_t count, offset;
2779 PyBytesObject *result;
2780
2781 self_len = PyBytes_GET_SIZE(self);
2782 self_s = PyBytes_AS_STRING(self);
2783
2784 count = countstring(self_s, self_len,
2785 from_s, from_len,
2786 0, self_len, 1,
2787 maxcount);
2788
2789 if (count == 0) {
2790 /* no matches */
2791 return return_self(self);
2792 }
2793
2794 result_len = self_len - (count * from_len);
2795 assert (result_len>=0);
2796
2797 if ( (result = (PyBytesObject *)
2798 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2799 return NULL;
2800
2801 result_s = PyBytes_AS_STRING(result);
2802
2803 start = self_s;
2804 end = self_s + self_len;
2805 while (count-- > 0) {
2806 offset = findstring(start, end-start,
2807 from_s, from_len,
2808 0, end-start, FORWARD);
2809 if (offset == -1)
2810 break;
2811 next = start + offset;
2812
2813 Py_MEMCPY(result_s, start, next-start);
2814
2815 result_s += (next-start);
2816 start = next+from_len;
2817 }
2818 Py_MEMCPY(result_s, start, end-start);
2819 return result;
2820}
2821
2822/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2823Py_LOCAL(PyBytesObject *)
2824replace_single_character_in_place(PyBytesObject *self,
2825 char from_c, char to_c,
2826 Py_ssize_t maxcount)
2827{
2828 char *self_s, *result_s, *start, *end, *next;
2829 Py_ssize_t self_len;
2830 PyBytesObject *result;
2831
2832 /* The result string will be the same size */
2833 self_s = PyBytes_AS_STRING(self);
2834 self_len = PyBytes_GET_SIZE(self);
2835
2836 next = findchar(self_s, self_len, from_c);
2837
2838 if (next == NULL) {
2839 /* No matches; return the original string */
2840 return return_self(self);
2841 }
2842
2843 /* Need to make a new string */
2844 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2845 if (result == NULL)
2846 return NULL;
2847 result_s = PyBytes_AS_STRING(result);
2848 Py_MEMCPY(result_s, self_s, self_len);
2849
2850 /* change everything in-place, starting with this one */
2851 start = result_s + (next-self_s);
2852 *start = to_c;
2853 start++;
2854 end = result_s + self_len;
2855
2856 while (--maxcount > 0) {
2857 next = findchar(start, end-start, from_c);
2858 if (next == NULL)
2859 break;
2860 *next = to_c;
2861 start = next+1;
2862 }
2863
2864 return result;
2865}
2866
2867/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2868Py_LOCAL(PyBytesObject *)
2869replace_substring_in_place(PyBytesObject *self,
2870 const char *from_s, Py_ssize_t from_len,
2871 const char *to_s, Py_ssize_t to_len,
2872 Py_ssize_t maxcount)
2873{
2874 char *result_s, *start, *end;
2875 char *self_s;
2876 Py_ssize_t self_len, offset;
2877 PyBytesObject *result;
2878
2879 /* The result string will be the same size */
2880
2881 self_s = PyBytes_AS_STRING(self);
2882 self_len = PyBytes_GET_SIZE(self);
2883
2884 offset = findstring(self_s, self_len,
2885 from_s, from_len,
2886 0, self_len, FORWARD);
2887 if (offset == -1) {
2888 /* No matches; return the original string */
2889 return return_self(self);
2890 }
2891
2892 /* Need to make a new string */
2893 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2894 if (result == NULL)
2895 return NULL;
2896 result_s = PyBytes_AS_STRING(result);
2897 Py_MEMCPY(result_s, self_s, self_len);
2898
2899 /* change everything in-place, starting with this one */
2900 start = result_s + offset;
2901 Py_MEMCPY(start, to_s, from_len);
2902 start += from_len;
2903 end = result_s + self_len;
2904
2905 while ( --maxcount > 0) {
2906 offset = findstring(start, end-start,
2907 from_s, from_len,
2908 0, end-start, FORWARD);
2909 if (offset==-1)
2910 break;
2911 Py_MEMCPY(start+offset, to_s, from_len);
2912 start += offset+from_len;
2913 }
2914
2915 return result;
2916}
2917
2918/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2919Py_LOCAL(PyBytesObject *)
2920replace_single_character(PyBytesObject *self,
2921 char from_c,
2922 const char *to_s, Py_ssize_t to_len,
2923 Py_ssize_t maxcount)
2924{
2925 char *self_s, *result_s;
2926 char *start, *next, *end;
2927 Py_ssize_t self_len, result_len;
2928 Py_ssize_t count, product;
2929 PyBytesObject *result;
2930
2931 self_s = PyBytes_AS_STRING(self);
2932 self_len = PyBytes_GET_SIZE(self);
2933
2934 count = countchar(self_s, self_len, from_c, maxcount);
2935 if (count == 0) {
2936 /* no matches, return unchanged */
2937 return return_self(self);
2938 }
2939
2940 /* use the difference between current and new, hence the "-1" */
2941 /* result_len = self_len + count * (to_len-1) */
2942 product = count * (to_len-1);
2943 if (product / (to_len-1) != count) {
2944 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2945 return NULL;
2946 }
2947 result_len = self_len + product;
2948 if (result_len < 0) {
2949 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2950 return NULL;
2951 }
2952
2953 if ( (result = (PyBytesObject *)
2954 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2955 return NULL;
2956 result_s = PyBytes_AS_STRING(result);
2957
2958 start = self_s;
2959 end = self_s + self_len;
2960 while (count-- > 0) {
2961 next = findchar(start, end-start, from_c);
2962 if (next == NULL)
2963 break;
2964
2965 if (next == start) {
2966 /* replace with the 'to' */
2967 Py_MEMCPY(result_s, to_s, to_len);
2968 result_s += to_len;
2969 start += 1;
2970 } else {
2971 /* copy the unchanged old then the 'to' */
2972 Py_MEMCPY(result_s, start, next-start);
2973 result_s += (next-start);
2974 Py_MEMCPY(result_s, to_s, to_len);
2975 result_s += to_len;
2976 start = next+1;
2977 }
2978 }
2979 /* Copy the remainder of the remaining string */
2980 Py_MEMCPY(result_s, start, end-start);
2981
2982 return result;
2983}
2984
2985/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2986Py_LOCAL(PyBytesObject *)
2987replace_substring(PyBytesObject *self,
2988 const char *from_s, Py_ssize_t from_len,
2989 const char *to_s, Py_ssize_t to_len,
2990 Py_ssize_t maxcount) {
2991 char *self_s, *result_s;
2992 char *start, *next, *end;
2993 Py_ssize_t self_len, result_len;
2994 Py_ssize_t count, offset, product;
2995 PyBytesObject *result;
2996
2997 self_s = PyBytes_AS_STRING(self);
2998 self_len = PyBytes_GET_SIZE(self);
2999
3000 count = countstring(self_s, self_len,
3001 from_s, from_len,
3002 0, self_len, FORWARD, maxcount);
3003 if (count == 0) {
3004 /* no matches, return unchanged */
3005 return return_self(self);
3006 }
3007
3008 /* Check for overflow */
3009 /* result_len = self_len + count * (to_len-from_len) */
3010 product = count * (to_len-from_len);
3011 if (product / (to_len-from_len) != count) {
3012 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3013 return NULL;
3014 }
3015 result_len = self_len + product;
3016 if (result_len < 0) {
3017 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3018 return NULL;
3019 }
3020
3021 if ( (result = (PyBytesObject *)
3022 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
3023 return NULL;
3024 result_s = PyBytes_AS_STRING(result);
3025
3026 start = self_s;
3027 end = self_s + self_len;
3028 while (count-- > 0) {
3029 offset = findstring(start, end-start,
3030 from_s, from_len,
3031 0, end-start, FORWARD);
3032 if (offset == -1)
3033 break;
3034 next = start+offset;
3035 if (next == start) {
3036 /* replace with the 'to' */
3037 Py_MEMCPY(result_s, to_s, to_len);
3038 result_s += to_len;
3039 start += from_len;
3040 } else {
3041 /* copy the unchanged old then the 'to' */
3042 Py_MEMCPY(result_s, start, next-start);
3043 result_s += (next-start);
3044 Py_MEMCPY(result_s, to_s, to_len);
3045 result_s += to_len;
3046 start = next+from_len;
3047 }
3048 }
3049 /* Copy the remainder of the remaining string */
3050 Py_MEMCPY(result_s, start, end-start);
3051
3052 return result;
3053}
3054
3055
3056Py_LOCAL(PyBytesObject *)
3057replace(PyBytesObject *self,
3058 const char *from_s, Py_ssize_t from_len,
3059 const char *to_s, Py_ssize_t to_len,
3060 Py_ssize_t maxcount)
3061{
3062 if (maxcount < 0) {
3063 maxcount = PY_SSIZE_T_MAX;
3064 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3065 /* nothing to do; return the original string */
3066 return return_self(self);
3067 }
3068
3069 if (maxcount == 0 ||
3070 (from_len == 0 && to_len == 0)) {
3071 /* nothing to do; return the original string */
3072 return return_self(self);
3073 }
3074
3075 /* Handle zero-length special cases */
3076
3077 if (from_len == 0) {
3078 /* insert the 'to' string everywhere. */
3079 /* >>> "Python".replace("", ".") */
3080 /* '.P.y.t.h.o.n.' */
3081 return replace_interleave(self, to_s, to_len, maxcount);
3082 }
3083
3084 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3085 /* point for an empty self string to generate a non-empty string */
3086 /* Special case so the remaining code always gets a non-empty string */
3087 if (PyBytes_GET_SIZE(self) == 0) {
3088 return return_self(self);
3089 }
3090
3091 if (to_len == 0) {
3092 /* delete all occurances of 'from' string */
3093 if (from_len == 1) {
3094 return replace_delete_single_character(
3095 self, from_s[0], maxcount);
3096 } else {
3097 return replace_delete_substring(self, from_s, from_len, maxcount);
3098 }
3099 }
3100
3101 /* Handle special case where both strings have the same length */
3102
3103 if (from_len == to_len) {
3104 if (from_len == 1) {
3105 return replace_single_character_in_place(
3106 self,
3107 from_s[0],
3108 to_s[0],
3109 maxcount);
3110 } else {
3111 return replace_substring_in_place(
3112 self, from_s, from_len, to_s, to_len, maxcount);
3113 }
3114 }
3115
3116 /* Otherwise use the more generic algorithms */
3117 if (from_len == 1) {
3118 return replace_single_character(self, from_s[0],
3119 to_s, to_len, maxcount);
3120 } else {
3121 /* len('from')>=2, len('to')>=1 */
3122 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3123 }
3124}
3125
3126PyDoc_STRVAR(replace__doc__,
3127"S.replace (old, new[, count]) -> string\n\
3128\n\
3129Return a copy of string S with all occurrences of substring\n\
3130old replaced by new. If the optional argument count is\n\
3131given, only the first count occurrences are replaced.");
3132
3133static PyObject *
3134string_replace(PyBytesObject *self, PyObject *args)
3135{
3136 Py_ssize_t count = -1;
3137 PyObject *from, *to;
3138 const char *from_s, *to_s;
3139 Py_ssize_t from_len, to_len;
3140
3141 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3142 return NULL;
3143
3144 if (PyBytes_Check(from)) {
3145 from_s = PyBytes_AS_STRING(from);
3146 from_len = PyBytes_GET_SIZE(from);
3147 }
3148#ifdef Py_USING_UNICODE
3149 if (PyUnicode_Check(from))
3150 return PyUnicode_Replace((PyObject *)self,
3151 from, to, count);
3152#endif
3153 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3154 return NULL;
3155
3156 if (PyBytes_Check(to)) {
3157 to_s = PyBytes_AS_STRING(to);
3158 to_len = PyBytes_GET_SIZE(to);
3159 }
3160#ifdef Py_USING_UNICODE
3161 else if (PyUnicode_Check(to))
3162 return PyUnicode_Replace((PyObject *)self,
3163 from, to, count);
3164#endif
3165 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3166 return NULL;
3167
3168 return (PyObject *)replace((PyBytesObject *) self,
3169 from_s, from_len,
3170 to_s, to_len, count);
3171}
3172
3173/** End DALKE **/
3174
3175/* Matches the end (direction >= 0) or start (direction < 0) of self
3176 * against substr, using the start and end arguments. Returns
3177 * -1 on error, 0 if not found and 1 if found.
3178 */
3179Py_LOCAL(int)
3180_string_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
3181 Py_ssize_t end, int direction)
3182{
3183 Py_ssize_t len = PyBytes_GET_SIZE(self);
3184 Py_ssize_t slen;
3185 const char* sub;
3186 const char* str;
3187
3188 if (PyBytes_Check(substr)) {
3189 sub = PyBytes_AS_STRING(substr);
3190 slen = PyBytes_GET_SIZE(substr);
3191 }
3192#ifdef Py_USING_UNICODE
3193 else if (PyUnicode_Check(substr))
3194 return PyUnicode_Tailmatch((PyObject *)self,
3195 substr, start, end, direction);
3196#endif
3197 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3198 return -1;
3199 str = PyBytes_AS_STRING(self);
3200
3201 string_adjust_indices(&start, &end, len);
3202
3203 if (direction < 0) {
3204 /* startswith */
3205 if (start+slen > len)
3206 return 0;
3207 } else {
3208 /* endswith */
3209 if (end-start < slen || start > len)
3210 return 0;
3211
3212 if (end-slen > start)
3213 start = end - slen;
3214 }
3215 if (end-start >= slen)
3216 return ! memcmp(str+start, sub, slen);
3217 return 0;
3218}
3219
3220
3221PyDoc_STRVAR(startswith__doc__,
3222"S.startswith(prefix[, start[, end]]) -> bool\n\
3223\n\
3224Return True if S starts with the specified prefix, False otherwise.\n\
3225With optional start, test S beginning at that position.\n\
3226With optional end, stop comparing S at that position.\n\
3227prefix can also be a tuple of strings to try.");
3228
3229static PyObject *
3230string_startswith(PyBytesObject *self, PyObject *args)
3231{
3232 Py_ssize_t start = 0;
3233 Py_ssize_t end = PY_SSIZE_T_MAX;
3234 PyObject *subobj;
3235 int result;
3236
3237 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3238 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3239 return NULL;
3240 if (PyTuple_Check(subobj)) {
3241 Py_ssize_t i;
3242 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3243 result = _string_tailmatch(self,
3244 PyTuple_GET_ITEM(subobj, i),
3245 start, end, -1);
3246 if (result == -1)
3247 return NULL;
3248 else if (result) {
3249 Py_RETURN_TRUE;
3250 }
3251 }
3252 Py_RETURN_FALSE;
3253 }
3254 result = _string_tailmatch(self, subobj, start, end, -1);
3255 if (result == -1)
3256 return NULL;
3257 else
3258 return PyBool_FromLong(result);
3259}
3260
3261
3262PyDoc_STRVAR(endswith__doc__,
3263"S.endswith(suffix[, start[, end]]) -> bool\n\
3264\n\
3265Return True if S ends with the specified suffix, False otherwise.\n\
3266With optional start, test S beginning at that position.\n\
3267With optional end, stop comparing S at that position.\n\
3268suffix can also be a tuple of strings to try.");
3269
3270static PyObject *
3271string_endswith(PyBytesObject *self, PyObject *args)
3272{
3273 Py_ssize_t start = 0;
3274 Py_ssize_t end = PY_SSIZE_T_MAX;
3275 PyObject *subobj;
3276 int result;
3277
3278 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3279 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3280 return NULL;
3281 if (PyTuple_Check(subobj)) {
3282 Py_ssize_t i;
3283 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3284 result = _string_tailmatch(self,
3285 PyTuple_GET_ITEM(subobj, i),
3286 start, end, +1);
3287 if (result == -1)
3288 return NULL;
3289 else if (result) {
3290 Py_RETURN_TRUE;
3291 }
3292 }
3293 Py_RETURN_FALSE;
3294 }
3295 result = _string_tailmatch(self, subobj, start, end, +1);
3296 if (result == -1)
3297 return NULL;
3298 else
3299 return PyBool_FromLong(result);
3300}
3301
3302
3303PyDoc_STRVAR(encode__doc__,
3304"S.encode([encoding[,errors]]) -> object\n\
3305\n\
3306Encodes S using the codec registered for encoding. encoding defaults\n\
3307to the default encoding. errors may be given to set a different error\n\
3308handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3309a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3310'xmlcharrefreplace' as well as any other name registered with\n\
3311codecs.register_error that is able to handle UnicodeEncodeErrors.");
3312
3313static PyObject *
3314string_encode(PyBytesObject *self, PyObject *args)
3315{
3316 char *encoding = NULL;
3317 char *errors = NULL;
3318 PyObject *v;
3319
3320 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3321 return NULL;
3322 v = PyBytes_AsEncodedObject((PyObject *)self, encoding, errors);
3323 if (v == NULL)
3324 goto onError;
3325 if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3326 PyErr_Format(PyExc_TypeError,
3327 "encoder did not return a string/unicode object "
3328 "(type=%.400s)",
3329 Py_TYPE(v)->tp_name);
3330 Py_DECREF(v);
3331 return NULL;
3332 }
3333 return v;
3334
3335 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003336 return NULL;
3337}
3338
Christian Heimes44720832008-05-26 13:01:01 +00003339
3340PyDoc_STRVAR(decode__doc__,
3341"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003342\n\
Christian Heimes44720832008-05-26 13:01:01 +00003343Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003344to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003345handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3346a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3347as well as any other name registerd with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003348able to handle UnicodeDecodeErrors.");
3349
3350static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003351string_decode(PyBytesObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003352{
Christian Heimes44720832008-05-26 13:01:01 +00003353 char *encoding = NULL;
3354 char *errors = NULL;
3355 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003356
3357 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3358 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003359 v = PyBytes_AsDecodedObject((PyObject *)self, encoding, errors);
3360 if (v == NULL)
3361 goto onError;
3362 if (!PyBytes_Check(v) && !PyUnicode_Check(v)) {
3363 PyErr_Format(PyExc_TypeError,
3364 "decoder did not return a string/unicode object "
3365 "(type=%.400s)",
3366 Py_TYPE(v)->tp_name);
3367 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003369 }
3370 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003371
Christian Heimes44720832008-05-26 13:01:01 +00003372 onError:
3373 return NULL;
3374}
3375
3376
3377PyDoc_STRVAR(expandtabs__doc__,
3378"S.expandtabs([tabsize]) -> string\n\
3379\n\
3380Return a copy of S where all tab characters are expanded using spaces.\n\
3381If tabsize is not given, a tab size of 8 characters is assumed.");
3382
3383static PyObject*
3384string_expandtabs(PyBytesObject *self, PyObject *args)
3385{
3386 const char *e, *p, *qe;
3387 char *q;
3388 Py_ssize_t i, j, incr;
3389 PyObject *u;
3390 int tabsize = 8;
3391
3392 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3393 return NULL;
3394
3395 /* First pass: determine size of output string */
3396 i = 0; /* chars up to and including most recent \n or \r */
3397 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3398 e = PyBytes_AS_STRING(self) + PyBytes_GET_SIZE(self); /* end of input */
3399 for (p = PyBytes_AS_STRING(self); p < e; p++)
3400 if (*p == '\t') {
3401 if (tabsize > 0) {
3402 incr = tabsize - (j % tabsize);
3403 if (j > PY_SSIZE_T_MAX - incr)
3404 goto overflow1;
3405 j += incr;
3406 }
3407 }
3408 else {
3409 if (j > PY_SSIZE_T_MAX - 1)
3410 goto overflow1;
3411 j++;
3412 if (*p == '\n' || *p == '\r') {
3413 if (i > PY_SSIZE_T_MAX - j)
3414 goto overflow1;
3415 i += j;
3416 j = 0;
3417 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003418 }
Christian Heimes44720832008-05-26 13:01:01 +00003419
3420 if (i > PY_SSIZE_T_MAX - j)
3421 goto overflow1;
3422
3423 /* Second pass: create output string and fill it */
3424 u = PyBytes_FromStringAndSize(NULL, i + j);
3425 if (!u)
3426 return NULL;
3427
3428 j = 0; /* same as in first pass */
3429 q = PyBytes_AS_STRING(u); /* next output char */
3430 qe = PyBytes_AS_STRING(u) + PyBytes_GET_SIZE(u); /* end of output */
3431
3432 for (p = PyBytes_AS_STRING(self); p < e; p++)
3433 if (*p == '\t') {
3434 if (tabsize > 0) {
3435 i = tabsize - (j % tabsize);
3436 j += i;
3437 while (i--) {
3438 if (q >= qe)
3439 goto overflow2;
3440 *q++ = ' ';
3441 }
3442 }
3443 }
3444 else {
3445 if (q >= qe)
3446 goto overflow2;
3447 *q++ = *p;
3448 j++;
3449 if (*p == '\n' || *p == '\r')
3450 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003451 }
Christian Heimes44720832008-05-26 13:01:01 +00003452
3453 return u;
3454
3455 overflow2:
3456 Py_DECREF(u);
3457 overflow1:
3458 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3459 return NULL;
3460}
3461
3462Py_LOCAL_INLINE(PyObject *)
3463pad(PyBytesObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3464{
3465 PyObject *u;
3466
3467 if (left < 0)
3468 left = 0;
3469 if (right < 0)
3470 right = 0;
3471
3472 if (left == 0 && right == 0 && PyBytes_CheckExact(self)) {
3473 Py_INCREF(self);
3474 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003475 }
3476
Christian Heimes44720832008-05-26 13:01:01 +00003477 u = PyBytes_FromStringAndSize(NULL,
3478 left + PyBytes_GET_SIZE(self) + right);
3479 if (u) {
3480 if (left)
3481 memset(PyBytes_AS_STRING(u), fill, left);
3482 Py_MEMCPY(PyBytes_AS_STRING(u) + left,
3483 PyBytes_AS_STRING(self),
3484 PyBytes_GET_SIZE(self));
3485 if (right)
3486 memset(PyBytes_AS_STRING(u) + left + PyBytes_GET_SIZE(self),
3487 fill, right);
3488 }
3489
3490 return u;
3491}
3492
3493PyDoc_STRVAR(ljust__doc__,
3494"S.ljust(width[, fillchar]) -> string\n"
3495"\n"
3496"Return S left justified in a string of length width. Padding is\n"
3497"done using the specified fill character (default is a space).");
3498
3499static PyObject *
3500string_ljust(PyBytesObject *self, PyObject *args)
3501{
3502 Py_ssize_t width;
3503 char fillchar = ' ';
3504
3505 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3506 return NULL;
3507
3508 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3509 Py_INCREF(self);
3510 return (PyObject*) self;
3511 }
3512
3513 return pad(self, 0, width - PyBytes_GET_SIZE(self), fillchar);
3514}
3515
3516
3517PyDoc_STRVAR(rjust__doc__,
3518"S.rjust(width[, fillchar]) -> string\n"
3519"\n"
3520"Return S right justified in a string of length width. Padding is\n"
3521"done using the specified fill character (default is a space)");
3522
3523static PyObject *
3524string_rjust(PyBytesObject *self, PyObject *args)
3525{
3526 Py_ssize_t width;
3527 char fillchar = ' ';
3528
3529 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3530 return NULL;
3531
3532 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3533 Py_INCREF(self);
3534 return (PyObject*) self;
3535 }
3536
3537 return pad(self, width - PyBytes_GET_SIZE(self), 0, fillchar);
3538}
3539
3540
3541PyDoc_STRVAR(center__doc__,
3542"S.center(width[, fillchar]) -> string\n"
3543"\n"
3544"Return S centered in a string of length width. Padding is\n"
3545"done using the specified fill character (default is a space)");
3546
3547static PyObject *
3548string_center(PyBytesObject *self, PyObject *args)
3549{
3550 Py_ssize_t marg, left;
3551 Py_ssize_t width;
3552 char fillchar = ' ';
3553
3554 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3555 return NULL;
3556
3557 if (PyBytes_GET_SIZE(self) >= width && PyBytes_CheckExact(self)) {
3558 Py_INCREF(self);
3559 return (PyObject*) self;
3560 }
3561
3562 marg = width - PyBytes_GET_SIZE(self);
3563 left = marg / 2 + (marg & width & 1);
3564
3565 return pad(self, left, marg - left, fillchar);
3566}
3567
3568PyDoc_STRVAR(zfill__doc__,
3569"S.zfill(width) -> string\n"
3570"\n"
3571"Pad a numeric string S with zeros on the left, to fill a field\n"
3572"of the specified width. The string S is never truncated.");
3573
3574static PyObject *
3575string_zfill(PyBytesObject *self, PyObject *args)
3576{
3577 Py_ssize_t fill;
3578 PyObject *s;
3579 char *p;
3580 Py_ssize_t width;
3581
3582 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3583 return NULL;
3584
3585 if (PyBytes_GET_SIZE(self) >= width) {
3586 if (PyBytes_CheckExact(self)) {
3587 Py_INCREF(self);
3588 return (PyObject*) self;
3589 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003590 else
Christian Heimes44720832008-05-26 13:01:01 +00003591 return PyBytes_FromStringAndSize(
3592 PyBytes_AS_STRING(self),
3593 PyBytes_GET_SIZE(self)
3594 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003595 }
3596
Christian Heimes44720832008-05-26 13:01:01 +00003597 fill = width - PyBytes_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003598
Christian Heimes44720832008-05-26 13:01:01 +00003599 s = pad(self, fill, 0, '0');
3600
3601 if (s == NULL)
3602 return NULL;
3603
3604 p = PyBytes_AS_STRING(s);
3605 if (p[fill] == '+' || p[fill] == '-') {
3606 /* move sign to beginning of string */
3607 p[0] = p[fill];
3608 p[fill] = '0';
3609 }
3610
3611 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003612}
3613
Christian Heimes44720832008-05-26 13:01:01 +00003614PyDoc_STRVAR(isspace__doc__,
3615"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616\n\
Christian Heimes44720832008-05-26 13:01:01 +00003617Return True if all characters in S are whitespace\n\
3618and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619
Christian Heimes44720832008-05-26 13:01:01 +00003620static PyObject*
3621string_isspace(PyBytesObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003622{
Christian Heimes44720832008-05-26 13:01:01 +00003623 register const unsigned char *p
3624 = (unsigned char *) PyBytes_AS_STRING(self);
3625 register const unsigned char *e;
3626
3627 /* Shortcut for single character strings */
3628 if (PyBytes_GET_SIZE(self) == 1 &&
3629 isspace(*p))
3630 return PyBool_FromLong(1);
3631
3632 /* Special case for empty strings */
3633 if (PyBytes_GET_SIZE(self) == 0)
3634 return PyBool_FromLong(0);
3635
3636 e = p + PyBytes_GET_SIZE(self);
3637 for (; p < e; p++) {
3638 if (!isspace(*p))
3639 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003640 }
Christian Heimes44720832008-05-26 13:01:01 +00003641 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003642}
3643
Christian Heimes44720832008-05-26 13:01:01 +00003644
3645PyDoc_STRVAR(isalpha__doc__,
3646"S.isalpha() -> bool\n\
3647\n\
3648Return True if all characters in S are alphabetic\n\
3649and there is at least one character in S, False otherwise.");
3650
3651static PyObject*
3652string_isalpha(PyBytesObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003653{
Christian Heimes44720832008-05-26 13:01:01 +00003654 register const unsigned char *p
3655 = (unsigned char *) PyBytes_AS_STRING(self);
3656 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003657
Christian Heimes44720832008-05-26 13:01:01 +00003658 /* Shortcut for single character strings */
3659 if (PyBytes_GET_SIZE(self) == 1 &&
3660 isalpha(*p))
3661 return PyBool_FromLong(1);
3662
3663 /* Special case for empty strings */
3664 if (PyBytes_GET_SIZE(self) == 0)
3665 return PyBool_FromLong(0);
3666
3667 e = p + PyBytes_GET_SIZE(self);
3668 for (; p < e; p++) {
3669 if (!isalpha(*p))
3670 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003671 }
Christian Heimes44720832008-05-26 13:01:01 +00003672 return PyBool_FromLong(1);
3673}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003674
Christian Heimes44720832008-05-26 13:01:01 +00003675
3676PyDoc_STRVAR(isalnum__doc__,
3677"S.isalnum() -> bool\n\
3678\n\
3679Return True if all characters in S are alphanumeric\n\
3680and there is at least one character in S, False otherwise.");
3681
3682static PyObject*
3683string_isalnum(PyBytesObject *self)
3684{
3685 register const unsigned char *p
3686 = (unsigned char *) PyBytes_AS_STRING(self);
3687 register const unsigned char *e;
3688
3689 /* Shortcut for single character strings */
3690 if (PyBytes_GET_SIZE(self) == 1 &&
3691 isalnum(*p))
3692 return PyBool_FromLong(1);
3693
3694 /* Special case for empty strings */
3695 if (PyBytes_GET_SIZE(self) == 0)
3696 return PyBool_FromLong(0);
3697
3698 e = p + PyBytes_GET_SIZE(self);
3699 for (; p < e; p++) {
3700 if (!isalnum(*p))
3701 return PyBool_FromLong(0);
3702 }
3703 return PyBool_FromLong(1);
3704}
3705
3706
3707PyDoc_STRVAR(isdigit__doc__,
3708"S.isdigit() -> bool\n\
3709\n\
3710Return True if all characters in S are digits\n\
3711and there is at least one character in S, False otherwise.");
3712
3713static PyObject*
3714string_isdigit(PyBytesObject *self)
3715{
3716 register const unsigned char *p
3717 = (unsigned char *) PyBytes_AS_STRING(self);
3718 register const unsigned char *e;
3719
3720 /* Shortcut for single character strings */
3721 if (PyBytes_GET_SIZE(self) == 1 &&
3722 isdigit(*p))
3723 return PyBool_FromLong(1);
3724
3725 /* Special case for empty strings */
3726 if (PyBytes_GET_SIZE(self) == 0)
3727 return PyBool_FromLong(0);
3728
3729 e = p + PyBytes_GET_SIZE(self);
3730 for (; p < e; p++) {
3731 if (!isdigit(*p))
3732 return PyBool_FromLong(0);
3733 }
3734 return PyBool_FromLong(1);
3735}
3736
3737
3738PyDoc_STRVAR(islower__doc__,
3739"S.islower() -> bool\n\
3740\n\
3741Return True if all cased characters in S are lowercase and there is\n\
3742at least one cased character in S, False otherwise.");
3743
3744static PyObject*
3745string_islower(PyBytesObject *self)
3746{
3747 register const unsigned char *p
3748 = (unsigned char *) PyBytes_AS_STRING(self);
3749 register const unsigned char *e;
3750 int cased;
3751
3752 /* Shortcut for single character strings */
3753 if (PyBytes_GET_SIZE(self) == 1)
3754 return PyBool_FromLong(islower(*p) != 0);
3755
3756 /* Special case for empty strings */
3757 if (PyBytes_GET_SIZE(self) == 0)
3758 return PyBool_FromLong(0);
3759
3760 e = p + PyBytes_GET_SIZE(self);
3761 cased = 0;
3762 for (; p < e; p++) {
3763 if (isupper(*p))
3764 return PyBool_FromLong(0);
3765 else if (!cased && islower(*p))
3766 cased = 1;
3767 }
3768 return PyBool_FromLong(cased);
3769}
3770
3771
3772PyDoc_STRVAR(isupper__doc__,
3773"S.isupper() -> bool\n\
3774\n\
3775Return True if all cased characters in S are uppercase and there is\n\
3776at least one cased character in S, False otherwise.");
3777
3778static PyObject*
3779string_isupper(PyBytesObject *self)
3780{
3781 register const unsigned char *p
3782 = (unsigned char *) PyBytes_AS_STRING(self);
3783 register const unsigned char *e;
3784 int cased;
3785
3786 /* Shortcut for single character strings */
3787 if (PyBytes_GET_SIZE(self) == 1)
3788 return PyBool_FromLong(isupper(*p) != 0);
3789
3790 /* Special case for empty strings */
3791 if (PyBytes_GET_SIZE(self) == 0)
3792 return PyBool_FromLong(0);
3793
3794 e = p + PyBytes_GET_SIZE(self);
3795 cased = 0;
3796 for (; p < e; p++) {
3797 if (islower(*p))
3798 return PyBool_FromLong(0);
3799 else if (!cased && isupper(*p))
3800 cased = 1;
3801 }
3802 return PyBool_FromLong(cased);
3803}
3804
3805
3806PyDoc_STRVAR(istitle__doc__,
3807"S.istitle() -> bool\n\
3808\n\
3809Return True if S is a titlecased string and there is at least one\n\
3810character in S, i.e. uppercase characters may only follow uncased\n\
3811characters and lowercase characters only cased ones. Return False\n\
3812otherwise.");
3813
3814static PyObject*
3815string_istitle(PyBytesObject *self, PyObject *uncased)
3816{
3817 register const unsigned char *p
3818 = (unsigned char *) PyBytes_AS_STRING(self);
3819 register const unsigned char *e;
3820 int cased, previous_is_cased;
3821
3822 /* Shortcut for single character strings */
3823 if (PyBytes_GET_SIZE(self) == 1)
3824 return PyBool_FromLong(isupper(*p) != 0);
3825
3826 /* Special case for empty strings */
3827 if (PyBytes_GET_SIZE(self) == 0)
3828 return PyBool_FromLong(0);
3829
3830 e = p + PyBytes_GET_SIZE(self);
3831 cased = 0;
3832 previous_is_cased = 0;
3833 for (; p < e; p++) {
3834 register const unsigned char ch = *p;
3835
3836 if (isupper(ch)) {
3837 if (previous_is_cased)
3838 return PyBool_FromLong(0);
3839 previous_is_cased = 1;
3840 cased = 1;
3841 }
3842 else if (islower(ch)) {
3843 if (!previous_is_cased)
3844 return PyBool_FromLong(0);
3845 previous_is_cased = 1;
3846 cased = 1;
3847 }
3848 else
3849 previous_is_cased = 0;
3850 }
3851 return PyBool_FromLong(cased);
3852}
3853
3854
PyDoc_STRVAR(splitlines__doc__,
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");

/* str.splitlines([keepends]).
 *
 * Scans the bytes of self for '\n', '\r' and "\r\n" line breaks and
 * emits one piece per line through SPLIT_APPEND.  With a true keepends
 * the line break is included in each piece.  Returns the list, or NULL
 * on failure.
 *
 * NOTE(review): SPLIT_APPEND is a macro defined outside this function;
 * it presumably uses the locals `list` and `str` and the `onError`
 * label by name, so these must not be renamed -- confirm against the
 * macro definition.
 */
static PyObject*
string_splitlines(PyBytesObject *self, PyObject *args)
{
    register Py_ssize_t i;      /* scan position */
    register Py_ssize_t j;      /* start of the current line */
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;              /* not referenced directly in this body */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = PyBytes_AS_STRING(self);
    len = PyBytes_GET_SIZE(self);

    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines. The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case. A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain.*/

    list = PyList_New(0);
    if (!list)
        goto onError;

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;     /* end of the piece to emit */

        /* Find a line and append it */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len &&
                data[i+1] == '\n')
                i += 2;
            else
                i++;
            /* with keepends the piece extends past the line break */
            if (keepends)
                eol = i;
        }
        SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* Emit whatever is left after the last processed line, if any. */
    if (j < len) {
        SPLIT_APPEND(data, j, len);
    }

    return list;

 onError:
    /* list may be NULL here when PyList_New failed, hence XDECREF. */
    Py_XDECREF(list);
    return NULL;
}
3922
/* Release the split helper macro names so they are no longer defined
   for the remainder of the file. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003927
3928static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003929string_getnewargs(PyBytesObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003930{
Christian Heimes44720832008-05-26 13:01:01 +00003931 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003932}
3933
Christian Heimes1a6387e2008-03-26 12:49:49 +00003934
Christian Heimes44720832008-05-26 13:01:01 +00003935#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003936
Christian Heimes44720832008-05-26 13:01:01 +00003937PyDoc_STRVAR(format__doc__,
3938"S.format(*args, **kwargs) -> unicode\n\
3939\n\
3940");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003941
Christian Heimes44720832008-05-26 13:01:01 +00003942PyDoc_STRVAR(p_format__doc__,
3943"S.__format__(format_spec) -> unicode\n\
3944\n\
3945");
3946
3947
Christian Heimes1a6387e2008-03-26 12:49:49 +00003948static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003949string_methods[] = {
3950 /* Counterparts of the obsolete stropmodule functions; except
3951 string.maketrans(). */
3952 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3953 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3954 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3955 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3956 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3957 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3958 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3959 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3960 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3961 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3962 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3963 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3964 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3965 capitalize__doc__},
3966 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3967 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3968 endswith__doc__},
3969 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3970 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3971 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3972 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3973 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3974 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3975 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3976 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3977 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3978 rpartition__doc__},
3979 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3980 startswith__doc__},
3981 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3982 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3983 swapcase__doc__},
3984 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3985 translate__doc__},
3986 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3987 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3988 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3989 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3990 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3991 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3992 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3993 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3994 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3995 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3996 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3997 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3998 expandtabs__doc__},
3999 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4000 splitlines__doc__},
4001 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4002 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004003};
4004
4005static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004006str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004007
Christian Heimes44720832008-05-26 13:01:01 +00004008static PyObject *
4009string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4010{
4011 PyObject *x = NULL;
4012 static char *kwlist[] = {"object", 0};
4013
4014 if (type != &PyBytes_Type)
4015 return str_subtype_new(type, args, kwds);
4016 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4017 return NULL;
4018 if (x == NULL)
4019 return PyBytes_FromString("");
4020 return PyObject_Str(x);
4021}
4022
4023static PyObject *
4024str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4025{
4026 PyObject *tmp, *pnew;
4027 Py_ssize_t n;
4028
4029 assert(PyType_IsSubtype(type, &PyBytes_Type));
4030 tmp = string_new(&PyBytes_Type, args, kwds);
4031 if (tmp == NULL)
4032 return NULL;
4033 assert(PyBytes_CheckExact(tmp));
4034 n = PyBytes_GET_SIZE(tmp);
4035 pnew = type->tp_alloc(type, n);
4036 if (pnew != NULL) {
4037 Py_MEMCPY(PyBytes_AS_STRING(pnew), PyBytes_AS_STRING(tmp), n+1);
4038 ((PyBytesObject *)pnew)->ob_shash =
4039 ((PyBytesObject *)tmp)->ob_shash;
4040 ((PyBytesObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4041 }
4042 Py_DECREF(tmp);
4043 return pnew;
4044}
4045
4046static PyObject *
4047basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4048{
4049 PyErr_SetString(PyExc_TypeError,
4050 "The basestring type cannot be instantiated");
4051 return NULL;
4052}
4053
4054static PyObject *
4055string_mod(PyObject *v, PyObject *w)
4056{
4057 if (!PyBytes_Check(v)) {
4058 Py_INCREF(Py_NotImplemented);
4059 return Py_NotImplemented;
4060 }
4061 return PyBytes_Format(v, w);
4062}
4063
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4066
/* Number protocol table for str.  Only the remainder slot is filled in
   (string_mod); the slots after nb_remainder are not listed and are
   therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
4074
4075
/* Type object for basestring.  It defines no behaviour of its own apart
   from tp_new (basestring_new, which always raises TypeError), and it
   derives from PyBaseObject_Type. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4117
4118PyDoc_STRVAR(string_doc,
4119"str(object) -> string\n\
4120\n\
4121Return a nice string representation of the object.\n\
4122If the argument is a string, the return value is the same object.");
4123
/* Type object for str.  Instances are variable-sized: the basic size is
   sizeof(PyBytesObject) and each item is one char.  The type derives
   from PyBaseString_Type and exposes string_methods as its methods. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyBytesObject),                      /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4167
4168void
4169PyBytes_Concat(register PyObject **pv, register PyObject *w)
4170{
4171 register PyObject *v;
4172 if (*pv == NULL)
4173 return;
4174 if (w == NULL || !PyBytes_Check(*pv)) {
4175 Py_DECREF(*pv);
4176 *pv = NULL;
4177 return;
4178 }
4179 v = string_concat((PyBytesObject *) *pv, w);
4180 Py_DECREF(*pv);
4181 *pv = v;
4182}
4183
4184void
4185PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
4186{
4187 PyBytes_Concat(pv, w);
4188 Py_XDECREF(w);
4189}
4190
4191
4192/* The following function breaks the notion that strings are immutable:
4193 it changes the size of a string. We get away with this only if there
4194 is only one module referencing the object. You can also think of it
4195 as creating a new string object and destroying the old one, only
4196 more efficiently. In any case, don't use this if the string may
4197 already be known to some other part of the code...
4198 Note that if there's not enough memory to resize the string, the original
4199 string object at *pv is deallocated, *pv is set to NULL, an "out of
4200 memory" exception is set, and -1 is returned. Else (on success) 0 is
4201 returned, and the value in *pv may or may not be the same as on input.
4202 As always, an extra byte is allocated for a trailing \0 byte (newsize
4203 does *not* include that), and a trailing \0 byte is stored.
4204*/
4205
4206int
4207_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
4208{
4209 register PyObject *v;
4210 register PyBytesObject *sv;
4211 v = *pv;
4212 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4213 PyBytes_CHECK_INTERNED(v)) {
4214 *pv = 0;
4215 Py_DECREF(v);
4216 PyErr_BadInternalCall();
4217 return -1;
4218 }
4219 /* XXX UNREF/NEWREF interface should be more symmetrical */
4220 _Py_DEC_REFTOTAL;
4221 _Py_ForgetReference(v);
4222 *pv = (PyObject *)
4223 PyObject_REALLOC((char *)v, sizeof(PyBytesObject) + newsize);
4224 if (*pv == NULL) {
4225 PyObject_Del(v);
4226 PyErr_NoMemory();
4227 return -1;
4228 }
4229 _Py_NewReference(*pv);
4230 sv = (PyBytesObject *) *pv;
4231 Py_SIZE(sv) = newsize;
4232 sv->ob_sval[newsize] = '\0';
4233 sv->ob_shash = -1; /* invalidate cached hash value */
4234 return 0;
4235}
4236
4237/* Helpers for formatstring */
4238
4239Py_LOCAL_INLINE(PyObject *)
4240getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4241{
4242 Py_ssize_t argidx = *p_argidx;
4243 if (argidx < arglen) {
4244 (*p_argidx)++;
4245 if (arglen < 0)
4246 return args;
4247 else
4248 return PyTuple_GetItem(args, argidx);
4249 }
4250 PyErr_SetString(PyExc_TypeError,
4251 "not enough arguments for format string");
4252 return NULL;
4253}
4254
/* Bit flags recording which flag characters appeared in a conversion
 * specification; each comment names the character that sets the flag.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4267
4268Py_LOCAL_INLINE(int)
4269formatfloat(char *buf, size_t buflen, int flags,
4270 int prec, int type, PyObject *v)
4271{
4272 /* fmt = '%#.' + `prec` + `type`
4273 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4274 char fmt[20];
4275 double x;
4276 x = PyFloat_AsDouble(v);
4277 if (x == -1.0 && PyErr_Occurred()) {
4278 PyErr_Format(PyExc_TypeError, "float argument required, "
4279 "not %.200s", Py_TYPE(v)->tp_name);
4280 return -1;
4281 }
4282 if (prec < 0)
4283 prec = 6;
4284 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4285 type = 'g';
4286 /* Worst case length calc to ensure no buffer overrun:
4287
4288 'g' formats:
4289 fmt = %#.<prec>g
4290 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4291 for any double rep.)
4292 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4293
4294 'f' formats:
4295 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4296 len = 1 + 50 + 1 + prec = 52 + prec
4297
4298 If prec=0 the effective precision is 1 (the leading digit is
4299 always given), therefore increase the length by one.
4300
4301 */
4302 if (((type == 'g' || type == 'G') &&
4303 buflen <= (size_t)10 + (size_t)prec) ||
4304 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4305 PyErr_SetString(PyExc_OverflowError,
4306 "formatted float is too long (precision too large?)");
4307 return -1;
4308 }
4309 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4310 (flags&F_ALT) ? "#" : "",
4311 prec, type);
4312 PyOS_ascii_formatd(buf, buflen, fmt, x);
4313 return (int)strlen(buf);
4314}
4315
4316/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
4317 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4318 * Python's regular ints.
4319 * Return value: a new PyString*, or NULL if error.
4320 * . *pbuf is set to point into it,
4321 * *plen set to the # of chars following that.
4322 * Caller must decref it when done using pbuf.
4323 * The string starting at *pbuf is of the form
4324 * "-"? ("0x" | "0X")? digit+
4325 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4326 * set in flags. The case of hex digits will be correct,
4327 * There will be at least prec digits, zero-filled on the left if
4328 * necessary to get that many.
4329 * val object to be converted
4330 * flags bitmask of format flags; only F_ALT is looked at
4331 * prec minimum number of digits; 0-fill on left if needed
4332 * type a character in [duoxX]; u acts the same as d
4333 *
4334 * CAUTION: o, x and X conversions on regular ints can never
4335 * produce a '-' sign, but can for Python's unbounded ints.
4336 */
4337PyObject*
4338_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
4339 char **pbuf, int *plen)
4340{
4341 PyObject *result = NULL;
4342 char *buf;
4343 Py_ssize_t i;
4344 int sign; /* 1 if '-', else 0 */
4345 int len; /* number of characters */
4346 Py_ssize_t llen;
4347 int numdigits; /* len == numnondigits + numdigits */
4348 int numnondigits = 0;
4349
4350 switch (type) {
4351 case 'd':
4352 case 'u':
4353 result = Py_TYPE(val)->tp_str(val);
4354 break;
4355 case 'o':
4356 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4357 break;
4358 case 'x':
4359 case 'X':
4360 numnondigits = 2;
4361 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4362 break;
4363 default:
4364 assert(!"'type' not in [duoxX]");
4365 }
4366 if (!result)
4367 return NULL;
4368
4369 buf = PyBytes_AsString(result);
4370 if (!buf) {
4371 Py_DECREF(result);
4372 return NULL;
4373 }
4374
4375 /* To modify the string in-place, there can only be one reference. */
4376 if (Py_REFCNT(result) != 1) {
4377 PyErr_BadInternalCall();
4378 return NULL;
4379 }
4380 llen = PyBytes_Size(result);
4381 if (llen > INT_MAX) {
4382 PyErr_SetString(PyExc_ValueError, "string too large in _PyBytes_FormatLong");
4383 return NULL;
4384 }
4385 len = (int)llen;
4386 if (buf[len-1] == 'L') {
4387 --len;
4388 buf[len] = '\0';
4389 }
4390 sign = buf[0] == '-';
4391 numnondigits += sign;
4392 numdigits = len - numnondigits;
4393 assert(numdigits > 0);
4394
4395 /* Get rid of base marker unless F_ALT */
4396 if ((flags & F_ALT) == 0) {
4397 /* Need to skip 0x, 0X or 0. */
4398 int skipped = 0;
4399 switch (type) {
4400 case 'o':
4401 assert(buf[sign] == '0');
4402 /* If 0 is only digit, leave it alone. */
4403 if (numdigits > 1) {
4404 skipped = 1;
4405 --numdigits;
4406 }
4407 break;
4408 case 'x':
4409 case 'X':
4410 assert(buf[sign] == '0');
4411 assert(buf[sign + 1] == 'x');
4412 skipped = 2;
4413 numnondigits -= 2;
4414 break;
4415 }
4416 if (skipped) {
4417 buf += skipped;
4418 len -= skipped;
4419 if (sign)
4420 buf[0] = '-';
4421 }
4422 assert(len == numnondigits + numdigits);
4423 assert(numdigits > 0);
4424 }
4425
4426 /* Fill with leading zeroes to meet minimum width. */
4427 if (prec > numdigits) {
4428 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
4429 numnondigits + prec);
4430 char *b1;
4431 if (!r1) {
4432 Py_DECREF(result);
4433 return NULL;
4434 }
4435 b1 = PyBytes_AS_STRING(r1);
4436 for (i = 0; i < numnondigits; ++i)
4437 *b1++ = *buf++;
4438 for (i = 0; i < prec - numdigits; i++)
4439 *b1++ = '0';
4440 for (i = 0; i < numdigits; i++)
4441 *b1++ = *buf++;
4442 *b1 = '\0';
4443 Py_DECREF(result);
4444 result = r1;
4445 buf = PyBytes_AS_STRING(result);
4446 len = numnondigits + prec;
4447 }
4448
4449 /* Fix up case for hex conversions. */
4450 if (type == 'X') {
4451 /* Need to convert all lower case letters to upper case.
4452 and need to convert 0x to 0X (and -0x to -0X). */
4453 for (i = 0; i < len; i++)
4454 if (buf[i] >= 'a' && buf[i] <= 'x')
4455 buf[i] -= 'a'-'A';
4456 }
4457 *pbuf = buf;
4458 *plen = len;
4459 return result;
4460}
4461
4462Py_LOCAL_INLINE(int)
4463formatint(char *buf, size_t buflen, int flags,
4464 int prec, int type, PyObject *v)
4465{
4466 /* fmt = '%#.' + `prec` + 'l' + `type`
4467 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4468 + 1 + 1 = 24 */
4469 char fmt[64]; /* plenty big enough! */
4470 char *sign;
4471 long x;
4472
4473 x = PyInt_AsLong(v);
4474 if (x == -1 && PyErr_Occurred()) {
4475 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4476 Py_TYPE(v)->tp_name);
4477 return -1;
4478 }
4479 if (x < 0 && type == 'u') {
4480 type = 'd';
4481 }
4482 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4483 sign = "-";
4484 else
4485 sign = "";
4486 if (prec < 0)
4487 prec = 1;
4488
4489 if ((flags & F_ALT) &&
4490 (type == 'x' || type == 'X')) {
4491 /* When converting under %#x or %#X, there are a number
4492 * of issues that cause pain:
4493 * - when 0 is being converted, the C standard leaves off
4494 * the '0x' or '0X', which is inconsistent with other
4495 * %#x/%#X conversions and inconsistent with Python's
4496 * hex() function
4497 * - there are platforms that violate the standard and
4498 * convert 0 with the '0x' or '0X'
4499 * (Metrowerks, Compaq Tru64)
4500 * - there are platforms that give '0x' when converting
4501 * under %#X, but convert 0 in accordance with the
4502 * standard (OS/2 EMX)
4503 *
4504 * We can achieve the desired consistency by inserting our
4505 * own '0x' or '0X' prefix, and substituting %x/%X in place
4506 * of %#x/%#X.
4507 *
4508 * Note that this is the same approach as used in
4509 * formatint() in unicodeobject.c
4510 */
4511 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4512 sign, type, prec, type);
4513 }
4514 else {
4515 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4516 sign, (flags&F_ALT) ? "#" : "",
4517 prec, type);
4518 }
4519
4520 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4521 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4522 */
4523 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4524 PyErr_SetString(PyExc_OverflowError,
4525 "formatted integer is too long (precision too large?)");
4526 return -1;
4527 }
4528 if (sign[0])
4529 PyOS_snprintf(buf, buflen, fmt, -x);
4530 else
4531 PyOS_snprintf(buf, buflen, fmt, x);
4532 return (int)strlen(buf);
4533}
4534
4535Py_LOCAL_INLINE(int)
4536formatchar(char *buf, size_t buflen, PyObject *v)
4537{
4538 /* presume that the buffer is at least 2 characters long */
4539 if (PyBytes_Check(v)) {
4540 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4541 return -1;
4542 }
4543 else {
4544 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4545 return -1;
4546 }
4547 buf[1] = '\0';
4548 return 1;
4549}
4550
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever the macro is used.
*/
#define FORMATBUFLEN ((size_t)120)
4560
4561PyObject *
4562PyBytes_Format(PyObject *format, PyObject *args)
4563{
4564 char *fmt, *res;
4565 Py_ssize_t arglen, argidx;
4566 Py_ssize_t reslen, rescnt, fmtcnt;
4567 int args_owned = 0;
4568 PyObject *result, *orig_args;
4569#ifdef Py_USING_UNICODE
4570 PyObject *v, *w;
4571#endif
4572 PyObject *dict = NULL;
4573 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
4574 PyErr_BadInternalCall();
4575 return NULL;
4576 }
4577 orig_args = args;
4578 fmt = PyBytes_AS_STRING(format);
4579 fmtcnt = PyBytes_GET_SIZE(format);
4580 reslen = rescnt = fmtcnt + 100;
4581 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
4582 if (result == NULL)
4583 return NULL;
4584 res = PyBytes_AsString(result);
4585 if (PyTuple_Check(args)) {
4586 arglen = PyTuple_GET_SIZE(args);
4587 argidx = 0;
4588 }
4589 else {
4590 arglen = -1;
4591 argidx = -2;
4592 }
4593 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4594 !PyObject_TypeCheck(args, &PyBaseString_Type))
4595 dict = args;
4596 while (--fmtcnt >= 0) {
4597 if (*fmt != '%') {
4598 if (--rescnt < 0) {
4599 rescnt = fmtcnt + 100;
4600 reslen += rescnt;
4601 if (_PyBytes_Resize(&result, reslen) < 0)
4602 return NULL;
4603 res = PyBytes_AS_STRING(result)
4604 + reslen - rescnt;
4605 --rescnt;
4606 }
4607 *res++ = *fmt++;
4608 }
4609 else {
4610 /* Got a format specifier */
4611 int flags = 0;
4612 Py_ssize_t width = -1;
4613 int prec = -1;
4614 int c = '\0';
4615 int fill;
4616 int isnumok;
4617 PyObject *v = NULL;
4618 PyObject *temp = NULL;
4619 char *pbuf;
4620 int sign;
4621 Py_ssize_t len;
4622 char formatbuf[FORMATBUFLEN];
4623 /* For format{float,int,char}() */
4624#ifdef Py_USING_UNICODE
4625 char *fmt_start = fmt;
4626 Py_ssize_t argidx_start = argidx;
4627#endif
4628
4629 fmt++;
4630 if (*fmt == '(') {
4631 char *keystart;
4632 Py_ssize_t keylen;
4633 PyObject *key;
4634 int pcount = 1;
4635
4636 if (dict == NULL) {
4637 PyErr_SetString(PyExc_TypeError,
4638 "format requires a mapping");
4639 goto error;
4640 }
4641 ++fmt;
4642 --fmtcnt;
4643 keystart = fmt;
4644 /* Skip over balanced parentheses */
4645 while (pcount > 0 && --fmtcnt >= 0) {
4646 if (*fmt == ')')
4647 --pcount;
4648 else if (*fmt == '(')
4649 ++pcount;
4650 fmt++;
4651 }
4652 keylen = fmt - keystart - 1;
4653 if (fmtcnt < 0 || pcount > 0) {
4654 PyErr_SetString(PyExc_ValueError,
4655 "incomplete format key");
4656 goto error;
4657 }
4658 key = PyBytes_FromStringAndSize(keystart,
4659 keylen);
4660 if (key == NULL)
4661 goto error;
4662 if (args_owned) {
4663 Py_DECREF(args);
4664 args_owned = 0;
4665 }
4666 args = PyObject_GetItem(dict, key);
4667 Py_DECREF(key);
4668 if (args == NULL) {
4669 goto error;
4670 }
4671 args_owned = 1;
4672 arglen = -1;
4673 argidx = -2;
4674 }
4675 while (--fmtcnt >= 0) {
4676 switch (c = *fmt++) {
4677 case '-': flags |= F_LJUST; continue;
4678 case '+': flags |= F_SIGN; continue;
4679 case ' ': flags |= F_BLANK; continue;
4680 case '#': flags |= F_ALT; continue;
4681 case '0': flags |= F_ZERO; continue;
4682 }
4683 break;
4684 }
4685 if (c == '*') {
4686 v = getnextarg(args, arglen, &argidx);
4687 if (v == NULL)
4688 goto error;
4689 if (!PyInt_Check(v)) {
4690 PyErr_SetString(PyExc_TypeError,
4691 "* wants int");
4692 goto error;
4693 }
4694 width = PyInt_AsLong(v);
4695 if (width < 0) {
4696 flags |= F_LJUST;
4697 width = -width;
4698 }
4699 if (--fmtcnt >= 0)
4700 c = *fmt++;
4701 }
4702 else if (c >= 0 && isdigit(c)) {
4703 width = c - '0';
4704 while (--fmtcnt >= 0) {
4705 c = Py_CHARMASK(*fmt++);
4706 if (!isdigit(c))
4707 break;
4708 if ((width*10) / 10 != width) {
4709 PyErr_SetString(
4710 PyExc_ValueError,
4711 "width too big");
4712 goto error;
4713 }
4714 width = width*10 + (c - '0');
4715 }
4716 }
4717 if (c == '.') {
4718 prec = 0;
4719 if (--fmtcnt >= 0)
4720 c = *fmt++;
4721 if (c == '*') {
4722 v = getnextarg(args, arglen, &argidx);
4723 if (v == NULL)
4724 goto error;
4725 if (!PyInt_Check(v)) {
4726 PyErr_SetString(
4727 PyExc_TypeError,
4728 "* wants int");
4729 goto error;
4730 }
4731 prec = PyInt_AsLong(v);
4732 if (prec < 0)
4733 prec = 0;
4734 if (--fmtcnt >= 0)
4735 c = *fmt++;
4736 }
4737 else if (c >= 0 && isdigit(c)) {
4738 prec = c - '0';
4739 while (--fmtcnt >= 0) {
4740 c = Py_CHARMASK(*fmt++);
4741 if (!isdigit(c))
4742 break;
4743 if ((prec*10) / 10 != prec) {
4744 PyErr_SetString(
4745 PyExc_ValueError,
4746 "prec too big");
4747 goto error;
4748 }
4749 prec = prec*10 + (c - '0');
4750 }
4751 }
4752 } /* prec */
4753 if (fmtcnt >= 0) {
4754 if (c == 'h' || c == 'l' || c == 'L') {
4755 if (--fmtcnt >= 0)
4756 c = *fmt++;
4757 }
4758 }
4759 if (fmtcnt < 0) {
4760 PyErr_SetString(PyExc_ValueError,
4761 "incomplete format");
4762 goto error;
4763 }
4764 if (c != '%') {
4765 v = getnextarg(args, arglen, &argidx);
4766 if (v == NULL)
4767 goto error;
4768 }
4769 sign = 0;
4770 fill = ' ';
4771 switch (c) {
4772 case '%':
4773 pbuf = "%";
4774 len = 1;
4775 break;
4776 case 's':
4777#ifdef Py_USING_UNICODE
4778 if (PyUnicode_Check(v)) {
4779 fmt = fmt_start;
4780 argidx = argidx_start;
4781 goto unicode;
4782 }
4783#endif
4784 temp = _PyObject_Str(v);
4785#ifdef Py_USING_UNICODE
4786 if (temp != NULL && PyUnicode_Check(temp)) {
4787 Py_DECREF(temp);
4788 fmt = fmt_start;
4789 argidx = argidx_start;
4790 goto unicode;
4791 }
4792#endif
4793 /* Fall through */
4794 case 'r':
4795 if (c == 'r')
4796 temp = PyObject_Repr(v);
4797 if (temp == NULL)
4798 goto error;
4799 if (!PyBytes_Check(temp)) {
4800 PyErr_SetString(PyExc_TypeError,
4801 "%s argument has non-string str()");
4802 Py_DECREF(temp);
4803 goto error;
4804 }
4805 pbuf = PyBytes_AS_STRING(temp);
4806 len = PyBytes_GET_SIZE(temp);
4807 if (prec >= 0 && len > prec)
4808 len = prec;
4809 break;
4810 case 'i':
4811 case 'd':
4812 case 'u':
4813 case 'o':
4814 case 'x':
4815 case 'X':
4816 if (c == 'i')
4817 c = 'd';
4818 isnumok = 0;
4819 if (PyNumber_Check(v)) {
4820 PyObject *iobj=NULL;
4821
4822 if (PyInt_Check(v) || (PyLong_Check(v))) {
4823 iobj = v;
4824 Py_INCREF(iobj);
4825 }
4826 else {
4827 iobj = PyNumber_Int(v);
4828 if (iobj==NULL) iobj = PyNumber_Long(v);
4829 }
4830 if (iobj!=NULL) {
4831 if (PyInt_Check(iobj)) {
4832 isnumok = 1;
4833 pbuf = formatbuf;
4834 len = formatint(pbuf,
4835 sizeof(formatbuf),
4836 flags, prec, c, iobj);
4837 Py_DECREF(iobj);
4838 if (len < 0)
4839 goto error;
4840 sign = 1;
4841 }
4842 else if (PyLong_Check(iobj)) {
4843 int ilen;
4844
4845 isnumok = 1;
4846 temp = _PyBytes_FormatLong(iobj, flags,
4847 prec, c, &pbuf, &ilen);
4848 Py_DECREF(iobj);
4849 len = ilen;
4850 if (!temp)
4851 goto error;
4852 sign = 1;
4853 }
4854 else {
4855 Py_DECREF(iobj);
4856 }
4857 }
4858 }
4859 if (!isnumok) {
4860 PyErr_Format(PyExc_TypeError,
4861 "%%%c format: a number is required, "
4862 "not %.200s", c, Py_TYPE(v)->tp_name);
4863 goto error;
4864 }
4865 if (flags & F_ZERO)
4866 fill = '0';
4867 break;
4868 case 'e':
4869 case 'E':
4870 case 'f':
4871 case 'F':
4872 case 'g':
4873 case 'G':
4874 if (c == 'F')
4875 c = 'f';
4876 pbuf = formatbuf;
4877 len = formatfloat(pbuf, sizeof(formatbuf),
4878 flags, prec, c, v);
4879 if (len < 0)
4880 goto error;
4881 sign = 1;
4882 if (flags & F_ZERO)
4883 fill = '0';
4884 break;
4885 case 'c':
4886#ifdef Py_USING_UNICODE
4887 if (PyUnicode_Check(v)) {
4888 fmt = fmt_start;
4889 argidx = argidx_start;
4890 goto unicode;
4891 }
4892#endif
4893 pbuf = formatbuf;
4894 len = formatchar(pbuf, sizeof(formatbuf), v);
4895 if (len < 0)
4896 goto error;
4897 break;
4898 default:
4899 PyErr_Format(PyExc_ValueError,
4900 "unsupported format character '%c' (0x%x) "
4901 "at index %zd",
4902 c, c,
4903 (Py_ssize_t)(fmt - 1 -
4904 PyBytes_AsString(format)));
4905 goto error;
4906 }
4907 if (sign) {
4908 if (*pbuf == '-' || *pbuf == '+') {
4909 sign = *pbuf++;
4910 len--;
4911 }
4912 else if (flags & F_SIGN)
4913 sign = '+';
4914 else if (flags & F_BLANK)
4915 sign = ' ';
4916 else
4917 sign = 0;
4918 }
4919 if (width < len)
4920 width = len;
4921 if (rescnt - (sign != 0) < width) {
4922 reslen -= rescnt;
4923 rescnt = width + fmtcnt + 100;
4924 reslen += rescnt;
4925 if (reslen < 0) {
4926 Py_DECREF(result);
4927 Py_XDECREF(temp);
4928 return PyErr_NoMemory();
4929 }
4930 if (_PyBytes_Resize(&result, reslen) < 0) {
4931 Py_XDECREF(temp);
4932 return NULL;
4933 }
4934 res = PyBytes_AS_STRING(result)
4935 + reslen - rescnt;
4936 }
4937 if (sign) {
4938 if (fill != ' ')
4939 *res++ = sign;
4940 rescnt--;
4941 if (width > len)
4942 width--;
4943 }
4944 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4945 assert(pbuf[0] == '0');
4946 assert(pbuf[1] == c);
4947 if (fill != ' ') {
4948 *res++ = *pbuf++;
4949 *res++ = *pbuf++;
4950 }
4951 rescnt -= 2;
4952 width -= 2;
4953 if (width < 0)
4954 width = 0;
4955 len -= 2;
4956 }
4957 if (width > len && !(flags & F_LJUST)) {
4958 do {
4959 --rescnt;
4960 *res++ = fill;
4961 } while (--width > len);
4962 }
4963 if (fill == ' ') {
4964 if (sign)
4965 *res++ = sign;
4966 if ((flags & F_ALT) &&
4967 (c == 'x' || c == 'X')) {
4968 assert(pbuf[0] == '0');
4969 assert(pbuf[1] == c);
4970 *res++ = *pbuf++;
4971 *res++ = *pbuf++;
4972 }
4973 }
4974 Py_MEMCPY(res, pbuf, len);
4975 res += len;
4976 rescnt -= len;
4977 while (--width >= len) {
4978 --rescnt;
4979 *res++ = ' ';
4980 }
4981 if (dict && (argidx < arglen) && c != '%') {
4982 PyErr_SetString(PyExc_TypeError,
4983 "not all arguments converted during string formatting");
4984 Py_XDECREF(temp);
4985 goto error;
4986 }
4987 Py_XDECREF(temp);
4988 } /* '%' */
4989 } /* until end */
4990 if (argidx < arglen && !dict) {
4991 PyErr_SetString(PyExc_TypeError,
4992 "not all arguments converted during string formatting");
4993 goto error;
4994 }
4995 if (args_owned) {
4996 Py_DECREF(args);
4997 }
4998 _PyBytes_Resize(&result, reslen - rescnt);
4999 return result;
5000
5001#ifdef Py_USING_UNICODE
5002 unicode:
5003 if (args_owned) {
5004 Py_DECREF(args);
5005 args_owned = 0;
5006 }
5007 /* Fiddle args right (remove the first argidx arguments) */
5008 if (PyTuple_Check(orig_args) && argidx > 0) {
5009 PyObject *v;
5010 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5011 v = PyTuple_New(n);
5012 if (v == NULL)
5013 goto error;
5014 while (--n >= 0) {
5015 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5016 Py_INCREF(w);
5017 PyTuple_SET_ITEM(v, n, w);
5018 }
5019 args = v;
5020 } else {
5021 Py_INCREF(orig_args);
5022 args = orig_args;
5023 }
5024 args_owned = 1;
5025 /* Take what we have of the result and let the Unicode formatting
5026 function format the rest of the input. */
5027 rescnt = res - PyBytes_AS_STRING(result);
5028 if (_PyBytes_Resize(&result, rescnt))
5029 goto error;
5030 fmtcnt = PyBytes_GET_SIZE(format) - \
5031 (fmt - PyBytes_AS_STRING(format));
5032 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5033 if (format == NULL)
5034 goto error;
5035 v = PyUnicode_Format(format, args);
5036 Py_DECREF(format);
5037 if (v == NULL)
5038 goto error;
5039 /* Paste what we have (result) to what the Unicode formatting
5040 function returned (v) and return the result (or error) */
5041 w = PyUnicode_Concat(result, v);
5042 Py_DECREF(result);
5043 Py_DECREF(v);
5044 Py_DECREF(args);
5045 return w;
5046#endif /* Py_USING_UNICODE */
5047
5048 error:
5049 Py_DECREF(result);
5050 if (args_owned) {
5051 Py_DECREF(args);
5052 }
5053 return NULL;
5054}
5055
5056void
5057PyBytes_InternInPlace(PyObject **p)
5058{
5059 register PyBytesObject *s = (PyBytesObject *)(*p);
5060 PyObject *t;
5061 if (s == NULL || !PyBytes_Check(s))
5062 Py_FatalError("PyBytes_InternInPlace: strings only please!");
5063 /* If it's a string subclass, we don't really know what putting
5064 it in the interned dict might do. */
5065 if (!PyBytes_CheckExact(s))
5066 return;
5067 if (PyBytes_CHECK_INTERNED(s))
5068 return;
5069 if (interned == NULL) {
5070 interned = PyDict_New();
5071 if (interned == NULL) {
5072 PyErr_Clear(); /* Don't leave an exception */
5073 return;
5074 }
5075 }
5076 t = PyDict_GetItem(interned, (PyObject *)s);
5077 if (t) {
5078 Py_INCREF(t);
5079 Py_DECREF(*p);
5080 *p = t;
5081 return;
5082 }
5083
5084 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5085 PyErr_Clear();
5086 return;
5087 }
5088 /* The two references in interned are not counted by refcnt.
5089 The string deallocator will take care of this */
5090 Py_REFCNT(s) -= 2;
5091 PyBytes_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5092}
5093
5094void
5095PyBytes_InternImmortal(PyObject **p)
5096{
5097 PyBytes_InternInPlace(p);
5098 if (PyBytes_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5099 PyBytes_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5100 Py_INCREF(*p);
5101 }
5102}
5103
5104
5105PyObject *
5106PyBytes_InternFromString(const char *cp)
5107{
5108 PyObject *s = PyBytes_FromString(cp);
5109 if (s == NULL)
5110 return NULL;
5111 PyBytes_InternInPlace(&s);
5112 return s;
5113}
5114
5115void
5116PyBytes_Fini(void)
5117{
5118 int i;
5119 for (i = 0; i < UCHAR_MAX + 1; i++) {
5120 Py_XDECREF(characters[i]);
5121 characters[i] = NULL;
5122 }
5123 Py_XDECREF(nullstring);
5124 nullstring = NULL;
5125}
5126
5127void _Py_ReleaseInternedStrings(void)
5128{
5129 PyObject *keys;
5130 PyBytesObject *s;
5131 Py_ssize_t i, n;
5132 Py_ssize_t immortal_size = 0, mortal_size = 0;
5133
5134 if (interned == NULL || !PyDict_Check(interned))
5135 return;
5136 keys = PyDict_Keys(interned);
5137 if (keys == NULL || !PyList_Check(keys)) {
5138 PyErr_Clear();
5139 return;
5140 }
5141
5142 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5143 detector, interned strings are not forcibly deallocated; rather, we
5144 give them their stolen references back, and then clear and DECREF
5145 the interned dict. */
5146
5147 n = PyList_GET_SIZE(keys);
5148 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5149 n);
5150 for (i = 0; i < n; i++) {
5151 s = (PyBytesObject *) PyList_GET_ITEM(keys, i);
5152 switch (s->ob_sstate) {
5153 case SSTATE_NOT_INTERNED:
5154 /* XXX Shouldn't happen */
5155 break;
5156 case SSTATE_INTERNED_IMMORTAL:
5157 Py_REFCNT(s) += 1;
5158 immortal_size += Py_SIZE(s);
5159 break;
5160 case SSTATE_INTERNED_MORTAL:
5161 Py_REFCNT(s) += 2;
5162 mortal_size += Py_SIZE(s);
5163 break;
5164 default:
5165 Py_FatalError("Inconsistent interned string state.");
5166 }
5167 s->ob_sstate = SSTATE_NOT_INTERNED;
5168 }
5169 fprintf(stderr, "total size of all interned strings: "
5170 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5171 "mortal/immortal\n", mortal_size, immortal_size);
5172 Py_DECREF(keys);
5173 PyDict_Clear(interned);
5174 Py_DECREF(interned);
5175 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005176}