blob: 89614e6a3d6e26729b3e3f6270bb4a8744af4a93 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* PyStringObject_SIZE is the basic size of a string object: the offset of
   the character data plus one byte for the terminating null.  Any memory
   allocation for a string of length n should request
   PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE rather than sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000043 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 const char* p = f;
193 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
194 ;
195
196 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
197 * they don't affect the amount of space we reserve.
198 */
199 if ((*f == 'l' || *f == 'z') &&
200 (f[1] == 'd' || f[1] == 'u'))
201 ++f;
202
203 switch (*f) {
204 case 'c':
205 (void)va_arg(count, int);
206 /* fall through... */
207 case '%':
208 n++;
209 break;
210 case 'd': case 'u': case 'i': case 'x':
211 (void) va_arg(count, int);
212 /* 20 bytes is enough to hold a 64-bit
213 integer. Decimal takes the most space.
214 This isn't enough for octal. */
215 n += 20;
216 break;
217 case 's':
218 s = va_arg(count, char*);
219 n += strlen(s);
220 break;
221 case 'p':
222 (void) va_arg(count, int);
223 /* maximum 64-bit pointer representation:
224 * 0xffffffffffffffff
225 * so 19 characters is enough.
226 * XXX I count 18 -- what's the extra for?
227 */
228 n += 19;
229 break;
230 default:
231 /* if we stumble upon an unknown
232 formatting code, copy the rest of
233 the format string to the output
234 string. (we cannot just skip the
235 code, since there's no way to know
236 what's in the argument list) */
237 n += strlen(p);
238 goto expand;
239 }
240 } else
241 n++;
242 }
243 expand:
244 /* step 2: fill the buffer */
245 /* Since we've analyzed how much space we need for the worst case,
246 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000247 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000248 if (!string)
249 return NULL;
250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000251 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000252
253 for (f = format; *f; f++) {
254 if (*f == '%') {
255 const char* p = f++;
256 Py_ssize_t i;
257 int longflag = 0;
258 int size_tflag = 0;
259 /* parse the width.precision part (we're only
260 interested in the precision value, if any) */
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 if (*f == '.') {
265 f++;
266 n = 0;
267 while (isdigit(Py_CHARMASK(*f)))
268 n = (n*10) + *f++ - '0';
269 }
270 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
271 f++;
272 /* handle the long flag, but only for %ld and %lu.
273 others can be added when necessary. */
274 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
275 longflag = 1;
276 ++f;
277 }
278 /* handle the size_t flag. */
279 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
280 size_tflag = 1;
281 ++f;
282 }
283
284 switch (*f) {
285 case 'c':
286 *s++ = va_arg(vargs, int);
287 break;
288 case 'd':
289 if (longflag)
290 sprintf(s, "%ld", va_arg(vargs, long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
293 va_arg(vargs, Py_ssize_t));
294 else
295 sprintf(s, "%d", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'u':
299 if (longflag)
300 sprintf(s, "%lu",
301 va_arg(vargs, unsigned long));
302 else if (size_tflag)
303 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
304 va_arg(vargs, size_t));
305 else
306 sprintf(s, "%u",
307 va_arg(vargs, unsigned int));
308 s += strlen(s);
309 break;
310 case 'i':
311 sprintf(s, "%i", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 'x':
315 sprintf(s, "%x", va_arg(vargs, int));
316 s += strlen(s);
317 break;
318 case 's':
319 p = va_arg(vargs, char*);
320 i = strlen(p);
321 if (n > 0 && i > n)
322 i = n;
323 Py_MEMCPY(s, p, i);
324 s += i;
325 break;
326 case 'p':
327 sprintf(s, "%p", va_arg(vargs, void*));
328 /* %p is ill-defined: ensure leading 0x. */
329 if (s[1] == 'X')
330 s[1] = 'x';
331 else if (s[1] != 'x') {
332 memmove(s+2, s, strlen(s)+1);
333 s[0] = '0';
334 s[1] = 'x';
335 }
336 s += strlen(s);
337 break;
338 case '%':
339 *s++ = '%';
340 break;
341 default:
342 strcpy(s, p);
343 s += strlen(s);
344 goto end;
345 }
346 } else
347 *s++ = *f;
348 }
349
350 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000351 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000352 return string;
353}
354
355PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000356PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000357{
358 PyObject* ret;
359 va_list vargs;
360
361#ifdef HAVE_STDARG_PROTOTYPES
362 va_start(vargs, format);
363#else
364 va_start(vargs);
365#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000366 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000367 va_end(vargs);
368 return ret;
369}
370
371
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000372PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000373 Py_ssize_t size,
374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v, *str;
378
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000379 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000380 if (str == NULL)
381 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000382 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000383 Py_DECREF(str);
384 return v;
385}
386
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000387PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000388 const char *encoding,
389 const char *errors)
390{
391 PyObject *v;
392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000393 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000394 PyErr_BadArgument();
395 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
397
Christian Heimes44720832008-05-26 13:01:01 +0000398 if (encoding == NULL) {
399#ifdef Py_USING_UNICODE
400 encoding = PyUnicode_GetDefaultEncoding();
401#else
402 PyErr_SetString(PyExc_ValueError, "no encoding specified");
403 goto onError;
404#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000405 }
Christian Heimes44720832008-05-26 13:01:01 +0000406
407 /* Decode via the codec registry */
408 v = PyCodec_Decode(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
412 return v;
413
414 onError:
415 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000416}
417
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000418PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000419 const char *encoding,
420 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000421{
Christian Heimes44720832008-05-26 13:01:01 +0000422 PyObject *v;
423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000424 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000425 if (v == NULL)
426 goto onError;
427
428#ifdef Py_USING_UNICODE
429 /* Convert Unicode to a string using the default encoding */
430 if (PyUnicode_Check(v)) {
431 PyObject *temp = v;
432 v = PyUnicode_AsEncodedString(v, NULL, NULL);
433 Py_DECREF(temp);
434 if (v == NULL)
435 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436 }
Christian Heimes44720832008-05-26 13:01:01 +0000437#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000438 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000439 PyErr_Format(PyExc_TypeError,
440 "decoder did not return a string object (type=%.400s)",
441 Py_TYPE(v)->tp_name);
442 Py_DECREF(v);
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
Christian Heimes44720832008-05-26 13:01:01 +0000445
446 return v;
447
448 onError:
449 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000450}
451
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000452PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000453 Py_ssize_t size,
454 const char *encoding,
455 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000456{
Christian Heimes44720832008-05-26 13:01:01 +0000457 PyObject *v, *str;
458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000459 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000460 if (str == NULL)
461 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000463 Py_DECREF(str);
464 return v;
465}
466
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000467PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000468 const char *encoding,
469 const char *errors)
470{
471 PyObject *v;
472
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000473 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000474 PyErr_BadArgument();
475 goto onError;
476 }
477
478 if (encoding == NULL) {
479#ifdef Py_USING_UNICODE
480 encoding = PyUnicode_GetDefaultEncoding();
481#else
482 PyErr_SetString(PyExc_ValueError, "no encoding specified");
483 goto onError;
484#endif
485 }
486
487 /* Encode via the codec registry */
488 v = PyCodec_Encode(str, encoding, errors);
489 if (v == NULL)
490 goto onError;
491
492 return v;
493
494 onError:
495 return NULL;
496}
497
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000498PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000499 const char *encoding,
500 const char *errors)
501{
502 PyObject *v;
503
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000504 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000505 if (v == NULL)
506 goto onError;
507
508#ifdef Py_USING_UNICODE
509 /* Convert Unicode to a string using the default encoding */
510 if (PyUnicode_Check(v)) {
511 PyObject *temp = v;
512 v = PyUnicode_AsEncodedString(v, NULL, NULL);
513 Py_DECREF(temp);
514 if (v == NULL)
515 goto onError;
516 }
517#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000518 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000519 PyErr_Format(PyExc_TypeError,
520 "encoder did not return a string object (type=%.400s)",
521 Py_TYPE(v)->tp_name);
522 Py_DECREF(v);
523 goto onError;
524 }
525
526 return v;
527
528 onError:
529 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000530}
531
532static void
Christian Heimes44720832008-05-26 13:01:01 +0000533string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000534{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000535 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000536 case SSTATE_NOT_INTERNED:
537 break;
538
539 case SSTATE_INTERNED_MORTAL:
540 /* revive dead object temporarily for DelItem */
541 Py_REFCNT(op) = 3;
542 if (PyDict_DelItem(interned, op) != 0)
543 Py_FatalError(
544 "deletion of interned string failed");
545 break;
546
547 case SSTATE_INTERNED_IMMORTAL:
548 Py_FatalError("Immortal interned string died.");
549
550 default:
551 Py_FatalError("Inconsistent interned string state.");
552 }
553 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000554}
555
Christian Heimes44720832008-05-26 13:01:01 +0000556/* Unescape a backslash-escaped string. If unicode is non-zero,
557 the string is a u-literal. If recode_encoding is non-zero,
558 the string is UTF-8 encoded and should be re-encoded in the
559 specified encoding. */
560
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000561PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000562 Py_ssize_t len,
563 const char *errors,
564 Py_ssize_t unicode,
565 const char *recode_encoding)
566{
567 int c;
568 char *p, *buf;
569 const char *end;
570 PyObject *v;
571 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000572 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000573 if (v == NULL)
574 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000575 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000576 end = s + len;
577 while (s < end) {
578 if (*s != '\\') {
579 non_esc:
580#ifdef Py_USING_UNICODE
581 if (recode_encoding && (*s & 0x80)) {
582 PyObject *u, *w;
583 char *r;
584 const char* t;
585 Py_ssize_t rn;
586 t = s;
587 /* Decode non-ASCII bytes as UTF-8. */
588 while (t < end && (*t & 0x80)) t++;
589 u = PyUnicode_DecodeUTF8(s, t - s, errors);
590 if(!u) goto failed;
591
592 /* Recode them in target encoding. */
593 w = PyUnicode_AsEncodedString(
594 u, recode_encoding, errors);
595 Py_DECREF(u);
596 if (!w) goto failed;
597
598 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000599 assert(PyString_Check(w));
600 r = PyString_AS_STRING(w);
601 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000602 Py_MEMCPY(p, r, rn);
603 p += rn;
604 Py_DECREF(w);
605 s = t;
606 } else {
607 *p++ = *s++;
608 }
609#else
610 *p++ = *s++;
611#endif
612 continue;
613 }
614 s++;
615 if (s==end) {
616 PyErr_SetString(PyExc_ValueError,
617 "Trailing \\ in string");
618 goto failed;
619 }
620 switch (*s++) {
621 /* XXX This assumes ASCII! */
622 case '\n': break;
623 case '\\': *p++ = '\\'; break;
624 case '\'': *p++ = '\''; break;
625 case '\"': *p++ = '\"'; break;
626 case 'b': *p++ = '\b'; break;
627 case 'f': *p++ = '\014'; break; /* FF */
628 case 't': *p++ = '\t'; break;
629 case 'n': *p++ = '\n'; break;
630 case 'r': *p++ = '\r'; break;
631 case 'v': *p++ = '\013'; break; /* VT */
632 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
635 c = s[-1] - '0';
636 if (s < end && '0' <= *s && *s <= '7') {
637 c = (c<<3) + *s++ - '0';
638 if (s < end && '0' <= *s && *s <= '7')
639 c = (c<<3) + *s++ - '0';
640 }
641 *p++ = c;
642 break;
643 case 'x':
644 if (s+1 < end &&
645 isxdigit(Py_CHARMASK(s[0])) &&
646 isxdigit(Py_CHARMASK(s[1])))
647 {
648 unsigned int x = 0;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x = c - '0';
653 else if (islower(c))
654 x = 10 + c - 'a';
655 else
656 x = 10 + c - 'A';
657 x = x << 4;
658 c = Py_CHARMASK(*s);
659 s++;
660 if (isdigit(c))
661 x += c - '0';
662 else if (islower(c))
663 x += 10 + c - 'a';
664 else
665 x += 10 + c - 'A';
666 *p++ = x;
667 break;
668 }
669 if (!errors || strcmp(errors, "strict") == 0) {
670 PyErr_SetString(PyExc_ValueError,
671 "invalid \\x escape");
672 goto failed;
673 }
674 if (strcmp(errors, "replace") == 0) {
675 *p++ = '?';
676 } else if (strcmp(errors, "ignore") == 0)
677 /* do nothing */;
678 else {
679 PyErr_Format(PyExc_ValueError,
680 "decoding error; "
681 "unknown error handling code: %.400s",
682 errors);
683 goto failed;
684 }
685#ifndef Py_USING_UNICODE
686 case 'u':
687 case 'U':
688 case 'N':
689 if (unicode) {
690 PyErr_SetString(PyExc_ValueError,
691 "Unicode escapes not legal "
692 "when Unicode disabled");
693 goto failed;
694 }
695#endif
696 default:
697 *p++ = '\\';
698 s--;
699 goto non_esc; /* an arbitry number of unescaped
700 UTF-8 bytes may follow. */
701 }
702 }
703 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000704 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000705 return v;
706 failed:
707 Py_DECREF(v);
708 return NULL;
709}
710
711/* -------------------------------------------------------------------- */
712/* object api */
713
Christian Heimes1a6387e2008-03-26 12:49:49 +0000714static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000715string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000716{
Christian Heimes44720832008-05-26 13:01:01 +0000717 char *s;
718 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000720 return -1;
721 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722}
723
Christian Heimes44720832008-05-26 13:01:01 +0000724static /*const*/ char *
725string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726{
Christian Heimes44720832008-05-26 13:01:01 +0000727 char *s;
728 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000729 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000730 return NULL;
731 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000732}
733
734Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000735PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000736{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000737 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000738 return string_getsize(op);
739 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740}
741
Christian Heimes44720832008-05-26 13:01:01 +0000742/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000743PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000744{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000745 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000746 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000747 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000748}
749
750int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000752 register char **s,
753 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754{
Christian Heimes44720832008-05-26 13:01:01 +0000755 if (s == NULL) {
756 PyErr_BadInternalCall();
757 return -1;
758 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000760 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000761#ifdef Py_USING_UNICODE
762 if (PyUnicode_Check(obj)) {
763 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
764 if (obj == NULL)
765 return -1;
766 }
767 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000768#endif
Christian Heimes44720832008-05-26 13:01:01 +0000769 {
770 PyErr_Format(PyExc_TypeError,
771 "expected string or Unicode object, "
772 "%.200s found", Py_TYPE(obj)->tp_name);
773 return -1;
774 }
775 }
776
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000777 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000778 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779 *len = PyString_GET_SIZE(obj);
780 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000781 PyErr_SetString(PyExc_TypeError,
782 "expected string without null bytes");
783 return -1;
784 }
785 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786}
787
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788/* -------------------------------------------------------------------- */
789/* Methods */
790
Christian Heimes44720832008-05-26 13:01:01 +0000791#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000793
Christian Heimes1a6387e2008-03-26 12:49:49 +0000794#include "stringlib/count.h"
795#include "stringlib/find.h"
796#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000797
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000799#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801
802
803static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000804string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000805{
Christian Heimes44720832008-05-26 13:01:01 +0000806 Py_ssize_t i, str_len;
807 char c;
808 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000809
Christian Heimes44720832008-05-26 13:01:01 +0000810 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000811 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000812 int ret;
813 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000814 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000815 if (op == NULL)
816 return -1;
817 ret = string_print(op, fp, flags);
818 Py_DECREF(op);
819 return ret;
820 }
821 if (flags & Py_PRINT_RAW) {
822 char *data = op->ob_sval;
823 Py_ssize_t size = Py_SIZE(op);
824 Py_BEGIN_ALLOW_THREADS
825 while (size > INT_MAX) {
826 /* Very long strings cannot be written atomically.
827 * But don't write exactly INT_MAX bytes at a time
828 * to avoid memory aligment issues.
829 */
830 const int chunk_size = INT_MAX & ~0x3FFF;
831 fwrite(data, 1, chunk_size, fp);
832 data += chunk_size;
833 size -= chunk_size;
834 }
835#ifdef __VMS
836 if (size) fwrite(data, (int)size, 1, fp);
837#else
838 fwrite(data, 1, (int)size, fp);
839#endif
840 Py_END_ALLOW_THREADS
841 return 0;
842 }
843
844 /* figure out which quote to use; single is preferred */
845 quote = '\'';
846 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
847 !memchr(op->ob_sval, '"', Py_SIZE(op)))
848 quote = '"';
849
850 str_len = Py_SIZE(op);
851 Py_BEGIN_ALLOW_THREADS
852 fputc(quote, fp);
853 for (i = 0; i < str_len; i++) {
854 /* Since strings are immutable and the caller should have a
855 reference, accessing the interal buffer should not be an issue
856 with the GIL released. */
857 c = op->ob_sval[i];
858 if (c == quote || c == '\\')
859 fprintf(fp, "\\%c", c);
860 else if (c == '\t')
861 fprintf(fp, "\\t");
862 else if (c == '\n')
863 fprintf(fp, "\\n");
864 else if (c == '\r')
865 fprintf(fp, "\\r");
866 else if (c < ' ' || c >= 0x7f)
867 fprintf(fp, "\\x%02x", c & 0xff);
868 else
869 fputc(c, fp);
870 }
871 fputc(quote, fp);
872 Py_END_ALLOW_THREADS
873 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000874}
875
Christian Heimes44720832008-05-26 13:01:01 +0000876PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000877PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000878{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000879 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000880 size_t newsize = 2 + 4 * Py_SIZE(op);
881 PyObject *v;
882 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
883 PyErr_SetString(PyExc_OverflowError,
884 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000885 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000886 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000887 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000888 if (v == NULL) {
889 return NULL;
890 }
891 else {
892 register Py_ssize_t i;
893 register char c;
894 register char *p;
895 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000896
Christian Heimes44720832008-05-26 13:01:01 +0000897 /* figure out which quote to use; single is preferred */
898 quote = '\'';
899 if (smartquotes &&
900 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
901 !memchr(op->ob_sval, '"', Py_SIZE(op)))
902 quote = '"';
903
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000904 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000905 *p++ = quote;
906 for (i = 0; i < Py_SIZE(op); i++) {
907 /* There's at least enough room for a hex escape
908 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000909 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000910 c = op->ob_sval[i];
911 if (c == quote || c == '\\')
912 *p++ = '\\', *p++ = c;
913 else if (c == '\t')
914 *p++ = '\\', *p++ = 't';
915 else if (c == '\n')
916 *p++ = '\\', *p++ = 'n';
917 else if (c == '\r')
918 *p++ = '\\', *p++ = 'r';
919 else if (c < ' ' || c >= 0x7f) {
920 /* For performance, we don't want to call
921 PyOS_snprintf here (extra layers of
922 function call). */
923 sprintf(p, "\\x%02x", c & 0xff);
924 p += 4;
925 }
926 else
927 *p++ = c;
928 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000929 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000930 *p++ = quote;
931 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000932 _PyString_Resize(
933 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000934 return v;
935 }
936}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937
938static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000939string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000940{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000941 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000942}
943
Christian Heimes1a6387e2008-03-26 12:49:49 +0000944static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000945string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000946{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000947 assert(PyString_Check(s));
948 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000949 Py_INCREF(s);
950 return s;
951 }
952 else {
953 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000954 PyStringObject *t = (PyStringObject *) s;
955 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000956 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000957}
958
Christian Heimes44720832008-05-26 13:01:01 +0000959static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000960string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000961{
962 return Py_SIZE(a);
963}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000964
Christian Heimes44720832008-05-26 13:01:01 +0000965static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000966string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000967{
968 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000969 register PyStringObject *op;
970 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000971#ifdef Py_USING_UNICODE
972 if (PyUnicode_Check(bb))
973 return PyUnicode_Concat((PyObject *)a, bb);
974#endif
975 if (PyByteArray_Check(bb))
976 return PyByteArray_Concat((PyObject *)a, bb);
977 PyErr_Format(PyExc_TypeError,
978 "cannot concatenate 'str' and '%.200s' objects",
979 Py_TYPE(bb)->tp_name);
980 return NULL;
981 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000982#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000983 /* Optimize cases with empty left or right operand */
984 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000985 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000986 if (Py_SIZE(a) == 0) {
987 Py_INCREF(bb);
988 return bb;
989 }
990 Py_INCREF(a);
991 return (PyObject *)a;
992 }
993 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +0000994 /* Check that string sizes are not negative, to prevent an
995 overflow in cases where we are passed incorrectly-created
996 strings with negative lengths (due to a bug in other code).
997 */
998 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
999 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001000 PyErr_SetString(PyExc_OverflowError,
1001 "strings are too large to concat");
1002 return NULL;
1003 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001004
Christian Heimes44720832008-05-26 13:01:01 +00001005 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001006 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001007 PyErr_SetString(PyExc_OverflowError,
1008 "strings are too large to concat");
1009 return NULL;
1010 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001011 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001012 if (op == NULL)
1013 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001014 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001015 op->ob_shash = -1;
1016 op->ob_sstate = SSTATE_NOT_INTERNED;
1017 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1018 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1019 op->ob_sval[size] = '\0';
1020 return (PyObject *) op;
1021#undef b
1022}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001023
Christian Heimes44720832008-05-26 13:01:01 +00001024static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001025string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001026{
1027 register Py_ssize_t i;
1028 register Py_ssize_t j;
1029 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001030 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001031 size_t nbytes;
1032 if (n < 0)
1033 n = 0;
1034 /* watch out for overflows: the size can overflow int,
1035 * and the # of bytes needed can overflow size_t
1036 */
1037 size = Py_SIZE(a) * n;
1038 if (n && size / n != Py_SIZE(a)) {
1039 PyErr_SetString(PyExc_OverflowError,
1040 "repeated string is too long");
1041 return NULL;
1042 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001043 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001044 Py_INCREF(a);
1045 return (PyObject *)a;
1046 }
1047 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001048 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001049 PyErr_SetString(PyExc_OverflowError,
1050 "repeated string is too long");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001054 if (op == NULL)
1055 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001056 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001057 op->ob_shash = -1;
1058 op->ob_sstate = SSTATE_NOT_INTERNED;
1059 op->ob_sval[size] = '\0';
1060 if (Py_SIZE(a) == 1 && n > 0) {
1061 memset(op->ob_sval, a->ob_sval[0] , n);
1062 return (PyObject *) op;
1063 }
1064 i = 0;
1065 if (i < size) {
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 i = Py_SIZE(a);
1068 }
1069 while (i < size) {
1070 j = (i <= size-i) ? i : size-i;
1071 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1072 i += j;
1073 }
1074 return (PyObject *) op;
1075}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001076
Christian Heimes44720832008-05-26 13:01:01 +00001077/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1078
1079static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001080string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001081 register Py_ssize_t j)
1082 /* j -- may be negative! */
1083{
1084 if (i < 0)
1085 i = 0;
1086 if (j < 0)
1087 j = 0; /* Avoid signed/unsigned bug in next line */
1088 if (j > Py_SIZE(a))
1089 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001090 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001091 /* It's the same as a */
1092 Py_INCREF(a);
1093 return (PyObject *)a;
1094 }
1095 if (j < i)
1096 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001097 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001098}
1099
1100static int
1101string_contains(PyObject *str_obj, PyObject *sub_obj)
1102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001103 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001104#ifdef Py_USING_UNICODE
1105 if (PyUnicode_Check(sub_obj))
1106 return PyUnicode_Contains(str_obj, sub_obj);
1107#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001108 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001109 PyErr_Format(PyExc_TypeError,
1110 "'in <string>' requires string as left operand, "
1111 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1112 return -1;
1113 }
1114 }
1115
1116 return stringlib_contains_obj(str_obj, sub_obj);
1117}
1118
1119static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001120string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001121{
1122 char pchar;
1123 PyObject *v;
1124 if (i < 0 || i >= Py_SIZE(a)) {
1125 PyErr_SetString(PyExc_IndexError, "string index out of range");
1126 return NULL;
1127 }
1128 pchar = a->ob_sval[i];
1129 v = (PyObject *)characters[pchar & UCHAR_MAX];
1130 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001131 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001132 else {
1133#ifdef COUNT_ALLOCS
1134 one_strings++;
1135#endif
1136 Py_INCREF(v);
1137 }
1138 return v;
1139}
1140
1141static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001142string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001143{
1144 int c;
1145 Py_ssize_t len_a, len_b;
1146 Py_ssize_t min_len;
1147 PyObject *result;
1148
1149 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001150 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001151 result = Py_NotImplemented;
1152 goto out;
1153 }
1154 if (a == b) {
1155 switch (op) {
1156 case Py_EQ:case Py_LE:case Py_GE:
1157 result = Py_True;
1158 goto out;
1159 case Py_NE:case Py_LT:case Py_GT:
1160 result = Py_False;
1161 goto out;
1162 }
1163 }
1164 if (op == Py_EQ) {
1165 /* Supporting Py_NE here as well does not save
1166 much time, since Py_NE is rarely used. */
1167 if (Py_SIZE(a) == Py_SIZE(b)
1168 && (a->ob_sval[0] == b->ob_sval[0]
1169 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1170 result = Py_True;
1171 } else {
1172 result = Py_False;
1173 }
1174 goto out;
1175 }
1176 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1177 min_len = (len_a < len_b) ? len_a : len_b;
1178 if (min_len > 0) {
1179 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1180 if (c==0)
1181 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1182 } else
1183 c = 0;
1184 if (c == 0)
1185 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1186 switch (op) {
1187 case Py_LT: c = c < 0; break;
1188 case Py_LE: c = c <= 0; break;
1189 case Py_EQ: assert(0); break; /* unreachable */
1190 case Py_NE: c = c != 0; break;
1191 case Py_GT: c = c > 0; break;
1192 case Py_GE: c = c >= 0; break;
1193 default:
1194 result = Py_NotImplemented;
1195 goto out;
1196 }
1197 result = c ? Py_True : Py_False;
1198 out:
1199 Py_INCREF(result);
1200 return result;
1201}
1202
1203int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001204_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001205{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206 PyStringObject *a = (PyStringObject*) o1;
1207 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001208 return Py_SIZE(a) == Py_SIZE(b)
1209 && *a->ob_sval == *b->ob_sval
1210 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1211}
1212
1213static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001214string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001215{
1216 register Py_ssize_t len;
1217 register unsigned char *p;
1218 register long x;
1219
1220 if (a->ob_shash != -1)
1221 return a->ob_shash;
1222 len = Py_SIZE(a);
1223 p = (unsigned char *) a->ob_sval;
1224 x = *p << 7;
1225 while (--len >= 0)
1226 x = (1000003*x) ^ *p++;
1227 x ^= Py_SIZE(a);
1228 if (x == -1)
1229 x = -2;
1230 a->ob_shash = x;
1231 return x;
1232}
1233
1234static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001235string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001236{
1237 if (PyIndex_Check(item)) {
1238 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1239 if (i == -1 && PyErr_Occurred())
1240 return NULL;
1241 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001242 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001243 return string_item(self, i);
1244 }
1245 else if (PySlice_Check(item)) {
1246 Py_ssize_t start, stop, step, slicelength, cur, i;
1247 char* source_buf;
1248 char* result_buf;
1249 PyObject* result;
1250
1251 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001252 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001253 &start, &stop, &step, &slicelength) < 0) {
1254 return NULL;
1255 }
1256
1257 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001258 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001259 }
1260 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001261 slicelength == PyString_GET_SIZE(self) &&
1262 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001263 Py_INCREF(self);
1264 return (PyObject *)self;
1265 }
1266 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001267 return PyString_FromStringAndSize(
1268 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001269 slicelength);
1270 }
1271 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001272 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001273 result_buf = (char *)PyMem_Malloc(slicelength);
1274 if (result_buf == NULL)
1275 return PyErr_NoMemory();
1276
1277 for (cur = start, i = 0; i < slicelength;
1278 cur += step, i++) {
1279 result_buf[i] = source_buf[cur];
1280 }
1281
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001282 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001283 slicelength);
1284 PyMem_Free(result_buf);
1285 return result;
1286 }
1287 }
1288 else {
1289 PyErr_Format(PyExc_TypeError,
1290 "string indices must be integers, not %.200s",
1291 Py_TYPE(item)->tp_name);
1292 return NULL;
1293 }
1294}
1295
1296static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001297string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001298{
1299 if ( index != 0 ) {
1300 PyErr_SetString(PyExc_SystemError,
1301 "accessing non-existent string segment");
1302 return -1;
1303 }
1304 *ptr = (void *)self->ob_sval;
1305 return Py_SIZE(self);
1306}
1307
1308static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001310{
1311 PyErr_SetString(PyExc_TypeError,
1312 "Cannot use string as modifiable buffer");
1313 return -1;
1314}
1315
1316static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001317string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001318{
1319 if ( lenp )
1320 *lenp = Py_SIZE(self);
1321 return 1;
1322}
1323
1324static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001325string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001326{
1327 if ( index != 0 ) {
1328 PyErr_SetString(PyExc_SystemError,
1329 "accessing non-existent string segment");
1330 return -1;
1331 }
1332 *ptr = self->ob_sval;
1333 return Py_SIZE(self);
1334}
1335
1336static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001337string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001338{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001339 return PyBuffer_FillInfo(view, (PyObject*)self,
1340 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001341 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001342}
1343
1344static PySequenceMethods string_as_sequence = {
1345 (lenfunc)string_length, /*sq_length*/
1346 (binaryfunc)string_concat, /*sq_concat*/
1347 (ssizeargfunc)string_repeat, /*sq_repeat*/
1348 (ssizeargfunc)string_item, /*sq_item*/
1349 (ssizessizeargfunc)string_slice, /*sq_slice*/
1350 0, /*sq_ass_item*/
1351 0, /*sq_ass_slice*/
1352 (objobjproc)string_contains /*sq_contains*/
1353};
1354
1355static PyMappingMethods string_as_mapping = {
1356 (lenfunc)string_length,
1357 (binaryfunc)string_subscript,
1358 0,
1359};
1360
1361static PyBufferProcs string_as_buffer = {
1362 (readbufferproc)string_buffer_getreadbuf,
1363 (writebufferproc)string_buffer_getwritebuf,
1364 (segcountproc)string_buffer_getsegcount,
1365 (charbufferproc)string_buffer_getcharbuf,
1366 (getbufferproc)string_buffer_getbuffer,
1367 0, /* XXX */
1368};
1369
1370
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001371
/* Strip-direction selectors; they double as indexes into stripformat. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* The text after the 3-character "|O:" prefix of the format string. */
#define STRIPNAME(i) (stripformat[i]+3)
1380
Christian Heimes1a6387e2008-03-26 12:49:49 +00001381
/* True when `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared directly and only
   the interior goes through memcmp, so the interior length is
   length-2:
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)    \
    (target[offset] == pattern[0] &&                        \
     target[offset+length-1] == pattern[length-1] &&        \
     !memcmp(target+offset+1, pattern+1, length-2) )
1387
1388
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a given split limit, capped at MAX_PREALLOC.
   5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001401
/* Append data[left:right] to `list` as a new string.
   Relies on the expanding function declaring `str` and `list` and
   providing an `onError` label, which is jumped to on any failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001413
/* Add data[left:right] to `list` as a new string and bump `count`.
   While count is below MAX_PREALLOC the item is stored directly with
   PyList_SET_ITEM; after that it is appended.  Relies on the expanding
   function declaring `str`, `list` and `count` and providing an
   `onError` label. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430
/* Always force the list to the expected size: set its size field to
   the `count` variable of the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1433
/* Cursor helpers that advance (or, for the R variants, retreat) index
   `i` over s while the isspace() test holds / fails.  The forward forms
   stop at `len`; the reverse forms stop once i drops below 0. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001442 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001443 Py_ssize_t i, j, count=0;
1444 PyObject *str;
1445 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Christian Heimes44720832008-05-26 13:01:01 +00001447 if (list == NULL)
1448 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001449
Christian Heimes44720832008-05-26 13:01:01 +00001450 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
Christian Heimes44720832008-05-26 13:01:01 +00001452 while (maxsplit-- > 0) {
1453 SKIP_SPACE(s, i, len);
1454 if (i==len) break;
1455 j = i; i++;
1456 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001457 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001458 /* No whitespace in self, so just use it as list[0] */
1459 Py_INCREF(self);
1460 PyList_SET_ITEM(list, 0, (PyObject *)self);
1461 count++;
1462 break;
1463 }
1464 SPLIT_ADD(s, j, i);
1465 }
1466
1467 if (i < len) {
1468 /* Only occurs when maxsplit was reached */
1469 /* Skip any remaining whitespace and copy to end of string */
1470 SKIP_SPACE(s, i, len);
1471 if (i != len)
1472 SPLIT_ADD(s, i, len);
1473 }
1474 FIX_PREALLOC_SIZE(list);
1475 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001477 Py_DECREF(list);
1478 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479}
1480
Christian Heimes1a6387e2008-03-26 12:49:49 +00001481Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001482split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001484 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001485 register Py_ssize_t i, j, count=0;
1486 PyObject *str;
1487 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488
Christian Heimes44720832008-05-26 13:01:01 +00001489 if (list == NULL)
1490 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001491
Christian Heimes44720832008-05-26 13:01:01 +00001492 i = j = 0;
1493 while ((j < len) && (maxcount-- > 0)) {
1494 for(; j<len; j++) {
1495 /* I found that using memchr makes no difference */
1496 if (s[j] == ch) {
1497 SPLIT_ADD(s, i, j);
1498 i = j = j + 1;
1499 break;
1500 }
1501 }
1502 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001503 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001504 /* ch not in self, so just use self as list[0] */
1505 Py_INCREF(self);
1506 PyList_SET_ITEM(list, 0, (PyObject *)self);
1507 count++;
1508 }
1509 else if (i <= len) {
1510 SPLIT_ADD(s, i, len);
1511 }
1512 FIX_PREALLOC_SIZE(list);
1513 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514
1515 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001516 Py_DECREF(list);
1517 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001518}
1519
1520PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001521"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001522\n\
Christian Heimes44720832008-05-26 13:01:01 +00001523Return a list of the words in the string S, using sep as the\n\
1524delimiter string. If maxsplit is given, at most maxsplit\n\
1525splits are done. If sep is not specified or is None, any\n\
1526whitespace string is a separator and empty strings are removed\n\
1527from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528
1529static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001530string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001532 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001533 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001534 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001535 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001537 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538#endif
1539
Christian Heimes44720832008-05-26 13:01:01 +00001540 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1541 return NULL;
1542 if (maxsplit < 0)
1543 maxsplit = PY_SSIZE_T_MAX;
1544 if (subobj == Py_None)
1545 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001549 }
1550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj))
1552 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1553#endif
1554 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1555 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001556
Christian Heimes44720832008-05-26 13:01:01 +00001557 if (n == 0) {
1558 PyErr_SetString(PyExc_ValueError, "empty separator");
1559 return NULL;
1560 }
1561 else if (n == 1)
1562 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001563
Christian Heimes44720832008-05-26 13:01:01 +00001564 list = PyList_New(PREALLOC_SIZE(maxsplit));
1565 if (list == NULL)
1566 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001567
1568#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001569 i = j = 0;
1570 while (maxsplit-- > 0) {
1571 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1572 if (pos < 0)
1573 break;
1574 j = i+pos;
1575 SPLIT_ADD(s, i, j);
1576 i = j + n;
1577 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001578#else
Christian Heimes44720832008-05-26 13:01:01 +00001579 i = j = 0;
1580 while ((j+n <= len) && (maxsplit-- > 0)) {
1581 for (; j+n <= len; j++) {
1582 if (Py_STRING_MATCH(s, j, sub, n)) {
1583 SPLIT_ADD(s, i, j);
1584 i = j = j + n;
1585 break;
1586 }
1587 }
1588 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001589#endif
Christian Heimes44720832008-05-26 13:01:01 +00001590 SPLIT_ADD(s, i, len);
1591 FIX_PREALLOC_SIZE(list);
1592 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001593
Christian Heimes44720832008-05-26 13:01:01 +00001594 onError:
1595 Py_DECREF(list);
1596 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001597}
1598
1599PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001600"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001601\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001602Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001604found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001605
1606static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001607string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608{
Christian Heimes44720832008-05-26 13:01:01 +00001609 const char *sep;
1610 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001612 if (PyString_Check(sep_obj)) {
1613 sep = PyString_AS_STRING(sep_obj);
1614 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001615 }
1616#ifdef Py_USING_UNICODE
1617 else if (PyUnicode_Check(sep_obj))
1618 return PyUnicode_Partition((PyObject *) self, sep_obj);
1619#endif
1620 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1621 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001622
Christian Heimes44720832008-05-26 13:01:01 +00001623 return stringlib_partition(
1624 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001625 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001626 sep_obj, sep, sep_len
1627 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001628}
1629
1630PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001631"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001632\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001633Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001634the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001635separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001636
1637static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Christian Heimes44720832008-05-26 13:01:01 +00001640 const char *sep;
1641 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001643 if (PyString_Check(sep_obj)) {
1644 sep = PyString_AS_STRING(sep_obj);
1645 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001646 }
1647#ifdef Py_USING_UNICODE
1648 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001649 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001650#endif
1651 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1652 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001653
Christian Heimes44720832008-05-26 13:01:01 +00001654 return stringlib_rpartition(
1655 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001656 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001657 sep_obj, sep, sep_len
1658 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001659}
1660
1661Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001662rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001664 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001665 Py_ssize_t i, j, count=0;
1666 PyObject *str;
1667 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001668
Christian Heimes44720832008-05-26 13:01:01 +00001669 if (list == NULL)
1670 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001671
Christian Heimes44720832008-05-26 13:01:01 +00001672 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Christian Heimes44720832008-05-26 13:01:01 +00001674 while (maxsplit-- > 0) {
1675 RSKIP_SPACE(s, i);
1676 if (i<0) break;
1677 j = i; i--;
1678 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001679 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001680 /* No whitespace in self, so just use it as list[0] */
1681 Py_INCREF(self);
1682 PyList_SET_ITEM(list, 0, (PyObject *)self);
1683 count++;
1684 break;
1685 }
1686 SPLIT_ADD(s, i + 1, j + 1);
1687 }
1688 if (i >= 0) {
1689 /* Only occurs when maxsplit was reached */
1690 /* Skip any remaining whitespace and copy to beginning of string */
1691 RSKIP_SPACE(s, i);
1692 if (i >= 0)
1693 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694
Christian Heimes44720832008-05-26 13:01:01 +00001695 }
1696 FIX_PREALLOC_SIZE(list);
1697 if (PyList_Reverse(list) < 0)
1698 goto onError;
1699 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001700 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001701 Py_DECREF(list);
1702 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001703}
1704
1705Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001706rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001707{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001708 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001709 register Py_ssize_t i, j, count=0;
1710 PyObject *str;
1711 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001712
Christian Heimes44720832008-05-26 13:01:01 +00001713 if (list == NULL)
1714 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 i = j = len - 1;
1717 while ((i >= 0) && (maxcount-- > 0)) {
1718 for (; i >= 0; i--) {
1719 if (s[i] == ch) {
1720 SPLIT_ADD(s, i + 1, j + 1);
1721 j = i = i - 1;
1722 break;
1723 }
1724 }
1725 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001726 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001727 /* ch not in self, so just use self as list[0] */
1728 Py_INCREF(self);
1729 PyList_SET_ITEM(list, 0, (PyObject *)self);
1730 count++;
1731 }
1732 else if (j >= -1) {
1733 SPLIT_ADD(s, 0, j + 1);
1734 }
1735 FIX_PREALLOC_SIZE(list);
1736 if (PyList_Reverse(list) < 0)
1737 goto onError;
1738 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001739
Christian Heimes44720832008-05-26 13:01:01 +00001740 onError:
1741 Py_DECREF(list);
1742 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001743}
1744
1745PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001746"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001747\n\
Christian Heimes44720832008-05-26 13:01:01 +00001748Return a list of the words in the string S, using sep as the\n\
1749delimiter string, starting at the end of the string and working\n\
1750to the front. If maxsplit is given, at most maxsplit splits are\n\
1751done. If sep is not specified or is None, any whitespace string\n\
1752is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001753
1754static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001755string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001756{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001757 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001758 Py_ssize_t maxsplit = -1, count=0;
1759 const char *s, *sub;
1760 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1763 return NULL;
1764 if (maxsplit < 0)
1765 maxsplit = PY_SSIZE_T_MAX;
1766 if (subobj == Py_None)
1767 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001768 if (PyString_Check(subobj)) {
1769 sub = PyString_AS_STRING(subobj);
1770 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001771 }
1772#ifdef Py_USING_UNICODE
1773 else if (PyUnicode_Check(subobj))
1774 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1775#endif
1776 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1777 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001778
Christian Heimes44720832008-05-26 13:01:01 +00001779 if (n == 0) {
1780 PyErr_SetString(PyExc_ValueError, "empty separator");
1781 return NULL;
1782 }
1783 else if (n == 1)
1784 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
Christian Heimes44720832008-05-26 13:01:01 +00001786 list = PyList_New(PREALLOC_SIZE(maxsplit));
1787 if (list == NULL)
1788 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001789
Christian Heimes44720832008-05-26 13:01:01 +00001790 j = len;
1791 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001792
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001793 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001794 while ( (i >= 0) && (maxsplit-- > 0) ) {
1795 for (; i>=0; i--) {
1796 if (Py_STRING_MATCH(s, i, sub, n)) {
1797 SPLIT_ADD(s, i + n, j);
1798 j = i;
1799 i -= n;
1800 break;
1801 }
1802 }
1803 }
1804 SPLIT_ADD(s, 0, j);
1805 FIX_PREALLOC_SIZE(list);
1806 if (PyList_Reverse(list) < 0)
1807 goto onError;
1808 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001809
1810onError:
Christian Heimes44720832008-05-26 13:01:01 +00001811 Py_DECREF(list);
1812 return NULL;
1813}
1814
1815
1816PyDoc_STRVAR(join__doc__,
1817"S.join(sequence) -> string\n\
1818\n\
1819Return a string which is the concatenation of the strings in the\n\
1820sequence. The separator between elements is S.");
1821
1822static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001823string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001824{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001825 char *sep = PyString_AS_STRING(self);
1826 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001827 PyObject *res = NULL;
1828 char *p;
1829 Py_ssize_t seqlen = 0;
1830 size_t sz = 0;
1831 Py_ssize_t i;
1832 PyObject *seq, *item;
1833
1834 seq = PySequence_Fast(orig, "");
1835 if (seq == NULL) {
1836 return NULL;
1837 }
1838
1839 seqlen = PySequence_Size(seq);
1840 if (seqlen == 0) {
1841 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001842 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001843 }
1844 if (seqlen == 1) {
1845 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001846 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001847 Py_INCREF(item);
1848 Py_DECREF(seq);
1849 return item;
1850 }
1851 }
1852
1853 /* There are at least two things to join, or else we have a subclass
1854 * of the builtin types in the sequence.
1855 * Do a pre-pass to figure out the total amount of space we'll
1856 * need (sz), see whether any argument is absurd, and defer to
1857 * the Unicode join if appropriate.
1858 */
1859 for (i = 0; i < seqlen; i++) {
1860 const size_t old_sz = sz;
1861 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001862 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001863#ifdef Py_USING_UNICODE
1864 if (PyUnicode_Check(item)) {
1865 /* Defer to Unicode join.
1866 * CAUTION: There's no gurantee that the
1867 * original sequence can be iterated over
1868 * again, so we must pass seq here.
1869 */
1870 PyObject *result;
1871 result = PyUnicode_Join((PyObject *)self, seq);
1872 Py_DECREF(seq);
1873 return result;
1874 }
1875#endif
1876 PyErr_Format(PyExc_TypeError,
1877 "sequence item %zd: expected string,"
1878 " %.80s found",
1879 i, Py_TYPE(item)->tp_name);
1880 Py_DECREF(seq);
1881 return NULL;
1882 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001883 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001884 if (i != 0)
1885 sz += seplen;
1886 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1887 PyErr_SetString(PyExc_OverflowError,
1888 "join() result is too long for a Python string");
1889 Py_DECREF(seq);
1890 return NULL;
1891 }
1892 }
1893
1894 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001895 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001896 if (res == NULL) {
1897 Py_DECREF(seq);
1898 return NULL;
1899 }
1900
1901 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001902 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001903 for (i = 0; i < seqlen; ++i) {
1904 size_t n;
1905 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001906 n = PyString_GET_SIZE(item);
1907 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001908 p += n;
1909 if (i < seqlen - 1) {
1910 Py_MEMCPY(p, sep, seplen);
1911 p += seplen;
1912 }
1913 }
1914
1915 Py_DECREF(seq);
1916 return res;
1917}
1918
1919PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001920_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001921{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001922 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001923 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001924 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001925}
1926
1927Py_LOCAL_INLINE(void)
1928string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1929{
1930 if (*end > len)
1931 *end = len;
1932 else if (*end < 0)
1933 *end += len;
1934 if (*end < 0)
1935 *end = 0;
1936 if (*start < 0)
1937 *start += len;
1938 if (*start < 0)
1939 *start = 0;
1940}
1941
1942Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001943string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001944{
1945 PyObject *subobj;
1946 const char *sub;
1947 Py_ssize_t sub_len;
1948 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1949 PyObject *obj_start=Py_None, *obj_end=Py_None;
1950
1951 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1952 &obj_start, &obj_end))
1953 return -2;
1954 /* To support None in "start" and "end" arguments, meaning
1955 the same as if they were not passed.
1956 */
1957 if (obj_start != Py_None)
1958 if (!_PyEval_SliceIndex(obj_start, &start))
1959 return -2;
1960 if (obj_end != Py_None)
1961 if (!_PyEval_SliceIndex(obj_end, &end))
1962 return -2;
1963
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001964 if (PyString_Check(subobj)) {
1965 sub = PyString_AS_STRING(subobj);
1966 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001967 }
1968#ifdef Py_USING_UNICODE
1969 else if (PyUnicode_Check(subobj))
1970 return PyUnicode_Find(
1971 (PyObject *)self, subobj, start, end, dir);
1972#endif
1973 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1974 /* XXX - the "expected a character buffer object" is pretty
1975 confusing for a non-expert. remap to something else ? */
1976 return -2;
1977
1978 if (dir > 0)
1979 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001980 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001981 sub, sub_len, start, end);
1982 else
1983 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001985 sub, sub_len, start, end);
1986}
1987
1988
1989PyDoc_STRVAR(find__doc__,
1990"S.find(sub [,start [,end]]) -> int\n\
1991\n\
1992Return the lowest index in S where substring sub is found,\n\
1993such that sub is contained within s[start:end]. Optional\n\
1994arguments start and end are interpreted as in slice notation.\n\
1995\n\
1996Return -1 on failure.");
1997
1998static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001999string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002000{
2001 Py_ssize_t result = string_find_internal(self, args, +1);
2002 if (result == -2)
2003 return NULL;
2004 return PyInt_FromSsize_t(result);
2005}
2006
2007
2008PyDoc_STRVAR(index__doc__,
2009"S.index(sub [,start [,end]]) -> int\n\
2010\n\
2011Like S.find() but raise ValueError when the substring is not found.");
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 Py_ssize_t result = string_find_internal(self, args, +1);
2017 if (result == -2)
2018 return NULL;
2019 if (result == -1) {
2020 PyErr_SetString(PyExc_ValueError,
2021 "substring not found");
2022 return NULL;
2023 }
2024 return PyInt_FromSsize_t(result);
2025}
2026
2027
2028PyDoc_STRVAR(rfind__doc__,
2029"S.rfind(sub [,start [,end]]) -> int\n\
2030\n\
2031Return the highest index in S where substring sub is found,\n\
2032such that sub is contained within s[start:end]. Optional\n\
2033arguments start and end are interpreted as in slice notation.\n\
2034\n\
2035Return -1 on failure.");
2036
2037static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002038string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002039{
2040 Py_ssize_t result = string_find_internal(self, args, -1);
2041 if (result == -2)
2042 return NULL;
2043 return PyInt_FromSsize_t(result);
2044}
2045
2046
2047PyDoc_STRVAR(rindex__doc__,
2048"S.rindex(sub [,start [,end]]) -> int\n\
2049\n\
2050Like S.rfind() but raise ValueError when the substring is not found.");
2051
2052static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002053string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002054{
2055 Py_ssize_t result = string_find_internal(self, args, -1);
2056 if (result == -2)
2057 return NULL;
2058 if (result == -1) {
2059 PyErr_SetString(PyExc_ValueError,
2060 "substring not found");
2061 return NULL;
2062 }
2063 return PyInt_FromSsize_t(result);
2064}
2065
2066
2067Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002068do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002069{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002070 char *s = PyString_AS_STRING(self);
2071 Py_ssize_t len = PyString_GET_SIZE(self);
2072 char *sep = PyString_AS_STRING(sepobj);
2073 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002074 Py_ssize_t i, j;
2075
2076 i = 0;
2077 if (striptype != RIGHTSTRIP) {
2078 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2079 i++;
2080 }
2081 }
2082
2083 j = len;
2084 if (striptype != LEFTSTRIP) {
2085 do {
2086 j--;
2087 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2088 j++;
2089 }
2090
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002091 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002092 Py_INCREF(self);
2093 return (PyObject*)self;
2094 }
2095 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002096 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002097}
2098
2099
2100Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002101do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103 char *s = PyString_AS_STRING(self);
2104 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002105
2106 i = 0;
2107 if (striptype != RIGHTSTRIP) {
2108 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2109 i++;
2110 }
2111 }
2112
2113 j = len;
2114 if (striptype != LEFTSTRIP) {
2115 do {
2116 j--;
2117 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2118 j++;
2119 }
2120
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002121 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002122 Py_INCREF(self);
2123 return (PyObject*)self;
2124 }
2125 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002126 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002127}
2128
2129
2130Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002131do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002132{
2133 PyObject *sep = NULL;
2134
2135 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2136 return NULL;
2137
2138 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002139 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002140 return do_xstrip(self, striptype, sep);
2141#ifdef Py_USING_UNICODE
2142 else if (PyUnicode_Check(sep)) {
2143 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2144 PyObject *res;
2145 if (uniself==NULL)
2146 return NULL;
2147 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2148 striptype, sep);
2149 Py_DECREF(uniself);
2150 return res;
2151 }
2152#endif
2153 PyErr_Format(PyExc_TypeError,
2154#ifdef Py_USING_UNICODE
2155 "%s arg must be None, str or unicode",
2156#else
2157 "%s arg must be None or str",
2158#endif
2159 STRIPNAME(striptype));
2160 return NULL;
2161 }
2162
2163 return do_strip(self, striptype);
2164}
2165
2166
2167PyDoc_STRVAR(strip__doc__,
2168"S.strip([chars]) -> string or unicode\n\
2169\n\
2170Return a copy of the string S with leading and trailing\n\
2171whitespace removed.\n\
2172If chars is given and not None, remove characters in chars instead.\n\
2173If chars is unicode, S will be converted to unicode before stripping");
2174
2175static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002176string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002177{
2178 if (PyTuple_GET_SIZE(args) == 0)
2179 return do_strip(self, BOTHSTRIP); /* Common case */
2180 else
2181 return do_argstrip(self, BOTHSTRIP, args);
2182}
2183
2184
2185PyDoc_STRVAR(lstrip__doc__,
2186"S.lstrip([chars]) -> string or unicode\n\
2187\n\
2188Return a copy of the string S with leading whitespace removed.\n\
2189If chars is given and not None, remove characters in chars instead.\n\
2190If chars is unicode, S will be converted to unicode before stripping");
2191
2192static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002193string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002194{
2195 if (PyTuple_GET_SIZE(args) == 0)
2196 return do_strip(self, LEFTSTRIP); /* Common case */
2197 else
2198 return do_argstrip(self, LEFTSTRIP, args);
2199}
2200
2201
2202PyDoc_STRVAR(rstrip__doc__,
2203"S.rstrip([chars]) -> string or unicode\n\
2204\n\
2205Return a copy of the string S with trailing whitespace removed.\n\
2206If chars is given and not None, remove characters in chars instead.\n\
2207If chars is unicode, S will be converted to unicode before stripping");
2208
2209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002210string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002211{
2212 if (PyTuple_GET_SIZE(args) == 0)
2213 return do_strip(self, RIGHTSTRIP); /* Common case */
2214 else
2215 return do_argstrip(self, RIGHTSTRIP, args);
2216}
2217
2218
2219PyDoc_STRVAR(lower__doc__,
2220"S.lower() -> string\n\
2221\n\
2222Return a copy of the string S converted to lowercase.");
2223
2224/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2225#ifndef _tolower
2226#define _tolower tolower
2227#endif
2228
2229static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002230string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002231{
2232 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002233 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002234 PyObject *newobj;
2235
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002236 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002237 if (!newobj)
2238 return NULL;
2239
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002240 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002241
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002242 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002243
2244 for (i = 0; i < n; i++) {
2245 int c = Py_CHARMASK(s[i]);
2246 if (isupper(c))
2247 s[i] = _tolower(c);
2248 }
2249
2250 return newobj;
2251}
2252
2253PyDoc_STRVAR(upper__doc__,
2254"S.upper() -> string\n\
2255\n\
2256Return a copy of the string S converted to uppercase.");
2257
2258#ifndef _toupper
2259#define _toupper toupper
2260#endif
2261
2262static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002263string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002264{
2265 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002267 PyObject *newobj;
2268
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002269 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002270 if (!newobj)
2271 return NULL;
2272
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002273 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002274
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002275 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002276
2277 for (i = 0; i < n; i++) {
2278 int c = Py_CHARMASK(s[i]);
2279 if (islower(c))
2280 s[i] = _toupper(c);
2281 }
2282
2283 return newobj;
2284}
2285
2286PyDoc_STRVAR(title__doc__,
2287"S.title() -> string\n\
2288\n\
2289Return a titlecased version of S, i.e. words start with uppercase\n\
2290characters, all remaining cased characters have lowercase.");
2291
2292static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002293string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002294{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002295 char *s = PyString_AS_STRING(self), *s_new;
2296 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002297 int previous_is_cased = 0;
2298 PyObject *newobj;
2299
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002300 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002301 if (newobj == NULL)
2302 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002303 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002304 for (i = 0; i < n; i++) {
2305 int c = Py_CHARMASK(*s++);
2306 if (islower(c)) {
2307 if (!previous_is_cased)
2308 c = toupper(c);
2309 previous_is_cased = 1;
2310 } else if (isupper(c)) {
2311 if (previous_is_cased)
2312 c = tolower(c);
2313 previous_is_cased = 1;
2314 } else
2315 previous_is_cased = 0;
2316 *s_new++ = c;
2317 }
2318 return newobj;
2319}
2320
2321PyDoc_STRVAR(capitalize__doc__,
2322"S.capitalize() -> string\n\
2323\n\
2324Return a copy of the string S with only its first character\n\
2325capitalized.");
2326
2327static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002328string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002329{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002330 char *s = PyString_AS_STRING(self), *s_new;
2331 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002332 PyObject *newobj;
2333
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002334 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002335 if (newobj == NULL)
2336 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002337 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002338 if (0 < n) {
2339 int c = Py_CHARMASK(*s++);
2340 if (islower(c))
2341 *s_new = toupper(c);
2342 else
2343 *s_new = c;
2344 s_new++;
2345 }
2346 for (i = 1; i < n; i++) {
2347 int c = Py_CHARMASK(*s++);
2348 if (isupper(c))
2349 *s_new = tolower(c);
2350 else
2351 *s_new = c;
2352 s_new++;
2353 }
2354 return newobj;
2355}
2356
2357
2358PyDoc_STRVAR(count__doc__,
2359"S.count(sub[, start[, end]]) -> int\n\
2360\n\
2361Return the number of non-overlapping occurrences of substring sub in\n\
2362string S[start:end]. Optional arguments start and end are interpreted\n\
2363as in slice notation.");
2364
2365static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002366string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002367{
2368 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002369 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002370 Py_ssize_t sub_len;
2371 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2372
2373 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2374 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2375 return NULL;
2376
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002377 if (PyString_Check(sub_obj)) {
2378 sub = PyString_AS_STRING(sub_obj);
2379 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002380 }
2381#ifdef Py_USING_UNICODE
2382 else if (PyUnicode_Check(sub_obj)) {
2383 Py_ssize_t count;
2384 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2385 if (count == -1)
2386 return NULL;
2387 else
2388 return PyInt_FromSsize_t(count);
2389 }
2390#endif
2391 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2392 return NULL;
2393
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002394 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002395
2396 return PyInt_FromSsize_t(
2397 stringlib_count(str + start, end - start, sub, sub_len)
2398 );
2399}
2400
2401PyDoc_STRVAR(swapcase__doc__,
2402"S.swapcase() -> string\n\
2403\n\
2404Return a copy of the string S with uppercase characters\n\
2405converted to lowercase and vice versa.");
2406
2407static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002408string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002409{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410 char *s = PyString_AS_STRING(self), *s_new;
2411 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002412 PyObject *newobj;
2413
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002414 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002415 if (newobj == NULL)
2416 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002417 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002418 for (i = 0; i < n; i++) {
2419 int c = Py_CHARMASK(*s++);
2420 if (islower(c)) {
2421 *s_new = toupper(c);
2422 }
2423 else if (isupper(c)) {
2424 *s_new = tolower(c);
2425 }
2426 else
2427 *s_new = c;
2428 s_new++;
2429 }
2430 return newobj;
2431}
2432
2433
2434PyDoc_STRVAR(translate__doc__,
2435"S.translate(table [,deletechars]) -> string\n\
2436\n\
2437Return a copy of the string S, where all characters occurring\n\
2438in the optional argument deletechars are removed, and the\n\
2439remaining characters have been mapped through the given\n\
2440translation table, which must be a string of length 256.");
2441
2442static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002443string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002444{
2445 register char *input, *output;
2446 const char *table;
2447 register Py_ssize_t i, c, changed = 0;
2448 PyObject *input_obj = (PyObject*)self;
2449 const char *output_start, *del_table=NULL;
2450 Py_ssize_t inlen, tablen, dellen = 0;
2451 PyObject *result;
2452 int trans_table[256];
2453 PyObject *tableobj, *delobj = NULL;
2454
2455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2456 &tableobj, &delobj))
2457 return NULL;
2458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002459 if (PyString_Check(tableobj)) {
2460 table = PyString_AS_STRING(tableobj);
2461 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002462 }
2463 else if (tableobj == Py_None) {
2464 table = NULL;
2465 tablen = 256;
2466 }
2467#ifdef Py_USING_UNICODE
2468 else if (PyUnicode_Check(tableobj)) {
2469 /* Unicode .translate() does not support the deletechars
2470 parameter; instead a mapping to None will cause characters
2471 to be deleted. */
2472 if (delobj != NULL) {
2473 PyErr_SetString(PyExc_TypeError,
2474 "deletions are implemented differently for unicode");
2475 return NULL;
2476 }
2477 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2478 }
2479#endif
2480 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2481 return NULL;
2482
2483 if (tablen != 256) {
2484 PyErr_SetString(PyExc_ValueError,
2485 "translation table must be 256 characters long");
2486 return NULL;
2487 }
2488
2489 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002490 if (PyString_Check(delobj)) {
2491 del_table = PyString_AS_STRING(delobj);
2492 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002493 }
2494#ifdef Py_USING_UNICODE
2495 else if (PyUnicode_Check(delobj)) {
2496 PyErr_SetString(PyExc_TypeError,
2497 "deletions are implemented differently for unicode");
2498 return NULL;
2499 }
2500#endif
2501 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2502 return NULL;
2503 }
2504 else {
2505 del_table = NULL;
2506 dellen = 0;
2507 }
2508
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002509 inlen = PyString_GET_SIZE(input_obj);
2510 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002511 if (result == NULL)
2512 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002513 output_start = output = PyString_AsString(result);
2514 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002515
2516 if (dellen == 0 && table != NULL) {
2517 /* If no deletions are required, use faster code */
2518 for (i = inlen; --i >= 0; ) {
2519 c = Py_CHARMASK(*input++);
2520 if (Py_CHARMASK((*output++ = table[c])) != c)
2521 changed = 1;
2522 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002523 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002524 return result;
2525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529
2530 if (table == NULL) {
2531 for (i = 0; i < 256; i++)
2532 trans_table[i] = Py_CHARMASK(i);
2533 } else {
2534 for (i = 0; i < 256; i++)
2535 trans_table[i] = Py_CHARMASK(table[i]);
2536 }
2537
2538 for (i = 0; i < dellen; i++)
2539 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2540
2541 for (i = inlen; --i >= 0; ) {
2542 c = Py_CHARMASK(*input++);
2543 if (trans_table[c] != -1)
2544 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2545 continue;
2546 changed = 1;
2547 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002548 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002549 Py_DECREF(result);
2550 Py_INCREF(input_obj);
2551 return input_obj;
2552 }
2553 /* Fix the size of the resulting string */
2554 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002555 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002556 return result;
2557}
2558
2559
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so it stays a single operand wherever it is
   expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at that byte, or NULL when no
   such byte exists.  Thin wrapper around memchr() whose result is cast
   to char *. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2567
2568/* String ops must return a string. */
2569/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002570Py_LOCAL(PyStringObject *)
2571return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002572{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002573 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002574 Py_INCREF(self);
2575 return self;
2576 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002577 return (PyStringObject *)PyString_FromStringAndSize(
2578 PyString_AS_STRING(self),
2579 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002580}
2581
2582Py_LOCAL_INLINE(Py_ssize_t)
2583countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2584{
2585 Py_ssize_t count=0;
2586 const char *start=target;
2587 const char *end=target+target_len;
2588
2589 while ( (start=findchar(start, end-start, c)) != NULL ) {
2590 count++;
2591 if (count >= maxcount)
2592 break;
2593 start += 1;
2594 }
2595 return count;
2596}
2597
2598Py_LOCAL(Py_ssize_t)
2599findstring(const char *target, Py_ssize_t target_len,
2600 const char *pattern, Py_ssize_t pattern_len,
2601 Py_ssize_t start,
2602 Py_ssize_t end,
2603 int direction)
2604{
2605 if (start < 0) {
2606 start += target_len;
2607 if (start < 0)
2608 start = 0;
2609 }
2610 if (end > target_len) {
2611 end = target_len;
2612 } else if (end < 0) {
2613 end += target_len;
2614 if (end < 0)
2615 end = 0;
2616 }
2617
2618 /* zero-length substrings always match at the first attempt */
2619 if (pattern_len == 0)
2620 return (direction > 0) ? start : end;
2621
2622 end -= pattern_len;
2623
2624 if (direction < 0) {
2625 for (; end >= start; end--)
2626 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2627 return end;
2628 } else {
2629 for (; start <= end; start++)
2630 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2631 return start;
2632 }
2633 return -1;
2634}
2635
2636Py_LOCAL_INLINE(Py_ssize_t)
2637countstring(const char *target, Py_ssize_t target_len,
2638 const char *pattern, Py_ssize_t pattern_len,
2639 Py_ssize_t start,
2640 Py_ssize_t end,
2641 int direction, Py_ssize_t maxcount)
2642{
2643 Py_ssize_t count=0;
2644
2645 if (start < 0) {
2646 start += target_len;
2647 if (start < 0)
2648 start = 0;
2649 }
2650 if (end > target_len) {
2651 end = target_len;
2652 } else if (end < 0) {
2653 end += target_len;
2654 if (end < 0)
2655 end = 0;
2656 }
2657
2658 /* zero-length substrings match everywhere */
2659 if (pattern_len == 0 || maxcount == 0) {
2660 if (target_len+1 < maxcount)
2661 return target_len+1;
2662 return maxcount;
2663 }
2664
2665 end -= pattern_len;
2666 if (direction < 0) {
2667 for (; (end >= start); end--)
2668 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2669 count++;
2670 if (--maxcount <= 0) break;
2671 end -= pattern_len-1;
2672 }
2673 } else {
2674 for (; (start <= end); start++)
2675 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2676 count++;
2677 if (--maxcount <= 0)
2678 break;
2679 start += pattern_len-1;
2680 }
2681 }
2682 return count;
2683}
2684
2685
2686/* Algorithms for different cases of string replacement */
2687
2688/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002689Py_LOCAL(PyStringObject *)
2690replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount)
2693{
2694 char *self_s, *result_s;
2695 Py_ssize_t self_len, result_len;
2696 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002697 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002698
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002699 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002700
2701 /* 1 at the end plus 1 after every character */
2702 count = self_len+1;
2703 if (maxcount < count)
2704 count = maxcount;
2705
2706 /* Check for overflow */
2707 /* result_len = count * to_len + self_len; */
2708 product = count * to_len;
2709 if (product / to_len != count) {
2710 PyErr_SetString(PyExc_OverflowError,
2711 "replace string is too long");
2712 return NULL;
2713 }
2714 result_len = product + self_len;
2715 if (result_len < 0) {
2716 PyErr_SetString(PyExc_OverflowError,
2717 "replace string is too long");
2718 return NULL;
2719 }
2720
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002721 if (! (result = (PyStringObject *)
2722 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002723 return NULL;
2724
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725 self_s = PyString_AS_STRING(self);
2726 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002727
2728 /* TODO: special case single character, which doesn't need memcpy */
2729
2730 /* Lay the first one down (guaranteed this will occur) */
2731 Py_MEMCPY(result_s, to_s, to_len);
2732 result_s += to_len;
2733 count -= 1;
2734
2735 for (i=0; i<count; i++) {
2736 *result_s++ = *self_s++;
2737 Py_MEMCPY(result_s, to_s, to_len);
2738 result_s += to_len;
2739 }
2740
2741 /* Copy the rest of the original string */
2742 Py_MEMCPY(result_s, self_s, self_len-i);
2743
2744 return result;
2745}
2746
2747/* Special case for deleting a single character */
2748/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002749Py_LOCAL(PyStringObject *)
2750replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002751 char from_c, Py_ssize_t maxcount)
2752{
2753 char *self_s, *result_s;
2754 char *start, *next, *end;
2755 Py_ssize_t self_len, result_len;
2756 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002757 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002758
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002759 self_len = PyString_GET_SIZE(self);
2760 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002761
2762 count = countchar(self_s, self_len, from_c, maxcount);
2763 if (count == 0) {
2764 return return_self(self);
2765 }
2766
2767 result_len = self_len - count; /* from_len == 1 */
2768 assert(result_len>=0);
2769
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002770 if ( (result = (PyStringObject *)
2771 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002772 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002773 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002774
2775 start = self_s;
2776 end = self_s + self_len;
2777 while (count-- > 0) {
2778 next = findchar(start, end-start, from_c);
2779 if (next == NULL)
2780 break;
2781 Py_MEMCPY(result_s, start, next-start);
2782 result_s += (next-start);
2783 start = next+1;
2784 }
2785 Py_MEMCPY(result_s, start, end-start);
2786
2787 return result;
2788}
2789
2790/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2791
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002792Py_LOCAL(PyStringObject *)
2793replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002794 const char *from_s, Py_ssize_t from_len,
2795 Py_ssize_t maxcount) {
2796 char *self_s, *result_s;
2797 char *start, *next, *end;
2798 Py_ssize_t self_len, result_len;
2799 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002800 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002801
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002802 self_len = PyString_GET_SIZE(self);
2803 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002804
2805 count = countstring(self_s, self_len,
2806 from_s, from_len,
2807 0, self_len, 1,
2808 maxcount);
2809
2810 if (count == 0) {
2811 /* no matches */
2812 return return_self(self);
2813 }
2814
2815 result_len = self_len - (count * from_len);
2816 assert (result_len>=0);
2817
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002818 if ( (result = (PyStringObject *)
2819 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002820 return NULL;
2821
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002822 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002823
2824 start = self_s;
2825 end = self_s + self_len;
2826 while (count-- > 0) {
2827 offset = findstring(start, end-start,
2828 from_s, from_len,
2829 0, end-start, FORWARD);
2830 if (offset == -1)
2831 break;
2832 next = start + offset;
2833
2834 Py_MEMCPY(result_s, start, next-start);
2835
2836 result_s += (next-start);
2837 start = next+from_len;
2838 }
2839 Py_MEMCPY(result_s, start, end-start);
2840 return result;
2841}
2842
2843/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844Py_LOCAL(PyStringObject *)
2845replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002846 char from_c, char to_c,
2847 Py_ssize_t maxcount)
2848{
2849 char *self_s, *result_s, *start, *end, *next;
2850 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002851 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002852
2853 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002854 self_s = PyString_AS_STRING(self);
2855 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002856
2857 next = findchar(self_s, self_len, from_c);
2858
2859 if (next == NULL) {
2860 /* No matches; return the original string */
2861 return return_self(self);
2862 }
2863
2864 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002866 if (result == NULL)
2867 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002868 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002869 Py_MEMCPY(result_s, self_s, self_len);
2870
2871 /* change everything in-place, starting with this one */
2872 start = result_s + (next-self_s);
2873 *start = to_c;
2874 start++;
2875 end = result_s + self_len;
2876
2877 while (--maxcount > 0) {
2878 next = findchar(start, end-start, from_c);
2879 if (next == NULL)
2880 break;
2881 *next = to_c;
2882 start = next+1;
2883 }
2884
2885 return result;
2886}
2887
2888/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002889Py_LOCAL(PyStringObject *)
2890replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002891 const char *from_s, Py_ssize_t from_len,
2892 const char *to_s, Py_ssize_t to_len,
2893 Py_ssize_t maxcount)
2894{
2895 char *result_s, *start, *end;
2896 char *self_s;
2897 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002898 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002899
2900 /* The result string will be the same size */
2901
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002902 self_s = PyString_AS_STRING(self);
2903 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002904
2905 offset = findstring(self_s, self_len,
2906 from_s, from_len,
2907 0, self_len, FORWARD);
2908 if (offset == -1) {
2909 /* No matches; return the original string */
2910 return return_self(self);
2911 }
2912
2913 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002914 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002915 if (result == NULL)
2916 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002917 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002918 Py_MEMCPY(result_s, self_s, self_len);
2919
2920 /* change everything in-place, starting with this one */
2921 start = result_s + offset;
2922 Py_MEMCPY(start, to_s, from_len);
2923 start += from_len;
2924 end = result_s + self_len;
2925
2926 while ( --maxcount > 0) {
2927 offset = findstring(start, end-start,
2928 from_s, from_len,
2929 0, end-start, FORWARD);
2930 if (offset==-1)
2931 break;
2932 Py_MEMCPY(start+offset, to_s, from_len);
2933 start += offset+from_len;
2934 }
2935
2936 return result;
2937}
2938
2939/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002940Py_LOCAL(PyStringObject *)
2941replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002942 char from_c,
2943 const char *to_s, Py_ssize_t to_len,
2944 Py_ssize_t maxcount)
2945{
2946 char *self_s, *result_s;
2947 char *start, *next, *end;
2948 Py_ssize_t self_len, result_len;
2949 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002950 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002951
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002952 self_s = PyString_AS_STRING(self);
2953 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002954
2955 count = countchar(self_s, self_len, from_c, maxcount);
2956 if (count == 0) {
2957 /* no matches, return unchanged */
2958 return return_self(self);
2959 }
2960
2961 /* use the difference between current and new, hence the "-1" */
2962 /* result_len = self_len + count * (to_len-1) */
2963 product = count * (to_len-1);
2964 if (product / (to_len-1) != count) {
2965 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2966 return NULL;
2967 }
2968 result_len = self_len + product;
2969 if (result_len < 0) {
2970 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2971 return NULL;
2972 }
2973
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002974 if ( (result = (PyStringObject *)
2975 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002976 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002977 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002978
2979 start = self_s;
2980 end = self_s + self_len;
2981 while (count-- > 0) {
2982 next = findchar(start, end-start, from_c);
2983 if (next == NULL)
2984 break;
2985
2986 if (next == start) {
2987 /* replace with the 'to' */
2988 Py_MEMCPY(result_s, to_s, to_len);
2989 result_s += to_len;
2990 start += 1;
2991 } else {
2992 /* copy the unchanged old then the 'to' */
2993 Py_MEMCPY(result_s, start, next-start);
2994 result_s += (next-start);
2995 Py_MEMCPY(result_s, to_s, to_len);
2996 result_s += to_len;
2997 start = next+1;
2998 }
2999 }
3000 /* Copy the remainder of the remaining string */
3001 Py_MEMCPY(result_s, start, end-start);
3002
3003 return result;
3004}
3005
3006/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003007Py_LOCAL(PyStringObject *)
3008replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003009 const char *from_s, Py_ssize_t from_len,
3010 const char *to_s, Py_ssize_t to_len,
3011 Py_ssize_t maxcount) {
3012 char *self_s, *result_s;
3013 char *start, *next, *end;
3014 Py_ssize_t self_len, result_len;
3015 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003016 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003017
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 self_s = PyString_AS_STRING(self);
3019 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003020
3021 count = countstring(self_s, self_len,
3022 from_s, from_len,
3023 0, self_len, FORWARD, maxcount);
3024 if (count == 0) {
3025 /* no matches, return unchanged */
3026 return return_self(self);
3027 }
3028
3029 /* Check for overflow */
3030 /* result_len = self_len + count * (to_len-from_len) */
3031 product = count * (to_len-from_len);
3032 if (product / (to_len-from_len) != count) {
3033 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3034 return NULL;
3035 }
3036 result_len = self_len + product;
3037 if (result_len < 0) {
3038 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3039 return NULL;
3040 }
3041
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003042 if ( (result = (PyStringObject *)
3043 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003046
3047 start = self_s;
3048 end = self_s + self_len;
3049 while (count-- > 0) {
3050 offset = findstring(start, end-start,
3051 from_s, from_len,
3052 0, end-start, FORWARD);
3053 if (offset == -1)
3054 break;
3055 next = start+offset;
3056 if (next == start) {
3057 /* replace with the 'to' */
3058 Py_MEMCPY(result_s, to_s, to_len);
3059 result_s += to_len;
3060 start += from_len;
3061 } else {
3062 /* copy the unchanged old then the 'to' */
3063 Py_MEMCPY(result_s, start, next-start);
3064 result_s += (next-start);
3065 Py_MEMCPY(result_s, to_s, to_len);
3066 result_s += to_len;
3067 start = next+from_len;
3068 }
3069 }
3070 /* Copy the remainder of the remaining string */
3071 Py_MEMCPY(result_s, start, end-start);
3072
3073 return result;
3074}
3075
3076
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003077Py_LOCAL(PyStringObject *)
3078replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003079 const char *from_s, Py_ssize_t from_len,
3080 const char *to_s, Py_ssize_t to_len,
3081 Py_ssize_t maxcount)
3082{
3083 if (maxcount < 0) {
3084 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003085 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003086 /* nothing to do; return the original string */
3087 return return_self(self);
3088 }
3089
3090 if (maxcount == 0 ||
3091 (from_len == 0 && to_len == 0)) {
3092 /* nothing to do; return the original string */
3093 return return_self(self);
3094 }
3095
3096 /* Handle zero-length special cases */
3097
3098 if (from_len == 0) {
3099 /* insert the 'to' string everywhere. */
3100 /* >>> "Python".replace("", ".") */
3101 /* '.P.y.t.h.o.n.' */
3102 return replace_interleave(self, to_s, to_len, maxcount);
3103 }
3104
3105 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3106 /* point for an empty self string to generate a non-empty string */
3107 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003108 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003109 return return_self(self);
3110 }
3111
3112 if (to_len == 0) {
3113 /* delete all occurances of 'from' string */
3114 if (from_len == 1) {
3115 return replace_delete_single_character(
3116 self, from_s[0], maxcount);
3117 } else {
3118 return replace_delete_substring(self, from_s, from_len, maxcount);
3119 }
3120 }
3121
3122 /* Handle special case where both strings have the same length */
3123
3124 if (from_len == to_len) {
3125 if (from_len == 1) {
3126 return replace_single_character_in_place(
3127 self,
3128 from_s[0],
3129 to_s[0],
3130 maxcount);
3131 } else {
3132 return replace_substring_in_place(
3133 self, from_s, from_len, to_s, to_len, maxcount);
3134 }
3135 }
3136
3137 /* Otherwise use the more generic algorithms */
3138 if (from_len == 1) {
3139 return replace_single_character(self, from_s[0],
3140 to_s, to_len, maxcount);
3141 } else {
3142 /* len('from')>=2, len('to')>=1 */
3143 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3144 }
3145}
3146
3147PyDoc_STRVAR(replace__doc__,
3148"S.replace (old, new[, count]) -> string\n\
3149\n\
3150Return a copy of string S with all occurrences of substring\n\
3151old replaced by new. If the optional argument count is\n\
3152given, only the first count occurrences are replaced.");
3153
3154static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003155string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003156{
3157 Py_ssize_t count = -1;
3158 PyObject *from, *to;
3159 const char *from_s, *to_s;
3160 Py_ssize_t from_len, to_len;
3161
3162 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3163 return NULL;
3164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003165 if (PyString_Check(from)) {
3166 from_s = PyString_AS_STRING(from);
3167 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003168 }
3169#ifdef Py_USING_UNICODE
3170 if (PyUnicode_Check(from))
3171 return PyUnicode_Replace((PyObject *)self,
3172 from, to, count);
3173#endif
3174 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3175 return NULL;
3176
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177 if (PyString_Check(to)) {
3178 to_s = PyString_AS_STRING(to);
3179 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003180 }
3181#ifdef Py_USING_UNICODE
3182 else if (PyUnicode_Check(to))
3183 return PyUnicode_Replace((PyObject *)self,
3184 from, to, count);
3185#endif
3186 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3187 return NULL;
3188
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003189 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003190 from_s, from_len,
3191 to_s, to_len, count);
3192}
3193
3194/** End DALKE **/
3195
3196/* Matches the end (direction >= 0) or start (direction < 0) of self
3197 * against substr, using the start and end arguments. Returns
3198 * -1 on error, 0 if not found and 1 if found.
3199 */
3200Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003201_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003202 Py_ssize_t end, int direction)
3203{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003204 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003205 Py_ssize_t slen;
3206 const char* sub;
3207 const char* str;
3208
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003209 if (PyString_Check(substr)) {
3210 sub = PyString_AS_STRING(substr);
3211 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003212 }
3213#ifdef Py_USING_UNICODE
3214 else if (PyUnicode_Check(substr))
3215 return PyUnicode_Tailmatch((PyObject *)self,
3216 substr, start, end, direction);
3217#endif
3218 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3219 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003220 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003221
3222 string_adjust_indices(&start, &end, len);
3223
3224 if (direction < 0) {
3225 /* startswith */
3226 if (start+slen > len)
3227 return 0;
3228 } else {
3229 /* endswith */
3230 if (end-start < slen || start > len)
3231 return 0;
3232
3233 if (end-slen > start)
3234 start = end - slen;
3235 }
3236 if (end-start >= slen)
3237 return ! memcmp(str+start, sub, slen);
3238 return 0;
3239}
3240
3241
3242PyDoc_STRVAR(startswith__doc__,
3243"S.startswith(prefix[, start[, end]]) -> bool\n\
3244\n\
3245Return True if S starts with the specified prefix, False otherwise.\n\
3246With optional start, test S beginning at that position.\n\
3247With optional end, stop comparing S at that position.\n\
3248prefix can also be a tuple of strings to try.");
3249
3250static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003252{
3253 Py_ssize_t start = 0;
3254 Py_ssize_t end = PY_SSIZE_T_MAX;
3255 PyObject *subobj;
3256 int result;
3257
3258 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3259 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3260 return NULL;
3261 if (PyTuple_Check(subobj)) {
3262 Py_ssize_t i;
3263 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3264 result = _string_tailmatch(self,
3265 PyTuple_GET_ITEM(subobj, i),
3266 start, end, -1);
3267 if (result == -1)
3268 return NULL;
3269 else if (result) {
3270 Py_RETURN_TRUE;
3271 }
3272 }
3273 Py_RETURN_FALSE;
3274 }
3275 result = _string_tailmatch(self, subobj, start, end, -1);
3276 if (result == -1)
3277 return NULL;
3278 else
3279 return PyBool_FromLong(result);
3280}
3281
3282
3283PyDoc_STRVAR(endswith__doc__,
3284"S.endswith(suffix[, start[, end]]) -> bool\n\
3285\n\
3286Return True if S ends with the specified suffix, False otherwise.\n\
3287With optional start, test S beginning at that position.\n\
3288With optional end, stop comparing S at that position.\n\
3289suffix can also be a tuple of strings to try.");
3290
3291static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003292string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003293{
3294 Py_ssize_t start = 0;
3295 Py_ssize_t end = PY_SSIZE_T_MAX;
3296 PyObject *subobj;
3297 int result;
3298
3299 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3300 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3301 return NULL;
3302 if (PyTuple_Check(subobj)) {
3303 Py_ssize_t i;
3304 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3305 result = _string_tailmatch(self,
3306 PyTuple_GET_ITEM(subobj, i),
3307 start, end, +1);
3308 if (result == -1)
3309 return NULL;
3310 else if (result) {
3311 Py_RETURN_TRUE;
3312 }
3313 }
3314 Py_RETURN_FALSE;
3315 }
3316 result = _string_tailmatch(self, subobj, start, end, +1);
3317 if (result == -1)
3318 return NULL;
3319 else
3320 return PyBool_FromLong(result);
3321}
3322
3323
3324PyDoc_STRVAR(encode__doc__,
3325"S.encode([encoding[,errors]]) -> object\n\
3326\n\
3327Encodes S using the codec registered for encoding. encoding defaults\n\
3328to the default encoding. errors may be given to set a different error\n\
3329handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3330a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3331'xmlcharrefreplace' as well as any other name registered with\n\
3332codecs.register_error that is able to handle UnicodeEncodeErrors.");
3333
3334static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003335string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003336{
3337 char *encoding = NULL;
3338 char *errors = NULL;
3339 PyObject *v;
3340
3341 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3342 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003343 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003344 if (v == NULL)
3345 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003346 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003347 PyErr_Format(PyExc_TypeError,
3348 "encoder did not return a string/unicode object "
3349 "(type=%.400s)",
3350 Py_TYPE(v)->tp_name);
3351 Py_DECREF(v);
3352 return NULL;
3353 }
3354 return v;
3355
3356 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003357 return NULL;
3358}
3359
Christian Heimes44720832008-05-26 13:01:01 +00003360
3361PyDoc_STRVAR(decode__doc__,
3362"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003363\n\
Christian Heimes44720832008-05-26 13:01:01 +00003364Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003366handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3367a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003368as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003369able to handle UnicodeDecodeErrors.");
3370
3371static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003373{
Christian Heimes44720832008-05-26 13:01:01 +00003374 char *encoding = NULL;
3375 char *errors = NULL;
3376 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003377
3378 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3379 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003380 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003381 if (v == NULL)
3382 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003383 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003384 PyErr_Format(PyExc_TypeError,
3385 "decoder did not return a string/unicode object "
3386 "(type=%.400s)",
3387 Py_TYPE(v)->tp_name);
3388 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003389 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003390 }
3391 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003392
Christian Heimes44720832008-05-26 13:01:01 +00003393 onError:
3394 return NULL;
3395}
3396
3397
3398PyDoc_STRVAR(expandtabs__doc__,
3399"S.expandtabs([tabsize]) -> string\n\
3400\n\
3401Return a copy of S where all tab characters are expanded using spaces.\n\
3402If tabsize is not given, a tab size of 8 characters is assumed.");
3403
3404static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003405string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003406{
3407 const char *e, *p, *qe;
3408 char *q;
3409 Py_ssize_t i, j, incr;
3410 PyObject *u;
3411 int tabsize = 8;
3412
3413 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3414 return NULL;
3415
3416 /* First pass: determine size of output string */
3417 i = 0; /* chars up to and including most recent \n or \r */
3418 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003419 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3420 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003421 if (*p == '\t') {
3422 if (tabsize > 0) {
3423 incr = tabsize - (j % tabsize);
3424 if (j > PY_SSIZE_T_MAX - incr)
3425 goto overflow1;
3426 j += incr;
3427 }
3428 }
3429 else {
3430 if (j > PY_SSIZE_T_MAX - 1)
3431 goto overflow1;
3432 j++;
3433 if (*p == '\n' || *p == '\r') {
3434 if (i > PY_SSIZE_T_MAX - j)
3435 goto overflow1;
3436 i += j;
3437 j = 0;
3438 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003439 }
Christian Heimes44720832008-05-26 13:01:01 +00003440
3441 if (i > PY_SSIZE_T_MAX - j)
3442 goto overflow1;
3443
3444 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003445 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003446 if (!u)
3447 return NULL;
3448
3449 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003450 q = PyString_AS_STRING(u); /* next output char */
3451 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003453 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003454 if (*p == '\t') {
3455 if (tabsize > 0) {
3456 i = tabsize - (j % tabsize);
3457 j += i;
3458 while (i--) {
3459 if (q >= qe)
3460 goto overflow2;
3461 *q++ = ' ';
3462 }
3463 }
3464 }
3465 else {
3466 if (q >= qe)
3467 goto overflow2;
3468 *q++ = *p;
3469 j++;
3470 if (*p == '\n' || *p == '\r')
3471 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003472 }
Christian Heimes44720832008-05-26 13:01:01 +00003473
3474 return u;
3475
3476 overflow2:
3477 Py_DECREF(u);
3478 overflow1:
3479 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3480 return NULL;
3481}
3482
3483Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003484pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003485{
3486 PyObject *u;
3487
3488 if (left < 0)
3489 left = 0;
3490 if (right < 0)
3491 right = 0;
3492
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003493 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003494 Py_INCREF(self);
3495 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003496 }
3497
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003498 u = PyString_FromStringAndSize(NULL,
3499 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003500 if (u) {
3501 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003502 memset(PyString_AS_STRING(u), fill, left);
3503 Py_MEMCPY(PyString_AS_STRING(u) + left,
3504 PyString_AS_STRING(self),
3505 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003506 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003507 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003508 fill, right);
3509 }
3510
3511 return u;
3512}
3513
3514PyDoc_STRVAR(ljust__doc__,
3515"S.ljust(width[, fillchar]) -> string\n"
3516"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003517"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003518"done using the specified fill character (default is a space).");
3519
3520static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003521string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003522{
3523 Py_ssize_t width;
3524 char fillchar = ' ';
3525
3526 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3527 return NULL;
3528
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003529 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003530 Py_INCREF(self);
3531 return (PyObject*) self;
3532 }
3533
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003534 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003535}
3536
3537
3538PyDoc_STRVAR(rjust__doc__,
3539"S.rjust(width[, fillchar]) -> string\n"
3540"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003541"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003542"done using the specified fill character (default is a space)");
3543
3544static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003545string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003546{
3547 Py_ssize_t width;
3548 char fillchar = ' ';
3549
3550 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3551 return NULL;
3552
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003553 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003554 Py_INCREF(self);
3555 return (PyObject*) self;
3556 }
3557
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003558 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003559}
3560
3561
3562PyDoc_STRVAR(center__doc__,
3563"S.center(width[, fillchar]) -> string\n"
3564"\n"
3565"Return S centered in a string of length width. Padding is\n"
3566"done using the specified fill character (default is a space)");
3567
3568static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003569string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003570{
3571 Py_ssize_t marg, left;
3572 Py_ssize_t width;
3573 char fillchar = ' ';
3574
3575 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3576 return NULL;
3577
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003578 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003579 Py_INCREF(self);
3580 return (PyObject*) self;
3581 }
3582
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003583 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003584 left = marg / 2 + (marg & width & 1);
3585
3586 return pad(self, left, marg - left, fillchar);
3587}
3588
3589PyDoc_STRVAR(zfill__doc__,
3590"S.zfill(width) -> string\n"
3591"\n"
3592"Pad a numeric string S with zeros on the left, to fill a field\n"
3593"of the specified width. The string S is never truncated.");
3594
3595static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003596string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003597{
3598 Py_ssize_t fill;
3599 PyObject *s;
3600 char *p;
3601 Py_ssize_t width;
3602
3603 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3604 return NULL;
3605
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003606 if (PyString_GET_SIZE(self) >= width) {
3607 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003608 Py_INCREF(self);
3609 return (PyObject*) self;
3610 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003611 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003612 return PyString_FromStringAndSize(
3613 PyString_AS_STRING(self),
3614 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003615 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616 }
3617
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003618 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619
Christian Heimes44720832008-05-26 13:01:01 +00003620 s = pad(self, fill, 0, '0');
3621
3622 if (s == NULL)
3623 return NULL;
3624
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003625 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003626 if (p[fill] == '+' || p[fill] == '-') {
3627 /* move sign to beginning of string */
3628 p[0] = p[fill];
3629 p[fill] = '0';
3630 }
3631
3632 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003633}
3634
Christian Heimes44720832008-05-26 13:01:01 +00003635PyDoc_STRVAR(isspace__doc__,
3636"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637\n\
Christian Heimes44720832008-05-26 13:01:01 +00003638Return True if all characters in S are whitespace\n\
3639and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003640
Christian Heimes44720832008-05-26 13:01:01 +00003641static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003642string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003643{
Christian Heimes44720832008-05-26 13:01:01 +00003644 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003645 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003646 register const unsigned char *e;
3647
3648 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003650 isspace(*p))
3651 return PyBool_FromLong(1);
3652
3653 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003654 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003655 return PyBool_FromLong(0);
3656
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003657 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003658 for (; p < e; p++) {
3659 if (!isspace(*p))
3660 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003661 }
Christian Heimes44720832008-05-26 13:01:01 +00003662 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003663}
3664
Christian Heimes44720832008-05-26 13:01:01 +00003665
3666PyDoc_STRVAR(isalpha__doc__,
3667"S.isalpha() -> bool\n\
3668\n\
3669Return True if all characters in S are alphabetic\n\
3670and there is at least one character in S, False otherwise.");
3671
3672static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003673string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003674{
Christian Heimes44720832008-05-26 13:01:01 +00003675 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003676 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003677 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003678
Christian Heimes44720832008-05-26 13:01:01 +00003679 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003681 isalpha(*p))
3682 return PyBool_FromLong(1);
3683
3684 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003685 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003686 return PyBool_FromLong(0);
3687
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003688 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003689 for (; p < e; p++) {
3690 if (!isalpha(*p))
3691 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003692 }
Christian Heimes44720832008-05-26 13:01:01 +00003693 return PyBool_FromLong(1);
3694}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003695
Christian Heimes44720832008-05-26 13:01:01 +00003696
3697PyDoc_STRVAR(isalnum__doc__,
3698"S.isalnum() -> bool\n\
3699\n\
3700Return True if all characters in S are alphanumeric\n\
3701and there is at least one character in S, False otherwise.");
3702
3703static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003704string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003705{
3706 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003707 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003708 register const unsigned char *e;
3709
3710 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003712 isalnum(*p))
3713 return PyBool_FromLong(1);
3714
3715 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003716 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003717 return PyBool_FromLong(0);
3718
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003719 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003720 for (; p < e; p++) {
3721 if (!isalnum(*p))
3722 return PyBool_FromLong(0);
3723 }
3724 return PyBool_FromLong(1);
3725}
3726
3727
3728PyDoc_STRVAR(isdigit__doc__,
3729"S.isdigit() -> bool\n\
3730\n\
3731Return True if all characters in S are digits\n\
3732and there is at least one character in S, False otherwise.");
3733
3734static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003735string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003736{
3737 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003738 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003739 register const unsigned char *e;
3740
3741 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003743 isdigit(*p))
3744 return PyBool_FromLong(1);
3745
3746 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003747 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003748 return PyBool_FromLong(0);
3749
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003750 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003751 for (; p < e; p++) {
3752 if (!isdigit(*p))
3753 return PyBool_FromLong(0);
3754 }
3755 return PyBool_FromLong(1);
3756}
3757
3758
3759PyDoc_STRVAR(islower__doc__,
3760"S.islower() -> bool\n\
3761\n\
3762Return True if all cased characters in S are lowercase and there is\n\
3763at least one cased character in S, False otherwise.");
3764
3765static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003766string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003767{
3768 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003769 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003770 register const unsigned char *e;
3771 int cased;
3772
3773 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003774 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003775 return PyBool_FromLong(islower(*p) != 0);
3776
3777 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003778 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003779 return PyBool_FromLong(0);
3780
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003781 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003782 cased = 0;
3783 for (; p < e; p++) {
3784 if (isupper(*p))
3785 return PyBool_FromLong(0);
3786 else if (!cased && islower(*p))
3787 cased = 1;
3788 }
3789 return PyBool_FromLong(cased);
3790}
3791
3792
3793PyDoc_STRVAR(isupper__doc__,
3794"S.isupper() -> bool\n\
3795\n\
3796Return True if all cased characters in S are uppercase and there is\n\
3797at least one cased character in S, False otherwise.");
3798
3799static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003800string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003801{
3802 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003803 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003804 register const unsigned char *e;
3805 int cased;
3806
3807 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003808 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003809 return PyBool_FromLong(isupper(*p) != 0);
3810
3811 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003812 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003813 return PyBool_FromLong(0);
3814
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003815 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003816 cased = 0;
3817 for (; p < e; p++) {
3818 if (islower(*p))
3819 return PyBool_FromLong(0);
3820 else if (!cased && isupper(*p))
3821 cased = 1;
3822 }
3823 return PyBool_FromLong(cased);
3824}
3825
3826
3827PyDoc_STRVAR(istitle__doc__,
3828"S.istitle() -> bool\n\
3829\n\
3830Return True if S is a titlecased string and there is at least one\n\
3831character in S, i.e. uppercase characters may only follow uncased\n\
3832characters and lowercase characters only cased ones. Return False\n\
3833otherwise.");
3834
3835static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003836string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003837{
3838 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003839 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003840 register const unsigned char *e;
3841 int cased, previous_is_cased;
3842
3843 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003844 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003845 return PyBool_FromLong(isupper(*p) != 0);
3846
3847 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003848 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003849 return PyBool_FromLong(0);
3850
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003851 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003852 cased = 0;
3853 previous_is_cased = 0;
3854 for (; p < e; p++) {
3855 register const unsigned char ch = *p;
3856
3857 if (isupper(ch)) {
3858 if (previous_is_cased)
3859 return PyBool_FromLong(0);
3860 previous_is_cased = 1;
3861 cased = 1;
3862 }
3863 else if (islower(ch)) {
3864 if (!previous_is_cased)
3865 return PyBool_FromLong(0);
3866 previous_is_cased = 1;
3867 cased = 1;
3868 }
3869 else
3870 previous_is_cased = 0;
3871 }
3872 return PyBool_FromLong(cased);
3873}
3874
3875
3876PyDoc_STRVAR(splitlines__doc__,
3877"S.splitlines([keepends]) -> list of strings\n\
3878\n\
3879Return a list of the lines in S, breaking at line boundaries.\n\
3880Line breaks are not included in the resulting list unless keepends\n\
3881is given and true.");
3882
3883static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003884string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003885{
3886 register Py_ssize_t i;
3887 register Py_ssize_t j;
3888 Py_ssize_t len;
3889 int keepends = 0;
3890 PyObject *list;
3891 PyObject *str;
3892 char *data;
3893
3894 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3895 return NULL;
3896
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003897 data = PyString_AS_STRING(self);
3898 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003899
3900 /* This does not use the preallocated list because splitlines is
3901 usually run with hundreds of newlines. The overhead of
3902 switching between PyList_SET_ITEM and append causes about a
3903 2-3% slowdown for that common case. A smarter implementation
3904 could move the if check out, so the SET_ITEMs are done first
3905 and the appends only done when the prealloc buffer is full.
3906 That's too much work for little gain.*/
3907
3908 list = PyList_New(0);
3909 if (!list)
3910 goto onError;
3911
3912 for (i = j = 0; i < len; ) {
3913 Py_ssize_t eol;
3914
3915 /* Find a line and append it */
3916 while (i < len && data[i] != '\n' && data[i] != '\r')
3917 i++;
3918
3919 /* Skip the line break reading CRLF as one line break */
3920 eol = i;
3921 if (i < len) {
3922 if (data[i] == '\r' && i + 1 < len &&
3923 data[i+1] == '\n')
3924 i += 2;
3925 else
3926 i++;
3927 if (keepends)
3928 eol = i;
3929 }
3930 SPLIT_APPEND(data, j, eol);
3931 j = i;
3932 }
3933 if (j < len) {
3934 SPLIT_APPEND(data, j, len);
3935 }
3936
3937 return list;
3938
3939 onError:
3940 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003941 return NULL;
3942}
3943
Robert Schuppenies51df0642008-06-01 16:16:17 +00003944PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003945"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003946
3947static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003948string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003949{
3950 Py_ssize_t res;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00003951 res = PyStringObject_SIZE + v->ob_size * v->ob_type->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003952 return PyInt_FromSsize_t(res);
3953}
3954
/* Retire the list-building helper macros (SPLIT_APPEND is used by
   string_splitlines above) so they do not leak into the code below. */
#undef PREALLOC_SIZE
#undef MAX_PREALLOC
#undef SPLIT_ADD
#undef SPLIT_APPEND
Christian Heimes1a6387e2008-03-26 12:49:49 +00003959
3960static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003961string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003962{
Christian Heimes44720832008-05-26 13:01:01 +00003963 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003964}
3965
Christian Heimes1a6387e2008-03-26 12:49:49 +00003966
Christian Heimes44720832008-05-26 13:01:01 +00003967#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003968
Christian Heimes44720832008-05-26 13:01:01 +00003969PyDoc_STRVAR(format__doc__,
3970"S.format(*args, **kwargs) -> unicode\n\
3971\n\
3972");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003973
Eric Smithdc13b792008-05-30 18:10:04 +00003974static PyObject *
3975string__format__(PyObject* self, PyObject* args)
3976{
3977 PyObject *format_spec;
3978 PyObject *result = NULL;
3979 PyObject *tmp = NULL;
3980
3981 /* If 2.x, convert format_spec to the same type as value */
3982 /* This is to allow things like u''.format('') */
3983 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3984 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003985 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003986 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3987 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3988 goto done;
3989 }
3990 tmp = PyObject_Str(format_spec);
3991 if (tmp == NULL)
3992 goto done;
3993 format_spec = tmp;
3994
3995 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003996 PyString_AS_STRING(format_spec),
3997 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003998done:
3999 Py_XDECREF(tmp);
4000 return result;
4001}
4002
Christian Heimes44720832008-05-26 13:01:01 +00004003PyDoc_STRVAR(p_format__doc__,
4004"S.__format__(format_spec) -> unicode\n\
4005\n\
4006");
4007
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004008
Christian Heimes1a6387e2008-03-26 12:49:49 +00004009static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004010string_methods[] = {
4011 /* Counterparts of the obsolete stropmodule functions; except
4012 string.maketrans(). */
4013 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4014 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4015 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4016 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4017 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4018 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4019 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4020 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4021 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4022 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4023 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4024 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4025 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4026 capitalize__doc__},
4027 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4028 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4029 endswith__doc__},
4030 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4031 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4032 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4033 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4034 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4035 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4036 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4037 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4038 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4039 rpartition__doc__},
4040 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4041 startswith__doc__},
4042 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4043 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4044 swapcase__doc__},
4045 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4046 translate__doc__},
4047 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4048 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4049 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4050 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4051 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4052 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4053 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4054 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4055 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4056 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4057 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4058 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4059 expandtabs__doc__},
4060 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4061 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004062 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4063 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004064 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4065 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004066};
4067
4068static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004069str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004070
Christian Heimes44720832008-05-26 13:01:01 +00004071static PyObject *
4072string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4073{
4074 PyObject *x = NULL;
4075 static char *kwlist[] = {"object", 0};
4076
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004077 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004078 return str_subtype_new(type, args, kwds);
4079 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4080 return NULL;
4081 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004082 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004083 return PyObject_Str(x);
4084}
4085
4086static PyObject *
4087str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4088{
4089 PyObject *tmp, *pnew;
4090 Py_ssize_t n;
4091
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004092 assert(PyType_IsSubtype(type, &PyString_Type));
4093 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004094 if (tmp == NULL)
4095 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004096 assert(PyString_CheckExact(tmp));
4097 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004098 pnew = type->tp_alloc(type, n);
4099 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004100 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4101 ((PyStringObject *)pnew)->ob_shash =
4102 ((PyStringObject *)tmp)->ob_shash;
4103 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004104 }
4105 Py_DECREF(tmp);
4106 return pnew;
4107}
4108
4109static PyObject *
4110basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4111{
4112 PyErr_SetString(PyExc_TypeError,
4113 "The basestring type cannot be instantiated");
4114 return NULL;
4115}
4116
4117static PyObject *
4118string_mod(PyObject *v, PyObject *w)
4119{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004120 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004121 Py_INCREF(Py_NotImplemented);
4122 return Py_NotImplemented;
4123 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004124 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004125}
4126
4127PyDoc_STRVAR(basestring_doc,
4128"Type basestring cannot be instantiated; it is the base for str and unicode.");
4129
4130static PyNumberMethods string_as_number = {
4131 0, /*nb_add*/
4132 0, /*nb_subtract*/
4133 0, /*nb_multiply*/
4134 0, /*nb_divide*/
4135 string_mod, /*nb_remainder*/
4136};
4137
4138
4139PyTypeObject PyBaseString_Type = {
4140 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4141 "basestring",
4142 0,
4143 0,
4144 0, /* tp_dealloc */
4145 0, /* tp_print */
4146 0, /* tp_getattr */
4147 0, /* tp_setattr */
4148 0, /* tp_compare */
4149 0, /* tp_repr */
4150 0, /* tp_as_number */
4151 0, /* tp_as_sequence */
4152 0, /* tp_as_mapping */
4153 0, /* tp_hash */
4154 0, /* tp_call */
4155 0, /* tp_str */
4156 0, /* tp_getattro */
4157 0, /* tp_setattro */
4158 0, /* tp_as_buffer */
4159 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4160 basestring_doc, /* tp_doc */
4161 0, /* tp_traverse */
4162 0, /* tp_clear */
4163 0, /* tp_richcompare */
4164 0, /* tp_weaklistoffset */
4165 0, /* tp_iter */
4166 0, /* tp_iternext */
4167 0, /* tp_methods */
4168 0, /* tp_members */
4169 0, /* tp_getset */
4170 &PyBaseObject_Type, /* tp_base */
4171 0, /* tp_dict */
4172 0, /* tp_descr_get */
4173 0, /* tp_descr_set */
4174 0, /* tp_dictoffset */
4175 0, /* tp_init */
4176 0, /* tp_alloc */
4177 basestring_new, /* tp_new */
4178 0, /* tp_free */
4179};
4180
4181PyDoc_STRVAR(string_doc,
4182"str(object) -> string\n\
4183\n\
4184Return a nice string representation of the object.\n\
4185If the argument is a string, the return value is the same object.");
4186
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004187PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004188 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4189 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004190 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00004191 sizeof(char),
4192 string_dealloc, /* tp_dealloc */
4193 (printfunc)string_print, /* tp_print */
4194 0, /* tp_getattr */
4195 0, /* tp_setattr */
4196 0, /* tp_compare */
4197 string_repr, /* tp_repr */
4198 &string_as_number, /* tp_as_number */
4199 &string_as_sequence, /* tp_as_sequence */
4200 &string_as_mapping, /* tp_as_mapping */
4201 (hashfunc)string_hash, /* tp_hash */
4202 0, /* tp_call */
4203 string_str, /* tp_str */
4204 PyObject_GenericGetAttr, /* tp_getattro */
4205 0, /* tp_setattro */
4206 &string_as_buffer, /* tp_as_buffer */
4207 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4208 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4209 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4210 string_doc, /* tp_doc */
4211 0, /* tp_traverse */
4212 0, /* tp_clear */
4213 (richcmpfunc)string_richcompare, /* tp_richcompare */
4214 0, /* tp_weaklistoffset */
4215 0, /* tp_iter */
4216 0, /* tp_iternext */
4217 string_methods, /* tp_methods */
4218 0, /* tp_members */
4219 0, /* tp_getset */
4220 &PyBaseString_Type, /* tp_base */
4221 0, /* tp_dict */
4222 0, /* tp_descr_get */
4223 0, /* tp_descr_set */
4224 0, /* tp_dictoffset */
4225 0, /* tp_init */
4226 0, /* tp_alloc */
4227 string_new, /* tp_new */
4228 PyObject_Del, /* tp_free */
4229};
4230
4231void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004232PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004233{
4234 register PyObject *v;
4235 if (*pv == NULL)
4236 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004237 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004238 Py_DECREF(*pv);
4239 *pv = NULL;
4240 return;
4241 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004242 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004243 Py_DECREF(*pv);
4244 *pv = v;
4245}
4246
4247void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004248PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004249{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004250 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004251 Py_XDECREF(w);
4252}
4253
4254
4255/* The following function breaks the notion that strings are immutable:
4256 it changes the size of a string. We get away with this only if there
4257 is only one module referencing the object. You can also think of it
4258 as creating a new string object and destroying the old one, only
4259 more efficiently. In any case, don't use this if the string may
4260 already be known to some other part of the code...
4261 Note that if there's not enough memory to resize the string, the original
4262 string object at *pv is deallocated, *pv is set to NULL, an "out of
4263 memory" exception is set, and -1 is returned. Else (on success) 0 is
4264 returned, and the value in *pv may or may not be the same as on input.
4265 As always, an extra byte is allocated for a trailing \0 byte (newsize
4266 does *not* include that), and a trailing \0 byte is stored.
4267*/
4268
4269int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004270_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004271{
4272 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004273 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004274 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004275 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4276 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004277 *pv = 0;
4278 Py_DECREF(v);
4279 PyErr_BadInternalCall();
4280 return -1;
4281 }
4282 /* XXX UNREF/NEWREF interface should be more symmetrical */
4283 _Py_DEC_REFTOTAL;
4284 _Py_ForgetReference(v);
4285 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004286 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004287 if (*pv == NULL) {
4288 PyObject_Del(v);
4289 PyErr_NoMemory();
4290 return -1;
4291 }
4292 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004293 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004294 Py_SIZE(sv) = newsize;
4295 sv->ob_sval[newsize] = '\0';
4296 sv->ob_shash = -1; /* invalidate cached hash value */
4297 return 0;
4298}
4299
4300/* Helpers for formatstring */
4301
4302Py_LOCAL_INLINE(PyObject *)
4303getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4304{
4305 Py_ssize_t argidx = *p_argidx;
4306 if (argidx < arglen) {
4307 (*p_argidx)++;
4308 if (arglen < 0)
4309 return args;
4310 else
4311 return PyTuple_GetItem(args, argidx);
4312 }
4313 PyErr_SetString(PyExc_TypeError,
4314 "not enough arguments for format string");
4315 return NULL;
4316}
4317
/* Bit flags recording which flag characters appeared in a '%' format
   specifier. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
4330
4331Py_LOCAL_INLINE(int)
4332formatfloat(char *buf, size_t buflen, int flags,
4333 int prec, int type, PyObject *v)
4334{
4335 /* fmt = '%#.' + `prec` + `type`
4336 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4337 char fmt[20];
4338 double x;
4339 x = PyFloat_AsDouble(v);
4340 if (x == -1.0 && PyErr_Occurred()) {
4341 PyErr_Format(PyExc_TypeError, "float argument required, "
4342 "not %.200s", Py_TYPE(v)->tp_name);
4343 return -1;
4344 }
4345 if (prec < 0)
4346 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00004347 /* make sure that the decimal representation of precision really does
4348 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
4349 if (prec > 0x7fffffffL) {
4350 PyErr_SetString(PyExc_OverflowError,
4351 "outrageously large precision "
4352 "for formatted float");
4353 return -1;
4354 }
4355
Mark Dickinson2e648ec2009-03-29 14:37:51 +00004356 if (type == 'f' && fabs(x) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +00004357 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004358 /* Worst case length calc to ensure no buffer overrun:
4359
4360 'g' formats:
4361 fmt = %#.<prec>g
4362 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4363 for any double rep.)
4364 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4365
4366 'f' formats:
4367 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4368 len = 1 + 50 + 1 + prec = 52 + prec
4369
4370 If prec=0 the effective precision is 1 (the leading digit is
4371 always given), therefore increase the length by one.
4372
4373 */
4374 if (((type == 'g' || type == 'G') &&
4375 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004376 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004377 PyErr_SetString(PyExc_OverflowError,
4378 "formatted float is too long (precision too large?)");
4379 return -1;
4380 }
4381 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4382 (flags&F_ALT) ? "#" : "",
4383 prec, type);
Mark Dickinson174e9092009-03-29 16:17:16 +00004384 PyOS_ascii_formatd(buf, buflen, fmt, x);
Christian Heimes44720832008-05-26 13:01:01 +00004385 return (int)strlen(buf);
4386}
4387
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004388/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004389 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4390 * Python's regular ints.
4391 * Return value: a new PyString*, or NULL if error.
4392 * . *pbuf is set to point into it,
4393 * *plen set to the # of chars following that.
4394 * Caller must decref it when done using pbuf.
4395 * The string starting at *pbuf is of the form
4396 * "-"? ("0x" | "0X")? digit+
4397 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4398 * set in flags. The case of hex digits will be correct,
4399 * There will be at least prec digits, zero-filled on the left if
4400 * necessary to get that many.
4401 * val object to be converted
4402 * flags bitmask of format flags; only F_ALT is looked at
4403 * prec minimum number of digits; 0-fill on left if needed
4404 * type a character in [duoxX]; u acts the same as d
4405 *
4406 * CAUTION: o, x and X conversions on regular ints can never
4407 * produce a '-' sign, but can for Python's unbounded ints.
4408 */
4409PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004410_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004411 char **pbuf, int *plen)
4412{
4413 PyObject *result = NULL;
4414 char *buf;
4415 Py_ssize_t i;
4416 int sign; /* 1 if '-', else 0 */
4417 int len; /* number of characters */
4418 Py_ssize_t llen;
4419 int numdigits; /* len == numnondigits + numdigits */
4420 int numnondigits = 0;
4421
4422 switch (type) {
4423 case 'd':
4424 case 'u':
4425 result = Py_TYPE(val)->tp_str(val);
4426 break;
4427 case 'o':
4428 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4429 break;
4430 case 'x':
4431 case 'X':
4432 numnondigits = 2;
4433 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4434 break;
4435 default:
4436 assert(!"'type' not in [duoxX]");
4437 }
4438 if (!result)
4439 return NULL;
4440
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004441 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004442 if (!buf) {
4443 Py_DECREF(result);
4444 return NULL;
4445 }
4446
4447 /* To modify the string in-place, there can only be one reference. */
4448 if (Py_REFCNT(result) != 1) {
4449 PyErr_BadInternalCall();
4450 return NULL;
4451 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004452 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004453 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004454 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004455 return NULL;
4456 }
4457 len = (int)llen;
4458 if (buf[len-1] == 'L') {
4459 --len;
4460 buf[len] = '\0';
4461 }
4462 sign = buf[0] == '-';
4463 numnondigits += sign;
4464 numdigits = len - numnondigits;
4465 assert(numdigits > 0);
4466
4467 /* Get rid of base marker unless F_ALT */
4468 if ((flags & F_ALT) == 0) {
4469 /* Need to skip 0x, 0X or 0. */
4470 int skipped = 0;
4471 switch (type) {
4472 case 'o':
4473 assert(buf[sign] == '0');
4474 /* If 0 is only digit, leave it alone. */
4475 if (numdigits > 1) {
4476 skipped = 1;
4477 --numdigits;
4478 }
4479 break;
4480 case 'x':
4481 case 'X':
4482 assert(buf[sign] == '0');
4483 assert(buf[sign + 1] == 'x');
4484 skipped = 2;
4485 numnondigits -= 2;
4486 break;
4487 }
4488 if (skipped) {
4489 buf += skipped;
4490 len -= skipped;
4491 if (sign)
4492 buf[0] = '-';
4493 }
4494 assert(len == numnondigits + numdigits);
4495 assert(numdigits > 0);
4496 }
4497
4498 /* Fill with leading zeroes to meet minimum width. */
4499 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004500 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004501 numnondigits + prec);
4502 char *b1;
4503 if (!r1) {
4504 Py_DECREF(result);
4505 return NULL;
4506 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004507 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004508 for (i = 0; i < numnondigits; ++i)
4509 *b1++ = *buf++;
4510 for (i = 0; i < prec - numdigits; i++)
4511 *b1++ = '0';
4512 for (i = 0; i < numdigits; i++)
4513 *b1++ = *buf++;
4514 *b1 = '\0';
4515 Py_DECREF(result);
4516 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004517 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004518 len = numnondigits + prec;
4519 }
4520
4521 /* Fix up case for hex conversions. */
4522 if (type == 'X') {
4523 /* Need to convert all lower case letters to upper case.
4524 and need to convert 0x to 0X (and -0x to -0X). */
4525 for (i = 0; i < len; i++)
4526 if (buf[i] >= 'a' && buf[i] <= 'x')
4527 buf[i] -= 'a'-'A';
4528 }
4529 *pbuf = buf;
4530 *plen = len;
4531 return result;
4532}
4533
4534Py_LOCAL_INLINE(int)
4535formatint(char *buf, size_t buflen, int flags,
4536 int prec, int type, PyObject *v)
4537{
4538 /* fmt = '%#.' + `prec` + 'l' + `type`
4539 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4540 + 1 + 1 = 24 */
4541 char fmt[64]; /* plenty big enough! */
4542 char *sign;
4543 long x;
4544
4545 x = PyInt_AsLong(v);
4546 if (x == -1 && PyErr_Occurred()) {
4547 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4548 Py_TYPE(v)->tp_name);
4549 return -1;
4550 }
4551 if (x < 0 && type == 'u') {
4552 type = 'd';
4553 }
4554 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4555 sign = "-";
4556 else
4557 sign = "";
4558 if (prec < 0)
4559 prec = 1;
4560
4561 if ((flags & F_ALT) &&
4562 (type == 'x' || type == 'X')) {
4563 /* When converting under %#x or %#X, there are a number
4564 * of issues that cause pain:
4565 * - when 0 is being converted, the C standard leaves off
4566 * the '0x' or '0X', which is inconsistent with other
4567 * %#x/%#X conversions and inconsistent with Python's
4568 * hex() function
4569 * - there are platforms that violate the standard and
4570 * convert 0 with the '0x' or '0X'
4571 * (Metrowerks, Compaq Tru64)
4572 * - there are platforms that give '0x' when converting
4573 * under %#X, but convert 0 in accordance with the
4574 * standard (OS/2 EMX)
4575 *
4576 * We can achieve the desired consistency by inserting our
4577 * own '0x' or '0X' prefix, and substituting %x/%X in place
4578 * of %#x/%#X.
4579 *
4580 * Note that this is the same approach as used in
4581 * formatint() in unicodeobject.c
4582 */
4583 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4584 sign, type, prec, type);
4585 }
4586 else {
4587 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4588 sign, (flags&F_ALT) ? "#" : "",
4589 prec, type);
4590 }
4591
4592 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4593 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4594 */
4595 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4596 PyErr_SetString(PyExc_OverflowError,
4597 "formatted integer is too long (precision too large?)");
4598 return -1;
4599 }
4600 if (sign[0])
4601 PyOS_snprintf(buf, buflen, fmt, -x);
4602 else
4603 PyOS_snprintf(buf, buflen, fmt, x);
4604 return (int)strlen(buf);
4605}
4606
4607Py_LOCAL_INLINE(int)
4608formatchar(char *buf, size_t buflen, PyObject *v)
4609{
4610 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004611 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004612 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4613 return -1;
4614 }
4615 else {
4616 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4617 return -1;
4618 }
4619 buf[1] = '\0';
4620 return 1;
4621}
4622
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine bounds-checks against it so that the buffer cannot overflow; a
   better solution may be to malloc a buffer of the appropriate size for
   each format, but for now the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
4632
4633PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004634PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004635{
4636 char *fmt, *res;
4637 Py_ssize_t arglen, argidx;
4638 Py_ssize_t reslen, rescnt, fmtcnt;
4639 int args_owned = 0;
4640 PyObject *result, *orig_args;
4641#ifdef Py_USING_UNICODE
4642 PyObject *v, *w;
4643#endif
4644 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004645 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004646 PyErr_BadInternalCall();
4647 return NULL;
4648 }
4649 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004650 fmt = PyString_AS_STRING(format);
4651 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004652 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004653 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004654 if (result == NULL)
4655 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004656 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004657 if (PyTuple_Check(args)) {
4658 arglen = PyTuple_GET_SIZE(args);
4659 argidx = 0;
4660 }
4661 else {
4662 arglen = -1;
4663 argidx = -2;
4664 }
4665 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4666 !PyObject_TypeCheck(args, &PyBaseString_Type))
4667 dict = args;
4668 while (--fmtcnt >= 0) {
4669 if (*fmt != '%') {
4670 if (--rescnt < 0) {
4671 rescnt = fmtcnt + 100;
4672 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004673 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004674 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004675 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004676 + reslen - rescnt;
4677 --rescnt;
4678 }
4679 *res++ = *fmt++;
4680 }
4681 else {
4682 /* Got a format specifier */
4683 int flags = 0;
4684 Py_ssize_t width = -1;
4685 int prec = -1;
4686 int c = '\0';
4687 int fill;
4688 int isnumok;
4689 PyObject *v = NULL;
4690 PyObject *temp = NULL;
4691 char *pbuf;
4692 int sign;
4693 Py_ssize_t len;
4694 char formatbuf[FORMATBUFLEN];
4695 /* For format{float,int,char}() */
4696#ifdef Py_USING_UNICODE
4697 char *fmt_start = fmt;
4698 Py_ssize_t argidx_start = argidx;
4699#endif
4700
4701 fmt++;
4702 if (*fmt == '(') {
4703 char *keystart;
4704 Py_ssize_t keylen;
4705 PyObject *key;
4706 int pcount = 1;
4707
4708 if (dict == NULL) {
4709 PyErr_SetString(PyExc_TypeError,
4710 "format requires a mapping");
4711 goto error;
4712 }
4713 ++fmt;
4714 --fmtcnt;
4715 keystart = fmt;
4716 /* Skip over balanced parentheses */
4717 while (pcount > 0 && --fmtcnt >= 0) {
4718 if (*fmt == ')')
4719 --pcount;
4720 else if (*fmt == '(')
4721 ++pcount;
4722 fmt++;
4723 }
4724 keylen = fmt - keystart - 1;
4725 if (fmtcnt < 0 || pcount > 0) {
4726 PyErr_SetString(PyExc_ValueError,
4727 "incomplete format key");
4728 goto error;
4729 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004730 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004731 keylen);
4732 if (key == NULL)
4733 goto error;
4734 if (args_owned) {
4735 Py_DECREF(args);
4736 args_owned = 0;
4737 }
4738 args = PyObject_GetItem(dict, key);
4739 Py_DECREF(key);
4740 if (args == NULL) {
4741 goto error;
4742 }
4743 args_owned = 1;
4744 arglen = -1;
4745 argidx = -2;
4746 }
4747 while (--fmtcnt >= 0) {
4748 switch (c = *fmt++) {
4749 case '-': flags |= F_LJUST; continue;
4750 case '+': flags |= F_SIGN; continue;
4751 case ' ': flags |= F_BLANK; continue;
4752 case '#': flags |= F_ALT; continue;
4753 case '0': flags |= F_ZERO; continue;
4754 }
4755 break;
4756 }
4757 if (c == '*') {
4758 v = getnextarg(args, arglen, &argidx);
4759 if (v == NULL)
4760 goto error;
4761 if (!PyInt_Check(v)) {
4762 PyErr_SetString(PyExc_TypeError,
4763 "* wants int");
4764 goto error;
4765 }
4766 width = PyInt_AsLong(v);
4767 if (width < 0) {
4768 flags |= F_LJUST;
4769 width = -width;
4770 }
4771 if (--fmtcnt >= 0)
4772 c = *fmt++;
4773 }
4774 else if (c >= 0 && isdigit(c)) {
4775 width = c - '0';
4776 while (--fmtcnt >= 0) {
4777 c = Py_CHARMASK(*fmt++);
4778 if (!isdigit(c))
4779 break;
4780 if ((width*10) / 10 != width) {
4781 PyErr_SetString(
4782 PyExc_ValueError,
4783 "width too big");
4784 goto error;
4785 }
4786 width = width*10 + (c - '0');
4787 }
4788 }
4789 if (c == '.') {
4790 prec = 0;
4791 if (--fmtcnt >= 0)
4792 c = *fmt++;
4793 if (c == '*') {
4794 v = getnextarg(args, arglen, &argidx);
4795 if (v == NULL)
4796 goto error;
4797 if (!PyInt_Check(v)) {
4798 PyErr_SetString(
4799 PyExc_TypeError,
4800 "* wants int");
4801 goto error;
4802 }
4803 prec = PyInt_AsLong(v);
4804 if (prec < 0)
4805 prec = 0;
4806 if (--fmtcnt >= 0)
4807 c = *fmt++;
4808 }
4809 else if (c >= 0 && isdigit(c)) {
4810 prec = c - '0';
4811 while (--fmtcnt >= 0) {
4812 c = Py_CHARMASK(*fmt++);
4813 if (!isdigit(c))
4814 break;
4815 if ((prec*10) / 10 != prec) {
4816 PyErr_SetString(
4817 PyExc_ValueError,
4818 "prec too big");
4819 goto error;
4820 }
4821 prec = prec*10 + (c - '0');
4822 }
4823 }
4824 } /* prec */
4825 if (fmtcnt >= 0) {
4826 if (c == 'h' || c == 'l' || c == 'L') {
4827 if (--fmtcnt >= 0)
4828 c = *fmt++;
4829 }
4830 }
4831 if (fmtcnt < 0) {
4832 PyErr_SetString(PyExc_ValueError,
4833 "incomplete format");
4834 goto error;
4835 }
4836 if (c != '%') {
4837 v = getnextarg(args, arglen, &argidx);
4838 if (v == NULL)
4839 goto error;
4840 }
4841 sign = 0;
4842 fill = ' ';
4843 switch (c) {
4844 case '%':
4845 pbuf = "%";
4846 len = 1;
4847 break;
4848 case 's':
4849#ifdef Py_USING_UNICODE
4850 if (PyUnicode_Check(v)) {
4851 fmt = fmt_start;
4852 argidx = argidx_start;
4853 goto unicode;
4854 }
4855#endif
4856 temp = _PyObject_Str(v);
4857#ifdef Py_USING_UNICODE
4858 if (temp != NULL && PyUnicode_Check(temp)) {
4859 Py_DECREF(temp);
4860 fmt = fmt_start;
4861 argidx = argidx_start;
4862 goto unicode;
4863 }
4864#endif
4865 /* Fall through */
4866 case 'r':
4867 if (c == 'r')
4868 temp = PyObject_Repr(v);
4869 if (temp == NULL)
4870 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004871 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004872 PyErr_SetString(PyExc_TypeError,
4873 "%s argument has non-string str()");
4874 Py_DECREF(temp);
4875 goto error;
4876 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004877 pbuf = PyString_AS_STRING(temp);
4878 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004879 if (prec >= 0 && len > prec)
4880 len = prec;
4881 break;
4882 case 'i':
4883 case 'd':
4884 case 'u':
4885 case 'o':
4886 case 'x':
4887 case 'X':
4888 if (c == 'i')
4889 c = 'd';
4890 isnumok = 0;
4891 if (PyNumber_Check(v)) {
4892 PyObject *iobj=NULL;
4893
4894 if (PyInt_Check(v) || (PyLong_Check(v))) {
4895 iobj = v;
4896 Py_INCREF(iobj);
4897 }
4898 else {
4899 iobj = PyNumber_Int(v);
4900 if (iobj==NULL) iobj = PyNumber_Long(v);
4901 }
4902 if (iobj!=NULL) {
4903 if (PyInt_Check(iobj)) {
4904 isnumok = 1;
4905 pbuf = formatbuf;
4906 len = formatint(pbuf,
4907 sizeof(formatbuf),
4908 flags, prec, c, iobj);
4909 Py_DECREF(iobj);
4910 if (len < 0)
4911 goto error;
4912 sign = 1;
4913 }
4914 else if (PyLong_Check(iobj)) {
4915 int ilen;
4916
4917 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004918 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004919 prec, c, &pbuf, &ilen);
4920 Py_DECREF(iobj);
4921 len = ilen;
4922 if (!temp)
4923 goto error;
4924 sign = 1;
4925 }
4926 else {
4927 Py_DECREF(iobj);
4928 }
4929 }
4930 }
4931 if (!isnumok) {
4932 PyErr_Format(PyExc_TypeError,
4933 "%%%c format: a number is required, "
4934 "not %.200s", c, Py_TYPE(v)->tp_name);
4935 goto error;
4936 }
4937 if (flags & F_ZERO)
4938 fill = '0';
4939 break;
4940 case 'e':
4941 case 'E':
4942 case 'f':
4943 case 'F':
4944 case 'g':
4945 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00004946 if (c == 'F')
4947 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00004948 pbuf = formatbuf;
4949 len = formatfloat(pbuf, sizeof(formatbuf),
4950 flags, prec, c, v);
4951 if (len < 0)
4952 goto error;
4953 sign = 1;
4954 if (flags & F_ZERO)
4955 fill = '0';
4956 break;
4957 case 'c':
4958#ifdef Py_USING_UNICODE
4959 if (PyUnicode_Check(v)) {
4960 fmt = fmt_start;
4961 argidx = argidx_start;
4962 goto unicode;
4963 }
4964#endif
4965 pbuf = formatbuf;
4966 len = formatchar(pbuf, sizeof(formatbuf), v);
4967 if (len < 0)
4968 goto error;
4969 break;
4970 default:
4971 PyErr_Format(PyExc_ValueError,
4972 "unsupported format character '%c' (0x%x) "
4973 "at index %zd",
4974 c, c,
4975 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004976 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004977 goto error;
4978 }
4979 if (sign) {
4980 if (*pbuf == '-' || *pbuf == '+') {
4981 sign = *pbuf++;
4982 len--;
4983 }
4984 else if (flags & F_SIGN)
4985 sign = '+';
4986 else if (flags & F_BLANK)
4987 sign = ' ';
4988 else
4989 sign = 0;
4990 }
4991 if (width < len)
4992 width = len;
4993 if (rescnt - (sign != 0) < width) {
4994 reslen -= rescnt;
4995 rescnt = width + fmtcnt + 100;
4996 reslen += rescnt;
4997 if (reslen < 0) {
4998 Py_DECREF(result);
4999 Py_XDECREF(temp);
5000 return PyErr_NoMemory();
5001 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005002 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00005003 Py_XDECREF(temp);
5004 return NULL;
5005 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005006 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00005007 + reslen - rescnt;
5008 }
5009 if (sign) {
5010 if (fill != ' ')
5011 *res++ = sign;
5012 rescnt--;
5013 if (width > len)
5014 width--;
5015 }
5016 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5017 assert(pbuf[0] == '0');
5018 assert(pbuf[1] == c);
5019 if (fill != ' ') {
5020 *res++ = *pbuf++;
5021 *res++ = *pbuf++;
5022 }
5023 rescnt -= 2;
5024 width -= 2;
5025 if (width < 0)
5026 width = 0;
5027 len -= 2;
5028 }
5029 if (width > len && !(flags & F_LJUST)) {
5030 do {
5031 --rescnt;
5032 *res++ = fill;
5033 } while (--width > len);
5034 }
5035 if (fill == ' ') {
5036 if (sign)
5037 *res++ = sign;
5038 if ((flags & F_ALT) &&
5039 (c == 'x' || c == 'X')) {
5040 assert(pbuf[0] == '0');
5041 assert(pbuf[1] == c);
5042 *res++ = *pbuf++;
5043 *res++ = *pbuf++;
5044 }
5045 }
5046 Py_MEMCPY(res, pbuf, len);
5047 res += len;
5048 rescnt -= len;
5049 while (--width >= len) {
5050 --rescnt;
5051 *res++ = ' ';
5052 }
5053 if (dict && (argidx < arglen) && c != '%') {
5054 PyErr_SetString(PyExc_TypeError,
5055 "not all arguments converted during string formatting");
5056 Py_XDECREF(temp);
5057 goto error;
5058 }
5059 Py_XDECREF(temp);
5060 } /* '%' */
5061 } /* until end */
5062 if (argidx < arglen && !dict) {
5063 PyErr_SetString(PyExc_TypeError,
5064 "not all arguments converted during string formatting");
5065 goto error;
5066 }
5067 if (args_owned) {
5068 Py_DECREF(args);
5069 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005070 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005071 return result;
5072
5073#ifdef Py_USING_UNICODE
5074 unicode:
5075 if (args_owned) {
5076 Py_DECREF(args);
5077 args_owned = 0;
5078 }
5079 /* Fiddle args right (remove the first argidx arguments) */
5080 if (PyTuple_Check(orig_args) && argidx > 0) {
5081 PyObject *v;
5082 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5083 v = PyTuple_New(n);
5084 if (v == NULL)
5085 goto error;
5086 while (--n >= 0) {
5087 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5088 Py_INCREF(w);
5089 PyTuple_SET_ITEM(v, n, w);
5090 }
5091 args = v;
5092 } else {
5093 Py_INCREF(orig_args);
5094 args = orig_args;
5095 }
5096 args_owned = 1;
5097 /* Take what we have of the result and let the Unicode formatting
5098 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005099 rescnt = res - PyString_AS_STRING(result);
5100 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005101 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005102 fmtcnt = PyString_GET_SIZE(format) - \
5103 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005104 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5105 if (format == NULL)
5106 goto error;
5107 v = PyUnicode_Format(format, args);
5108 Py_DECREF(format);
5109 if (v == NULL)
5110 goto error;
5111 /* Paste what we have (result) to what the Unicode formatting
5112 function returned (v) and return the result (or error) */
5113 w = PyUnicode_Concat(result, v);
5114 Py_DECREF(result);
5115 Py_DECREF(v);
5116 Py_DECREF(args);
5117 return w;
5118#endif /* Py_USING_UNICODE */
5119
5120 error:
5121 Py_DECREF(result);
5122 if (args_owned) {
5123 Py_DECREF(args);
5124 }
5125 return NULL;
5126}
5127
5128void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005129PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005130{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005131 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005132 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005133 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005134 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005135 /* If it's a string subclass, we don't really know what putting
5136 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005137 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005138 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005139 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005140 return;
5141 if (interned == NULL) {
5142 interned = PyDict_New();
5143 if (interned == NULL) {
5144 PyErr_Clear(); /* Don't leave an exception */
5145 return;
5146 }
5147 }
5148 t = PyDict_GetItem(interned, (PyObject *)s);
5149 if (t) {
5150 Py_INCREF(t);
5151 Py_DECREF(*p);
5152 *p = t;
5153 return;
5154 }
5155
5156 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5157 PyErr_Clear();
5158 return;
5159 }
5160 /* The two references in interned are not counted by refcnt.
5161 The string deallocator will take care of this */
5162 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005163 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005164}
5165
5166void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005167PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005168{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005169 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005170 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5171 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005172 Py_INCREF(*p);
5173 }
5174}
5175
5176
5177PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005178PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005179{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005180 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005181 if (s == NULL)
5182 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005183 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005184 return s;
5185}
5186
5187void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005188PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005189{
5190 int i;
5191 for (i = 0; i < UCHAR_MAX + 1; i++) {
5192 Py_XDECREF(characters[i]);
5193 characters[i] = NULL;
5194 }
5195 Py_XDECREF(nullstring);
5196 nullstring = NULL;
5197}
5198
5199void _Py_ReleaseInternedStrings(void)
5200{
5201 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005202 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005203 Py_ssize_t i, n;
5204 Py_ssize_t immortal_size = 0, mortal_size = 0;
5205
5206 if (interned == NULL || !PyDict_Check(interned))
5207 return;
5208 keys = PyDict_Keys(interned);
5209 if (keys == NULL || !PyList_Check(keys)) {
5210 PyErr_Clear();
5211 return;
5212 }
5213
5214 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5215 detector, interned strings are not forcibly deallocated; rather, we
5216 give them their stolen references back, and then clear and DECREF
5217 the interned dict. */
5218
5219 n = PyList_GET_SIZE(keys);
5220 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5221 n);
5222 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005223 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005224 switch (s->ob_sstate) {
5225 case SSTATE_NOT_INTERNED:
5226 /* XXX Shouldn't happen */
5227 break;
5228 case SSTATE_INTERNED_IMMORTAL:
5229 Py_REFCNT(s) += 1;
5230 immortal_size += Py_SIZE(s);
5231 break;
5232 case SSTATE_INTERNED_MORTAL:
5233 Py_REFCNT(s) += 2;
5234 mortal_size += Py_SIZE(s);
5235 break;
5236 default:
5237 Py_FatalError("Inconsistent interned string state.");
5238 }
5239 s->ob_sstate = SSTATE_NOT_INTERNED;
5240 }
5241 fprintf(stderr, "total size of all interned strings: "
5242 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5243 "mortal/immortal\n", mortal_size, immortal_size);
5244 Py_DECREF(keys);
5245 PyDict_Clear(interned);
5246 Py_DECREF(interned);
5247 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005248}