blob: b5faf13fdaec5c099a6aba1c7513d743325e4793 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 For PyString_FromStringAndSize(), the parameter the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000043 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 const char* p = f;
193 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
194 ;
195
196 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
197 * they don't affect the amount of space we reserve.
198 */
199 if ((*f == 'l' || *f == 'z') &&
200 (f[1] == 'd' || f[1] == 'u'))
201 ++f;
202
203 switch (*f) {
204 case 'c':
205 (void)va_arg(count, int);
206 /* fall through... */
207 case '%':
208 n++;
209 break;
210 case 'd': case 'u': case 'i': case 'x':
211 (void) va_arg(count, int);
212 /* 20 bytes is enough to hold a 64-bit
213 integer. Decimal takes the most space.
214 This isn't enough for octal. */
215 n += 20;
216 break;
217 case 's':
218 s = va_arg(count, char*);
219 n += strlen(s);
220 break;
221 case 'p':
222 (void) va_arg(count, int);
223 /* maximum 64-bit pointer representation:
224 * 0xffffffffffffffff
225 * so 19 characters is enough.
226 * XXX I count 18 -- what's the extra for?
227 */
228 n += 19;
229 break;
230 default:
231 /* if we stumble upon an unknown
232 formatting code, copy the rest of
233 the format string to the output
234 string. (we cannot just skip the
235 code, since there's no way to know
236 what's in the argument list) */
237 n += strlen(p);
238 goto expand;
239 }
240 } else
241 n++;
242 }
243 expand:
244 /* step 2: fill the buffer */
245 /* Since we've analyzed how much space we need for the worst case,
246 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000247 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000248 if (!string)
249 return NULL;
250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000251 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000252
253 for (f = format; *f; f++) {
254 if (*f == '%') {
255 const char* p = f++;
256 Py_ssize_t i;
257 int longflag = 0;
258 int size_tflag = 0;
259 /* parse the width.precision part (we're only
260 interested in the precision value, if any) */
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 if (*f == '.') {
265 f++;
266 n = 0;
267 while (isdigit(Py_CHARMASK(*f)))
268 n = (n*10) + *f++ - '0';
269 }
270 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
271 f++;
272 /* handle the long flag, but only for %ld and %lu.
273 others can be added when necessary. */
274 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
275 longflag = 1;
276 ++f;
277 }
278 /* handle the size_t flag. */
279 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
280 size_tflag = 1;
281 ++f;
282 }
283
284 switch (*f) {
285 case 'c':
286 *s++ = va_arg(vargs, int);
287 break;
288 case 'd':
289 if (longflag)
290 sprintf(s, "%ld", va_arg(vargs, long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
293 va_arg(vargs, Py_ssize_t));
294 else
295 sprintf(s, "%d", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'u':
299 if (longflag)
300 sprintf(s, "%lu",
301 va_arg(vargs, unsigned long));
302 else if (size_tflag)
303 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
304 va_arg(vargs, size_t));
305 else
306 sprintf(s, "%u",
307 va_arg(vargs, unsigned int));
308 s += strlen(s);
309 break;
310 case 'i':
311 sprintf(s, "%i", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 'x':
315 sprintf(s, "%x", va_arg(vargs, int));
316 s += strlen(s);
317 break;
318 case 's':
319 p = va_arg(vargs, char*);
320 i = strlen(p);
321 if (n > 0 && i > n)
322 i = n;
323 Py_MEMCPY(s, p, i);
324 s += i;
325 break;
326 case 'p':
327 sprintf(s, "%p", va_arg(vargs, void*));
328 /* %p is ill-defined: ensure leading 0x. */
329 if (s[1] == 'X')
330 s[1] = 'x';
331 else if (s[1] != 'x') {
332 memmove(s+2, s, strlen(s)+1);
333 s[0] = '0';
334 s[1] = 'x';
335 }
336 s += strlen(s);
337 break;
338 case '%':
339 *s++ = '%';
340 break;
341 default:
342 strcpy(s, p);
343 s += strlen(s);
344 goto end;
345 }
346 } else
347 *s++ = *f;
348 }
349
350 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000351 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000352 return string;
353}
354
355PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000356PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000357{
358 PyObject* ret;
359 va_list vargs;
360
361#ifdef HAVE_STDARG_PROTOTYPES
362 va_start(vargs, format);
363#else
364 va_start(vargs);
365#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000366 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000367 va_end(vargs);
368 return ret;
369}
370
371
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000372PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000373 Py_ssize_t size,
374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v, *str;
378
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000379 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000380 if (str == NULL)
381 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000382 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000383 Py_DECREF(str);
384 return v;
385}
386
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000387PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000388 const char *encoding,
389 const char *errors)
390{
391 PyObject *v;
392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000393 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000394 PyErr_BadArgument();
395 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
397
Christian Heimes44720832008-05-26 13:01:01 +0000398 if (encoding == NULL) {
399#ifdef Py_USING_UNICODE
400 encoding = PyUnicode_GetDefaultEncoding();
401#else
402 PyErr_SetString(PyExc_ValueError, "no encoding specified");
403 goto onError;
404#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000405 }
Christian Heimes44720832008-05-26 13:01:01 +0000406
407 /* Decode via the codec registry */
408 v = PyCodec_Decode(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
412 return v;
413
414 onError:
415 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000416}
417
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000418PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000419 const char *encoding,
420 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000421{
Christian Heimes44720832008-05-26 13:01:01 +0000422 PyObject *v;
423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000424 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000425 if (v == NULL)
426 goto onError;
427
428#ifdef Py_USING_UNICODE
429 /* Convert Unicode to a string using the default encoding */
430 if (PyUnicode_Check(v)) {
431 PyObject *temp = v;
432 v = PyUnicode_AsEncodedString(v, NULL, NULL);
433 Py_DECREF(temp);
434 if (v == NULL)
435 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436 }
Christian Heimes44720832008-05-26 13:01:01 +0000437#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000438 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000439 PyErr_Format(PyExc_TypeError,
440 "decoder did not return a string object (type=%.400s)",
441 Py_TYPE(v)->tp_name);
442 Py_DECREF(v);
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
Christian Heimes44720832008-05-26 13:01:01 +0000445
446 return v;
447
448 onError:
449 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000450}
451
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000452PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000453 Py_ssize_t size,
454 const char *encoding,
455 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000456{
Christian Heimes44720832008-05-26 13:01:01 +0000457 PyObject *v, *str;
458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000459 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000460 if (str == NULL)
461 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000463 Py_DECREF(str);
464 return v;
465}
466
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000467PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000468 const char *encoding,
469 const char *errors)
470{
471 PyObject *v;
472
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000473 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000474 PyErr_BadArgument();
475 goto onError;
476 }
477
478 if (encoding == NULL) {
479#ifdef Py_USING_UNICODE
480 encoding = PyUnicode_GetDefaultEncoding();
481#else
482 PyErr_SetString(PyExc_ValueError, "no encoding specified");
483 goto onError;
484#endif
485 }
486
487 /* Encode via the codec registry */
488 v = PyCodec_Encode(str, encoding, errors);
489 if (v == NULL)
490 goto onError;
491
492 return v;
493
494 onError:
495 return NULL;
496}
497
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000498PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000499 const char *encoding,
500 const char *errors)
501{
502 PyObject *v;
503
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000504 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000505 if (v == NULL)
506 goto onError;
507
508#ifdef Py_USING_UNICODE
509 /* Convert Unicode to a string using the default encoding */
510 if (PyUnicode_Check(v)) {
511 PyObject *temp = v;
512 v = PyUnicode_AsEncodedString(v, NULL, NULL);
513 Py_DECREF(temp);
514 if (v == NULL)
515 goto onError;
516 }
517#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000518 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000519 PyErr_Format(PyExc_TypeError,
520 "encoder did not return a string object (type=%.400s)",
521 Py_TYPE(v)->tp_name);
522 Py_DECREF(v);
523 goto onError;
524 }
525
526 return v;
527
528 onError:
529 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000530}
531
532static void
Christian Heimes44720832008-05-26 13:01:01 +0000533string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000534{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000535 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000536 case SSTATE_NOT_INTERNED:
537 break;
538
539 case SSTATE_INTERNED_MORTAL:
540 /* revive dead object temporarily for DelItem */
541 Py_REFCNT(op) = 3;
542 if (PyDict_DelItem(interned, op) != 0)
543 Py_FatalError(
544 "deletion of interned string failed");
545 break;
546
547 case SSTATE_INTERNED_IMMORTAL:
548 Py_FatalError("Immortal interned string died.");
549
550 default:
551 Py_FatalError("Inconsistent interned string state.");
552 }
553 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000554}
555
Christian Heimes44720832008-05-26 13:01:01 +0000556/* Unescape a backslash-escaped string. If unicode is non-zero,
557 the string is a u-literal. If recode_encoding is non-zero,
558 the string is UTF-8 encoded and should be re-encoded in the
559 specified encoding. */
560
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000561PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000562 Py_ssize_t len,
563 const char *errors,
564 Py_ssize_t unicode,
565 const char *recode_encoding)
566{
567 int c;
568 char *p, *buf;
569 const char *end;
570 PyObject *v;
571 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000572 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000573 if (v == NULL)
574 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000575 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000576 end = s + len;
577 while (s < end) {
578 if (*s != '\\') {
579 non_esc:
580#ifdef Py_USING_UNICODE
581 if (recode_encoding && (*s & 0x80)) {
582 PyObject *u, *w;
583 char *r;
584 const char* t;
585 Py_ssize_t rn;
586 t = s;
587 /* Decode non-ASCII bytes as UTF-8. */
588 while (t < end && (*t & 0x80)) t++;
589 u = PyUnicode_DecodeUTF8(s, t - s, errors);
590 if(!u) goto failed;
591
592 /* Recode them in target encoding. */
593 w = PyUnicode_AsEncodedString(
594 u, recode_encoding, errors);
595 Py_DECREF(u);
596 if (!w) goto failed;
597
598 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000599 assert(PyString_Check(w));
600 r = PyString_AS_STRING(w);
601 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000602 Py_MEMCPY(p, r, rn);
603 p += rn;
604 Py_DECREF(w);
605 s = t;
606 } else {
607 *p++ = *s++;
608 }
609#else
610 *p++ = *s++;
611#endif
612 continue;
613 }
614 s++;
615 if (s==end) {
616 PyErr_SetString(PyExc_ValueError,
617 "Trailing \\ in string");
618 goto failed;
619 }
620 switch (*s++) {
621 /* XXX This assumes ASCII! */
622 case '\n': break;
623 case '\\': *p++ = '\\'; break;
624 case '\'': *p++ = '\''; break;
625 case '\"': *p++ = '\"'; break;
626 case 'b': *p++ = '\b'; break;
627 case 'f': *p++ = '\014'; break; /* FF */
628 case 't': *p++ = '\t'; break;
629 case 'n': *p++ = '\n'; break;
630 case 'r': *p++ = '\r'; break;
631 case 'v': *p++ = '\013'; break; /* VT */
632 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
635 c = s[-1] - '0';
636 if (s < end && '0' <= *s && *s <= '7') {
637 c = (c<<3) + *s++ - '0';
638 if (s < end && '0' <= *s && *s <= '7')
639 c = (c<<3) + *s++ - '0';
640 }
641 *p++ = c;
642 break;
643 case 'x':
644 if (s+1 < end &&
645 isxdigit(Py_CHARMASK(s[0])) &&
646 isxdigit(Py_CHARMASK(s[1])))
647 {
648 unsigned int x = 0;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x = c - '0';
653 else if (islower(c))
654 x = 10 + c - 'a';
655 else
656 x = 10 + c - 'A';
657 x = x << 4;
658 c = Py_CHARMASK(*s);
659 s++;
660 if (isdigit(c))
661 x += c - '0';
662 else if (islower(c))
663 x += 10 + c - 'a';
664 else
665 x += 10 + c - 'A';
666 *p++ = x;
667 break;
668 }
669 if (!errors || strcmp(errors, "strict") == 0) {
670 PyErr_SetString(PyExc_ValueError,
671 "invalid \\x escape");
672 goto failed;
673 }
674 if (strcmp(errors, "replace") == 0) {
675 *p++ = '?';
676 } else if (strcmp(errors, "ignore") == 0)
677 /* do nothing */;
678 else {
679 PyErr_Format(PyExc_ValueError,
680 "decoding error; "
681 "unknown error handling code: %.400s",
682 errors);
683 goto failed;
684 }
685#ifndef Py_USING_UNICODE
686 case 'u':
687 case 'U':
688 case 'N':
689 if (unicode) {
690 PyErr_SetString(PyExc_ValueError,
691 "Unicode escapes not legal "
692 "when Unicode disabled");
693 goto failed;
694 }
695#endif
696 default:
697 *p++ = '\\';
698 s--;
699 goto non_esc; /* an arbitry number of unescaped
700 UTF-8 bytes may follow. */
701 }
702 }
703 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000704 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000705 return v;
706 failed:
707 Py_DECREF(v);
708 return NULL;
709}
710
711/* -------------------------------------------------------------------- */
712/* object api */
713
Christian Heimes1a6387e2008-03-26 12:49:49 +0000714static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000715string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000716{
Christian Heimes44720832008-05-26 13:01:01 +0000717 char *s;
718 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000720 return -1;
721 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722}
723
Christian Heimes44720832008-05-26 13:01:01 +0000724static /*const*/ char *
725string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726{
Christian Heimes44720832008-05-26 13:01:01 +0000727 char *s;
728 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000729 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000730 return NULL;
731 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000732}
733
734Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000735PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000736{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000737 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000738 return string_getsize(op);
739 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740}
741
Christian Heimes44720832008-05-26 13:01:01 +0000742/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000743PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000744{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000745 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000746 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000747 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000748}
749
750int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000752 register char **s,
753 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754{
Christian Heimes44720832008-05-26 13:01:01 +0000755 if (s == NULL) {
756 PyErr_BadInternalCall();
757 return -1;
758 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000760 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000761#ifdef Py_USING_UNICODE
762 if (PyUnicode_Check(obj)) {
763 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
764 if (obj == NULL)
765 return -1;
766 }
767 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000768#endif
Christian Heimes44720832008-05-26 13:01:01 +0000769 {
770 PyErr_Format(PyExc_TypeError,
771 "expected string or Unicode object, "
772 "%.200s found", Py_TYPE(obj)->tp_name);
773 return -1;
774 }
775 }
776
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000777 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000778 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779 *len = PyString_GET_SIZE(obj);
780 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000781 PyErr_SetString(PyExc_TypeError,
782 "expected string without null bytes");
783 return -1;
784 }
785 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786}
787
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788/* -------------------------------------------------------------------- */
789/* Methods */
790
Christian Heimes44720832008-05-26 13:01:01 +0000791#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000793
Christian Heimes1a6387e2008-03-26 12:49:49 +0000794#include "stringlib/count.h"
795#include "stringlib/find.h"
796#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000797
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000799#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801
802
803static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000804string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000805{
Christian Heimes44720832008-05-26 13:01:01 +0000806 Py_ssize_t i, str_len;
807 char c;
808 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000809
Christian Heimes44720832008-05-26 13:01:01 +0000810 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000811 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000812 int ret;
813 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000814 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000815 if (op == NULL)
816 return -1;
817 ret = string_print(op, fp, flags);
818 Py_DECREF(op);
819 return ret;
820 }
821 if (flags & Py_PRINT_RAW) {
822 char *data = op->ob_sval;
823 Py_ssize_t size = Py_SIZE(op);
824 Py_BEGIN_ALLOW_THREADS
825 while (size > INT_MAX) {
826 /* Very long strings cannot be written atomically.
827 * But don't write exactly INT_MAX bytes at a time
828 * to avoid memory aligment issues.
829 */
830 const int chunk_size = INT_MAX & ~0x3FFF;
831 fwrite(data, 1, chunk_size, fp);
832 data += chunk_size;
833 size -= chunk_size;
834 }
835#ifdef __VMS
836 if (size) fwrite(data, (int)size, 1, fp);
837#else
838 fwrite(data, 1, (int)size, fp);
839#endif
840 Py_END_ALLOW_THREADS
841 return 0;
842 }
843
844 /* figure out which quote to use; single is preferred */
845 quote = '\'';
846 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
847 !memchr(op->ob_sval, '"', Py_SIZE(op)))
848 quote = '"';
849
850 str_len = Py_SIZE(op);
851 Py_BEGIN_ALLOW_THREADS
852 fputc(quote, fp);
853 for (i = 0; i < str_len; i++) {
854 /* Since strings are immutable and the caller should have a
855 reference, accessing the interal buffer should not be an issue
856 with the GIL released. */
857 c = op->ob_sval[i];
858 if (c == quote || c == '\\')
859 fprintf(fp, "\\%c", c);
860 else if (c == '\t')
861 fprintf(fp, "\\t");
862 else if (c == '\n')
863 fprintf(fp, "\\n");
864 else if (c == '\r')
865 fprintf(fp, "\\r");
866 else if (c < ' ' || c >= 0x7f)
867 fprintf(fp, "\\x%02x", c & 0xff);
868 else
869 fputc(c, fp);
870 }
871 fputc(quote, fp);
872 Py_END_ALLOW_THREADS
873 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000874}
875
Christian Heimes44720832008-05-26 13:01:01 +0000876PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000877PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000878{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000879 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000880 size_t newsize = 2 + 4 * Py_SIZE(op);
881 PyObject *v;
882 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
883 PyErr_SetString(PyExc_OverflowError,
884 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000885 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000886 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000887 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000888 if (v == NULL) {
889 return NULL;
890 }
891 else {
892 register Py_ssize_t i;
893 register char c;
894 register char *p;
895 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000896
Christian Heimes44720832008-05-26 13:01:01 +0000897 /* figure out which quote to use; single is preferred */
898 quote = '\'';
899 if (smartquotes &&
900 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
901 !memchr(op->ob_sval, '"', Py_SIZE(op)))
902 quote = '"';
903
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000904 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000905 *p++ = quote;
906 for (i = 0; i < Py_SIZE(op); i++) {
907 /* There's at least enough room for a hex escape
908 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000909 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000910 c = op->ob_sval[i];
911 if (c == quote || c == '\\')
912 *p++ = '\\', *p++ = c;
913 else if (c == '\t')
914 *p++ = '\\', *p++ = 't';
915 else if (c == '\n')
916 *p++ = '\\', *p++ = 'n';
917 else if (c == '\r')
918 *p++ = '\\', *p++ = 'r';
919 else if (c < ' ' || c >= 0x7f) {
920 /* For performance, we don't want to call
921 PyOS_snprintf here (extra layers of
922 function call). */
923 sprintf(p, "\\x%02x", c & 0xff);
924 p += 4;
925 }
926 else
927 *p++ = c;
928 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000929 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000930 *p++ = quote;
931 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000932 _PyString_Resize(
933 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000934 return v;
935 }
936}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937
938static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000939string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000940{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000941 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000942}
943
Christian Heimes1a6387e2008-03-26 12:49:49 +0000944static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000945string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000946{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000947 assert(PyString_Check(s));
948 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000949 Py_INCREF(s);
950 return s;
951 }
952 else {
953 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000954 PyStringObject *t = (PyStringObject *) s;
955 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000956 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000957}
958
Christian Heimes44720832008-05-26 13:01:01 +0000959static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000960string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000961{
962 return Py_SIZE(a);
963}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000964
Christian Heimes44720832008-05-26 13:01:01 +0000965static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000966string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000967{
968 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000969 register PyStringObject *op;
970 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000971#ifdef Py_USING_UNICODE
972 if (PyUnicode_Check(bb))
973 return PyUnicode_Concat((PyObject *)a, bb);
974#endif
975 if (PyByteArray_Check(bb))
976 return PyByteArray_Concat((PyObject *)a, bb);
977 PyErr_Format(PyExc_TypeError,
978 "cannot concatenate 'str' and '%.200s' objects",
979 Py_TYPE(bb)->tp_name);
980 return NULL;
981 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000982#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000983 /* Optimize cases with empty left or right operand */
984 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000985 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000986 if (Py_SIZE(a) == 0) {
987 Py_INCREF(bb);
988 return bb;
989 }
990 Py_INCREF(a);
991 return (PyObject *)a;
992 }
993 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +0000994 /* Check that string sizes are not negative, to prevent an
995 overflow in cases where we are passed incorrectly-created
996 strings with negative lengths (due to a bug in other code).
997 */
998 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
999 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001000 PyErr_SetString(PyExc_OverflowError,
1001 "strings are too large to concat");
1002 return NULL;
1003 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001004
Christian Heimes44720832008-05-26 13:01:01 +00001005 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001006 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001007 PyErr_SetString(PyExc_OverflowError,
1008 "strings are too large to concat");
1009 return NULL;
1010 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001011 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001012 if (op == NULL)
1013 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001014 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001015 op->ob_shash = -1;
1016 op->ob_sstate = SSTATE_NOT_INTERNED;
1017 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1018 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1019 op->ob_sval[size] = '\0';
1020 return (PyObject *) op;
1021#undef b
1022}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001023
Christian Heimes44720832008-05-26 13:01:01 +00001024static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001025string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001026{
1027 register Py_ssize_t i;
1028 register Py_ssize_t j;
1029 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001030 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001031 size_t nbytes;
1032 if (n < 0)
1033 n = 0;
1034 /* watch out for overflows: the size can overflow int,
1035 * and the # of bytes needed can overflow size_t
1036 */
1037 size = Py_SIZE(a) * n;
1038 if (n && size / n != Py_SIZE(a)) {
1039 PyErr_SetString(PyExc_OverflowError,
1040 "repeated string is too long");
1041 return NULL;
1042 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001043 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001044 Py_INCREF(a);
1045 return (PyObject *)a;
1046 }
1047 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001048 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001049 PyErr_SetString(PyExc_OverflowError,
1050 "repeated string is too long");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001054 if (op == NULL)
1055 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001056 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001057 op->ob_shash = -1;
1058 op->ob_sstate = SSTATE_NOT_INTERNED;
1059 op->ob_sval[size] = '\0';
1060 if (Py_SIZE(a) == 1 && n > 0) {
1061 memset(op->ob_sval, a->ob_sval[0] , n);
1062 return (PyObject *) op;
1063 }
1064 i = 0;
1065 if (i < size) {
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 i = Py_SIZE(a);
1068 }
1069 while (i < size) {
1070 j = (i <= size-i) ? i : size-i;
1071 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1072 i += j;
1073 }
1074 return (PyObject *) op;
1075}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001076
Christian Heimes44720832008-05-26 13:01:01 +00001077/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1078
1079static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001080string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001081 register Py_ssize_t j)
1082 /* j -- may be negative! */
1083{
1084 if (i < 0)
1085 i = 0;
1086 if (j < 0)
1087 j = 0; /* Avoid signed/unsigned bug in next line */
1088 if (j > Py_SIZE(a))
1089 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001090 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001091 /* It's the same as a */
1092 Py_INCREF(a);
1093 return (PyObject *)a;
1094 }
1095 if (j < i)
1096 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001097 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001098}
1099
1100static int
1101string_contains(PyObject *str_obj, PyObject *sub_obj)
1102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001103 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001104#ifdef Py_USING_UNICODE
1105 if (PyUnicode_Check(sub_obj))
1106 return PyUnicode_Contains(str_obj, sub_obj);
1107#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001108 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001109 PyErr_Format(PyExc_TypeError,
1110 "'in <string>' requires string as left operand, "
1111 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1112 return -1;
1113 }
1114 }
1115
1116 return stringlib_contains_obj(str_obj, sub_obj);
1117}
1118
1119static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001120string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001121{
1122 char pchar;
1123 PyObject *v;
1124 if (i < 0 || i >= Py_SIZE(a)) {
1125 PyErr_SetString(PyExc_IndexError, "string index out of range");
1126 return NULL;
1127 }
1128 pchar = a->ob_sval[i];
1129 v = (PyObject *)characters[pchar & UCHAR_MAX];
1130 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001131 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001132 else {
1133#ifdef COUNT_ALLOCS
1134 one_strings++;
1135#endif
1136 Py_INCREF(v);
1137 }
1138 return v;
1139}
1140
1141static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001142string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001143{
1144 int c;
1145 Py_ssize_t len_a, len_b;
1146 Py_ssize_t min_len;
1147 PyObject *result;
1148
1149 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001150 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001151 result = Py_NotImplemented;
1152 goto out;
1153 }
1154 if (a == b) {
1155 switch (op) {
1156 case Py_EQ:case Py_LE:case Py_GE:
1157 result = Py_True;
1158 goto out;
1159 case Py_NE:case Py_LT:case Py_GT:
1160 result = Py_False;
1161 goto out;
1162 }
1163 }
1164 if (op == Py_EQ) {
1165 /* Supporting Py_NE here as well does not save
1166 much time, since Py_NE is rarely used. */
1167 if (Py_SIZE(a) == Py_SIZE(b)
1168 && (a->ob_sval[0] == b->ob_sval[0]
1169 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1170 result = Py_True;
1171 } else {
1172 result = Py_False;
1173 }
1174 goto out;
1175 }
1176 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1177 min_len = (len_a < len_b) ? len_a : len_b;
1178 if (min_len > 0) {
1179 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1180 if (c==0)
1181 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1182 } else
1183 c = 0;
1184 if (c == 0)
1185 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1186 switch (op) {
1187 case Py_LT: c = c < 0; break;
1188 case Py_LE: c = c <= 0; break;
1189 case Py_EQ: assert(0); break; /* unreachable */
1190 case Py_NE: c = c != 0; break;
1191 case Py_GT: c = c > 0; break;
1192 case Py_GE: c = c >= 0; break;
1193 default:
1194 result = Py_NotImplemented;
1195 goto out;
1196 }
1197 result = c ? Py_True : Py_False;
1198 out:
1199 Py_INCREF(result);
1200 return result;
1201}
1202
1203int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001204_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001205{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206 PyStringObject *a = (PyStringObject*) o1;
1207 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001208 return Py_SIZE(a) == Py_SIZE(b)
1209 && *a->ob_sval == *b->ob_sval
1210 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1211}
1212
1213static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001214string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001215{
1216 register Py_ssize_t len;
1217 register unsigned char *p;
1218 register long x;
1219
1220 if (a->ob_shash != -1)
1221 return a->ob_shash;
1222 len = Py_SIZE(a);
1223 p = (unsigned char *) a->ob_sval;
1224 x = *p << 7;
1225 while (--len >= 0)
1226 x = (1000003*x) ^ *p++;
1227 x ^= Py_SIZE(a);
1228 if (x == -1)
1229 x = -2;
1230 a->ob_shash = x;
1231 return x;
1232}
1233
1234static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001235string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001236{
1237 if (PyIndex_Check(item)) {
1238 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1239 if (i == -1 && PyErr_Occurred())
1240 return NULL;
1241 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001242 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001243 return string_item(self, i);
1244 }
1245 else if (PySlice_Check(item)) {
1246 Py_ssize_t start, stop, step, slicelength, cur, i;
1247 char* source_buf;
1248 char* result_buf;
1249 PyObject* result;
1250
1251 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001252 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001253 &start, &stop, &step, &slicelength) < 0) {
1254 return NULL;
1255 }
1256
1257 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001258 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001259 }
1260 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001261 slicelength == PyString_GET_SIZE(self) &&
1262 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001263 Py_INCREF(self);
1264 return (PyObject *)self;
1265 }
1266 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001267 return PyString_FromStringAndSize(
1268 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001269 slicelength);
1270 }
1271 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001272 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001273 result_buf = (char *)PyMem_Malloc(slicelength);
1274 if (result_buf == NULL)
1275 return PyErr_NoMemory();
1276
1277 for (cur = start, i = 0; i < slicelength;
1278 cur += step, i++) {
1279 result_buf[i] = source_buf[cur];
1280 }
1281
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001282 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001283 slicelength);
1284 PyMem_Free(result_buf);
1285 return result;
1286 }
1287 }
1288 else {
1289 PyErr_Format(PyExc_TypeError,
1290 "string indices must be integers, not %.200s",
1291 Py_TYPE(item)->tp_name);
1292 return NULL;
1293 }
1294}
1295
1296static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001297string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001298{
1299 if ( index != 0 ) {
1300 PyErr_SetString(PyExc_SystemError,
1301 "accessing non-existent string segment");
1302 return -1;
1303 }
1304 *ptr = (void *)self->ob_sval;
1305 return Py_SIZE(self);
1306}
1307
1308static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001310{
1311 PyErr_SetString(PyExc_TypeError,
1312 "Cannot use string as modifiable buffer");
1313 return -1;
1314}
1315
1316static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001317string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001318{
1319 if ( lenp )
1320 *lenp = Py_SIZE(self);
1321 return 1;
1322}
1323
1324static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001325string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001326{
1327 if ( index != 0 ) {
1328 PyErr_SetString(PyExc_SystemError,
1329 "accessing non-existent string segment");
1330 return -1;
1331 }
1332 *ptr = self->ob_sval;
1333 return Py_SIZE(self);
1334}
1335
1336static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001337string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001338{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001339 return PyBuffer_FillInfo(view, (PyObject*)self,
1340 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001341 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001342}
1343
1344static PySequenceMethods string_as_sequence = {
1345 (lenfunc)string_length, /*sq_length*/
1346 (binaryfunc)string_concat, /*sq_concat*/
1347 (ssizeargfunc)string_repeat, /*sq_repeat*/
1348 (ssizeargfunc)string_item, /*sq_item*/
1349 (ssizessizeargfunc)string_slice, /*sq_slice*/
1350 0, /*sq_ass_item*/
1351 0, /*sq_ass_slice*/
1352 (objobjproc)string_contains /*sq_contains*/
1353};
1354
1355static PyMappingMethods string_as_mapping = {
1356 (lenfunc)string_length,
1357 (binaryfunc)string_subscript,
1358 0,
1359};
1360
1361static PyBufferProcs string_as_buffer = {
1362 (readbufferproc)string_buffer_getreadbuf,
1363 (writebufferproc)string_buffer_getwritebuf,
1364 (segcountproc)string_buffer_getsegcount,
1365 (charbufferproc)string_buffer_getcharbuf,
1366 (getbufferproc)string_buffer_getbuffer,
1367 0, /* XXX */
1368};
1369
1370
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001371
/* Selectors for the three strip variants. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string without its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1380
Christian Heimes1a6387e2008-03-26 12:49:49 +00001381
/* Non-zero when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and only the interior goes through memcmp.

   Don't call if length < 2 (the interior length would be negative).

   Every argument is parenthesized so that expression arguments expand
   correctly; arguments are evaluated more than once, so they must be free
   of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1387
1388
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one element
   more than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression arguments
   expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001401
/* Append the substring data[left:right] to `list`.

   Relies on names from the expansion site: `str` (scratch PyObject *),
   `list` (the result list) and an `onError` label that is jumped to when
   creating or appending the substring fails.  The temporary reference held
   in `str` is released on both the success and the failure path.

   The body is wrapped in braces so the macro expands to a single compound
   statement and stays intact when used as the body of an `if` or a loop. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001413
/* Add the substring data[left:right] to `list` as element number `count`,
   then increment `count`.

   While `count` is below MAX_PREALLOC the new string is stored straight
   into slot `count` with PyList_SET_ITEM (which takes over the reference);
   beyond that it is appended and the temporary reference is dropped.

   Relies on names from the expansion site: `str`, `list`, `count` and an
   `onError` label used when creating or appending the substring fails. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430
/* Always force the list to the expected size: set its size field to the
   number of elements actually stored (`count` at the expansion site). */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers for whitespace splitting.  `i` must be a modifiable index
   variable; `s` and `len` are parenthesized so that expression arguments
   expand correctly.
     SKIP_SPACE / SKIP_NONSPACE    advance i while i < len and s[i] is / is
                                   not whitespace.
     RSKIP_SPACE / RSKIP_NONSPACE  step i backwards while i >= 0 and s[i]
                                   is / is not whitespace. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) && isspace(Py_CHARMASK((s)[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !isspace(Py_CHARMASK((s)[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK((s)[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK((s)[i]))) i--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001442 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001443 Py_ssize_t i, j, count=0;
1444 PyObject *str;
1445 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Christian Heimes44720832008-05-26 13:01:01 +00001447 if (list == NULL)
1448 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001449
Christian Heimes44720832008-05-26 13:01:01 +00001450 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
Christian Heimes44720832008-05-26 13:01:01 +00001452 while (maxsplit-- > 0) {
1453 SKIP_SPACE(s, i, len);
1454 if (i==len) break;
1455 j = i; i++;
1456 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001457 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001458 /* No whitespace in self, so just use it as list[0] */
1459 Py_INCREF(self);
1460 PyList_SET_ITEM(list, 0, (PyObject *)self);
1461 count++;
1462 break;
1463 }
1464 SPLIT_ADD(s, j, i);
1465 }
1466
1467 if (i < len) {
1468 /* Only occurs when maxsplit was reached */
1469 /* Skip any remaining whitespace and copy to end of string */
1470 SKIP_SPACE(s, i, len);
1471 if (i != len)
1472 SPLIT_ADD(s, i, len);
1473 }
1474 FIX_PREALLOC_SIZE(list);
1475 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001477 Py_DECREF(list);
1478 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479}
1480
Christian Heimes1a6387e2008-03-26 12:49:49 +00001481Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001482split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001484 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001485 register Py_ssize_t i, j, count=0;
1486 PyObject *str;
1487 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488
Christian Heimes44720832008-05-26 13:01:01 +00001489 if (list == NULL)
1490 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001491
Christian Heimes44720832008-05-26 13:01:01 +00001492 i = j = 0;
1493 while ((j < len) && (maxcount-- > 0)) {
1494 for(; j<len; j++) {
1495 /* I found that using memchr makes no difference */
1496 if (s[j] == ch) {
1497 SPLIT_ADD(s, i, j);
1498 i = j = j + 1;
1499 break;
1500 }
1501 }
1502 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001503 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001504 /* ch not in self, so just use self as list[0] */
1505 Py_INCREF(self);
1506 PyList_SET_ITEM(list, 0, (PyObject *)self);
1507 count++;
1508 }
1509 else if (i <= len) {
1510 SPLIT_ADD(s, i, len);
1511 }
1512 FIX_PREALLOC_SIZE(list);
1513 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514
1515 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001516 Py_DECREF(list);
1517 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001518}
1519
1520PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001521"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001522\n\
Christian Heimes44720832008-05-26 13:01:01 +00001523Return a list of the words in the string S, using sep as the\n\
1524delimiter string. If maxsplit is given, at most maxsplit\n\
1525splits are done. If sep is not specified or is None, any\n\
1526whitespace string is a separator and empty strings are removed\n\
1527from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528
1529static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001530string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001532 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001533 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001534 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001535 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001537 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538#endif
1539
Christian Heimes44720832008-05-26 13:01:01 +00001540 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1541 return NULL;
1542 if (maxsplit < 0)
1543 maxsplit = PY_SSIZE_T_MAX;
1544 if (subobj == Py_None)
1545 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001549 }
1550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj))
1552 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1553#endif
1554 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1555 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001556
Christian Heimes44720832008-05-26 13:01:01 +00001557 if (n == 0) {
1558 PyErr_SetString(PyExc_ValueError, "empty separator");
1559 return NULL;
1560 }
1561 else if (n == 1)
1562 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001563
Christian Heimes44720832008-05-26 13:01:01 +00001564 list = PyList_New(PREALLOC_SIZE(maxsplit));
1565 if (list == NULL)
1566 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001567
1568#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001569 i = j = 0;
1570 while (maxsplit-- > 0) {
1571 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1572 if (pos < 0)
1573 break;
1574 j = i+pos;
1575 SPLIT_ADD(s, i, j);
1576 i = j + n;
1577 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001578#else
Christian Heimes44720832008-05-26 13:01:01 +00001579 i = j = 0;
1580 while ((j+n <= len) && (maxsplit-- > 0)) {
1581 for (; j+n <= len; j++) {
1582 if (Py_STRING_MATCH(s, j, sub, n)) {
1583 SPLIT_ADD(s, i, j);
1584 i = j = j + n;
1585 break;
1586 }
1587 }
1588 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001589#endif
Christian Heimes44720832008-05-26 13:01:01 +00001590 SPLIT_ADD(s, i, len);
1591 FIX_PREALLOC_SIZE(list);
1592 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001593
Christian Heimes44720832008-05-26 13:01:01 +00001594 onError:
1595 Py_DECREF(list);
1596 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001597}
1598
1599PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001600"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001601\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001602Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001604found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001605
1606static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001607string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608{
Christian Heimes44720832008-05-26 13:01:01 +00001609 const char *sep;
1610 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001612 if (PyString_Check(sep_obj)) {
1613 sep = PyString_AS_STRING(sep_obj);
1614 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001615 }
1616#ifdef Py_USING_UNICODE
1617 else if (PyUnicode_Check(sep_obj))
1618 return PyUnicode_Partition((PyObject *) self, sep_obj);
1619#endif
1620 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1621 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001622
Christian Heimes44720832008-05-26 13:01:01 +00001623 return stringlib_partition(
1624 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001625 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001626 sep_obj, sep, sep_len
1627 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001628}
1629
1630PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001631"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001632\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001633Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001634the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001635separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001636
1637static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Christian Heimes44720832008-05-26 13:01:01 +00001640 const char *sep;
1641 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001643 if (PyString_Check(sep_obj)) {
1644 sep = PyString_AS_STRING(sep_obj);
1645 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001646 }
1647#ifdef Py_USING_UNICODE
1648 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001649 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001650#endif
1651 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1652 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001653
Christian Heimes44720832008-05-26 13:01:01 +00001654 return stringlib_rpartition(
1655 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001656 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001657 sep_obj, sep, sep_len
1658 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001659}
1660
1661Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001662rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001664 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001665 Py_ssize_t i, j, count=0;
1666 PyObject *str;
1667 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001668
Christian Heimes44720832008-05-26 13:01:01 +00001669 if (list == NULL)
1670 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001671
Christian Heimes44720832008-05-26 13:01:01 +00001672 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Christian Heimes44720832008-05-26 13:01:01 +00001674 while (maxsplit-- > 0) {
1675 RSKIP_SPACE(s, i);
1676 if (i<0) break;
1677 j = i; i--;
1678 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001679 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001680 /* No whitespace in self, so just use it as list[0] */
1681 Py_INCREF(self);
1682 PyList_SET_ITEM(list, 0, (PyObject *)self);
1683 count++;
1684 break;
1685 }
1686 SPLIT_ADD(s, i + 1, j + 1);
1687 }
1688 if (i >= 0) {
1689 /* Only occurs when maxsplit was reached */
1690 /* Skip any remaining whitespace and copy to beginning of string */
1691 RSKIP_SPACE(s, i);
1692 if (i >= 0)
1693 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694
Christian Heimes44720832008-05-26 13:01:01 +00001695 }
1696 FIX_PREALLOC_SIZE(list);
1697 if (PyList_Reverse(list) < 0)
1698 goto onError;
1699 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001700 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001701 Py_DECREF(list);
1702 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001703}
1704
1705Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001706rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001707{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001708 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001709 register Py_ssize_t i, j, count=0;
1710 PyObject *str;
1711 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001712
Christian Heimes44720832008-05-26 13:01:01 +00001713 if (list == NULL)
1714 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 i = j = len - 1;
1717 while ((i >= 0) && (maxcount-- > 0)) {
1718 for (; i >= 0; i--) {
1719 if (s[i] == ch) {
1720 SPLIT_ADD(s, i + 1, j + 1);
1721 j = i = i - 1;
1722 break;
1723 }
1724 }
1725 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001726 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001727 /* ch not in self, so just use self as list[0] */
1728 Py_INCREF(self);
1729 PyList_SET_ITEM(list, 0, (PyObject *)self);
1730 count++;
1731 }
1732 else if (j >= -1) {
1733 SPLIT_ADD(s, 0, j + 1);
1734 }
1735 FIX_PREALLOC_SIZE(list);
1736 if (PyList_Reverse(list) < 0)
1737 goto onError;
1738 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001739
Christian Heimes44720832008-05-26 13:01:01 +00001740 onError:
1741 Py_DECREF(list);
1742 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001743}
1744
1745PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001746"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001747\n\
Christian Heimes44720832008-05-26 13:01:01 +00001748Return a list of the words in the string S, using sep as the\n\
1749delimiter string, starting at the end of the string and working\n\
1750to the front. If maxsplit is given, at most maxsplit splits are\n\
1751done. If sep is not specified or is None, any whitespace string\n\
1752is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001753
1754static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001755string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001756{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001757 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001758 Py_ssize_t maxsplit = -1, count=0;
1759 const char *s, *sub;
1760 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1763 return NULL;
1764 if (maxsplit < 0)
1765 maxsplit = PY_SSIZE_T_MAX;
1766 if (subobj == Py_None)
1767 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001768 if (PyString_Check(subobj)) {
1769 sub = PyString_AS_STRING(subobj);
1770 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001771 }
1772#ifdef Py_USING_UNICODE
1773 else if (PyUnicode_Check(subobj))
1774 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1775#endif
1776 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1777 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001778
Christian Heimes44720832008-05-26 13:01:01 +00001779 if (n == 0) {
1780 PyErr_SetString(PyExc_ValueError, "empty separator");
1781 return NULL;
1782 }
1783 else if (n == 1)
1784 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
Christian Heimes44720832008-05-26 13:01:01 +00001786 list = PyList_New(PREALLOC_SIZE(maxsplit));
1787 if (list == NULL)
1788 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001789
Christian Heimes44720832008-05-26 13:01:01 +00001790 j = len;
1791 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001792
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001793 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001794 while ( (i >= 0) && (maxsplit-- > 0) ) {
1795 for (; i>=0; i--) {
1796 if (Py_STRING_MATCH(s, i, sub, n)) {
1797 SPLIT_ADD(s, i + n, j);
1798 j = i;
1799 i -= n;
1800 break;
1801 }
1802 }
1803 }
1804 SPLIT_ADD(s, 0, j);
1805 FIX_PREALLOC_SIZE(list);
1806 if (PyList_Reverse(list) < 0)
1807 goto onError;
1808 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001809
1810onError:
Christian Heimes44720832008-05-26 13:01:01 +00001811 Py_DECREF(list);
1812 return NULL;
1813}
1814
1815
1816PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001817"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001818\n\
1819Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001820iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001821
1822static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001823string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001824{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001825 char *sep = PyString_AS_STRING(self);
1826 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001827 PyObject *res = NULL;
1828 char *p;
1829 Py_ssize_t seqlen = 0;
1830 size_t sz = 0;
1831 Py_ssize_t i;
1832 PyObject *seq, *item;
1833
1834 seq = PySequence_Fast(orig, "");
1835 if (seq == NULL) {
1836 return NULL;
1837 }
1838
1839 seqlen = PySequence_Size(seq);
1840 if (seqlen == 0) {
1841 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001842 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001843 }
1844 if (seqlen == 1) {
1845 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001846 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001847 Py_INCREF(item);
1848 Py_DECREF(seq);
1849 return item;
1850 }
1851 }
1852
1853 /* There are at least two things to join, or else we have a subclass
1854 * of the builtin types in the sequence.
1855 * Do a pre-pass to figure out the total amount of space we'll
1856 * need (sz), see whether any argument is absurd, and defer to
1857 * the Unicode join if appropriate.
1858 */
1859 for (i = 0; i < seqlen; i++) {
1860 const size_t old_sz = sz;
1861 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001862 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001863#ifdef Py_USING_UNICODE
1864 if (PyUnicode_Check(item)) {
1865 /* Defer to Unicode join.
1866 * CAUTION: There's no gurantee that the
1867 * original sequence can be iterated over
1868 * again, so we must pass seq here.
1869 */
1870 PyObject *result;
1871 result = PyUnicode_Join((PyObject *)self, seq);
1872 Py_DECREF(seq);
1873 return result;
1874 }
1875#endif
1876 PyErr_Format(PyExc_TypeError,
1877 "sequence item %zd: expected string,"
1878 " %.80s found",
1879 i, Py_TYPE(item)->tp_name);
1880 Py_DECREF(seq);
1881 return NULL;
1882 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001883 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001884 if (i != 0)
1885 sz += seplen;
1886 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1887 PyErr_SetString(PyExc_OverflowError,
1888 "join() result is too long for a Python string");
1889 Py_DECREF(seq);
1890 return NULL;
1891 }
1892 }
1893
1894 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001895 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001896 if (res == NULL) {
1897 Py_DECREF(seq);
1898 return NULL;
1899 }
1900
1901 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001902 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001903 for (i = 0; i < seqlen; ++i) {
1904 size_t n;
1905 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001906 n = PyString_GET_SIZE(item);
1907 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001908 p += n;
1909 if (i < seqlen - 1) {
1910 Py_MEMCPY(p, sep, seplen);
1911 p += seplen;
1912 }
1913 }
1914
1915 Py_DECREF(seq);
1916 return res;
1917}
1918
1919PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001920_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001921{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001922 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001923 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001924 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001925}
1926
1927Py_LOCAL_INLINE(void)
1928string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1929{
1930 if (*end > len)
1931 *end = len;
1932 else if (*end < 0)
1933 *end += len;
1934 if (*end < 0)
1935 *end = 0;
1936 if (*start < 0)
1937 *start += len;
1938 if (*start < 0)
1939 *start = 0;
1940}
1941
1942Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001943string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001944{
1945 PyObject *subobj;
1946 const char *sub;
1947 Py_ssize_t sub_len;
1948 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1949 PyObject *obj_start=Py_None, *obj_end=Py_None;
1950
1951 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1952 &obj_start, &obj_end))
1953 return -2;
1954 /* To support None in "start" and "end" arguments, meaning
1955 the same as if they were not passed.
1956 */
1957 if (obj_start != Py_None)
1958 if (!_PyEval_SliceIndex(obj_start, &start))
1959 return -2;
1960 if (obj_end != Py_None)
1961 if (!_PyEval_SliceIndex(obj_end, &end))
1962 return -2;
1963
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001964 if (PyString_Check(subobj)) {
1965 sub = PyString_AS_STRING(subobj);
1966 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001967 }
1968#ifdef Py_USING_UNICODE
1969 else if (PyUnicode_Check(subobj))
1970 return PyUnicode_Find(
1971 (PyObject *)self, subobj, start, end, dir);
1972#endif
1973 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1974 /* XXX - the "expected a character buffer object" is pretty
1975 confusing for a non-expert. remap to something else ? */
1976 return -2;
1977
1978 if (dir > 0)
1979 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001980 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001981 sub, sub_len, start, end);
1982 else
1983 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001985 sub, sub_len, start, end);
1986}
1987
1988
1989PyDoc_STRVAR(find__doc__,
1990"S.find(sub [,start [,end]]) -> int\n\
1991\n\
1992Return the lowest index in S where substring sub is found,\n\
1993such that sub is contained within s[start:end]. Optional\n\
1994arguments start and end are interpreted as in slice notation.\n\
1995\n\
1996Return -1 on failure.");
1997
1998static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001999string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002000{
2001 Py_ssize_t result = string_find_internal(self, args, +1);
2002 if (result == -2)
2003 return NULL;
2004 return PyInt_FromSsize_t(result);
2005}
2006
2007
2008PyDoc_STRVAR(index__doc__,
2009"S.index(sub [,start [,end]]) -> int\n\
2010\n\
2011Like S.find() but raise ValueError when the substring is not found.");
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 Py_ssize_t result = string_find_internal(self, args, +1);
2017 if (result == -2)
2018 return NULL;
2019 if (result == -1) {
2020 PyErr_SetString(PyExc_ValueError,
2021 "substring not found");
2022 return NULL;
2023 }
2024 return PyInt_FromSsize_t(result);
2025}
2026
2027
2028PyDoc_STRVAR(rfind__doc__,
2029"S.rfind(sub [,start [,end]]) -> int\n\
2030\n\
2031Return the highest index in S where substring sub is found,\n\
2032such that sub is contained within s[start:end]. Optional\n\
2033arguments start and end are interpreted as in slice notation.\n\
2034\n\
2035Return -1 on failure.");
2036
2037static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002038string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002039{
2040 Py_ssize_t result = string_find_internal(self, args, -1);
2041 if (result == -2)
2042 return NULL;
2043 return PyInt_FromSsize_t(result);
2044}
2045
2046
2047PyDoc_STRVAR(rindex__doc__,
2048"S.rindex(sub [,start [,end]]) -> int\n\
2049\n\
2050Like S.rfind() but raise ValueError when the substring is not found.");
2051
2052static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002053string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002054{
2055 Py_ssize_t result = string_find_internal(self, args, -1);
2056 if (result == -2)
2057 return NULL;
2058 if (result == -1) {
2059 PyErr_SetString(PyExc_ValueError,
2060 "substring not found");
2061 return NULL;
2062 }
2063 return PyInt_FromSsize_t(result);
2064}
2065
2066
2067Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002068do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002069{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002070 char *s = PyString_AS_STRING(self);
2071 Py_ssize_t len = PyString_GET_SIZE(self);
2072 char *sep = PyString_AS_STRING(sepobj);
2073 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002074 Py_ssize_t i, j;
2075
2076 i = 0;
2077 if (striptype != RIGHTSTRIP) {
2078 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2079 i++;
2080 }
2081 }
2082
2083 j = len;
2084 if (striptype != LEFTSTRIP) {
2085 do {
2086 j--;
2087 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2088 j++;
2089 }
2090
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002091 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002092 Py_INCREF(self);
2093 return (PyObject*)self;
2094 }
2095 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002096 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002097}
2098
2099
2100Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002101do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103 char *s = PyString_AS_STRING(self);
2104 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002105
2106 i = 0;
2107 if (striptype != RIGHTSTRIP) {
2108 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2109 i++;
2110 }
2111 }
2112
2113 j = len;
2114 if (striptype != LEFTSTRIP) {
2115 do {
2116 j--;
2117 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2118 j++;
2119 }
2120
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002121 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002122 Py_INCREF(self);
2123 return (PyObject*)self;
2124 }
2125 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002126 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002127}
2128
2129
2130Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002131do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002132{
2133 PyObject *sep = NULL;
2134
2135 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2136 return NULL;
2137
2138 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002139 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002140 return do_xstrip(self, striptype, sep);
2141#ifdef Py_USING_UNICODE
2142 else if (PyUnicode_Check(sep)) {
2143 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2144 PyObject *res;
2145 if (uniself==NULL)
2146 return NULL;
2147 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2148 striptype, sep);
2149 Py_DECREF(uniself);
2150 return res;
2151 }
2152#endif
2153 PyErr_Format(PyExc_TypeError,
2154#ifdef Py_USING_UNICODE
2155 "%s arg must be None, str or unicode",
2156#else
2157 "%s arg must be None or str",
2158#endif
2159 STRIPNAME(striptype));
2160 return NULL;
2161 }
2162
2163 return do_strip(self, striptype);
2164}
2165
2166
2167PyDoc_STRVAR(strip__doc__,
2168"S.strip([chars]) -> string or unicode\n\
2169\n\
2170Return a copy of the string S with leading and trailing\n\
2171whitespace removed.\n\
2172If chars is given and not None, remove characters in chars instead.\n\
2173If chars is unicode, S will be converted to unicode before stripping");
2174
2175static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002176string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002177{
2178 if (PyTuple_GET_SIZE(args) == 0)
2179 return do_strip(self, BOTHSTRIP); /* Common case */
2180 else
2181 return do_argstrip(self, BOTHSTRIP, args);
2182}
2183
2184
2185PyDoc_STRVAR(lstrip__doc__,
2186"S.lstrip([chars]) -> string or unicode\n\
2187\n\
2188Return a copy of the string S with leading whitespace removed.\n\
2189If chars is given and not None, remove characters in chars instead.\n\
2190If chars is unicode, S will be converted to unicode before stripping");
2191
2192static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002193string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002194{
2195 if (PyTuple_GET_SIZE(args) == 0)
2196 return do_strip(self, LEFTSTRIP); /* Common case */
2197 else
2198 return do_argstrip(self, LEFTSTRIP, args);
2199}
2200
2201
2202PyDoc_STRVAR(rstrip__doc__,
2203"S.rstrip([chars]) -> string or unicode\n\
2204\n\
2205Return a copy of the string S with trailing whitespace removed.\n\
2206If chars is given and not None, remove characters in chars instead.\n\
2207If chars is unicode, S will be converted to unicode before stripping");
2208
2209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002210string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002211{
2212 if (PyTuple_GET_SIZE(args) == 0)
2213 return do_strip(self, RIGHTSTRIP); /* Common case */
2214 else
2215 return do_argstrip(self, RIGHTSTRIP, args);
2216}
2217
2218
2219PyDoc_STRVAR(lower__doc__,
2220"S.lower() -> string\n\
2221\n\
2222Return a copy of the string S converted to lowercase.");
2223
2224/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2225#ifndef _tolower
2226#define _tolower tolower
2227#endif
2228
2229static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002230string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002231{
2232 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002233 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002234 PyObject *newobj;
2235
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002236 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002237 if (!newobj)
2238 return NULL;
2239
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002240 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002241
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002242 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002243
2244 for (i = 0; i < n; i++) {
2245 int c = Py_CHARMASK(s[i]);
2246 if (isupper(c))
2247 s[i] = _tolower(c);
2248 }
2249
2250 return newobj;
2251}
2252
2253PyDoc_STRVAR(upper__doc__,
2254"S.upper() -> string\n\
2255\n\
2256Return a copy of the string S converted to uppercase.");
2257
2258#ifndef _toupper
2259#define _toupper toupper
2260#endif
2261
2262static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002263string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002264{
2265 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002267 PyObject *newobj;
2268
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002269 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002270 if (!newobj)
2271 return NULL;
2272
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002273 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002274
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002275 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002276
2277 for (i = 0; i < n; i++) {
2278 int c = Py_CHARMASK(s[i]);
2279 if (islower(c))
2280 s[i] = _toupper(c);
2281 }
2282
2283 return newobj;
2284}
2285
2286PyDoc_STRVAR(title__doc__,
2287"S.title() -> string\n\
2288\n\
2289Return a titlecased version of S, i.e. words start with uppercase\n\
2290characters, all remaining cased characters have lowercase.");
2291
2292static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002293string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002294{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002295 char *s = PyString_AS_STRING(self), *s_new;
2296 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002297 int previous_is_cased = 0;
2298 PyObject *newobj;
2299
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002300 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002301 if (newobj == NULL)
2302 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002303 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002304 for (i = 0; i < n; i++) {
2305 int c = Py_CHARMASK(*s++);
2306 if (islower(c)) {
2307 if (!previous_is_cased)
2308 c = toupper(c);
2309 previous_is_cased = 1;
2310 } else if (isupper(c)) {
2311 if (previous_is_cased)
2312 c = tolower(c);
2313 previous_is_cased = 1;
2314 } else
2315 previous_is_cased = 0;
2316 *s_new++ = c;
2317 }
2318 return newobj;
2319}
2320
2321PyDoc_STRVAR(capitalize__doc__,
2322"S.capitalize() -> string\n\
2323\n\
2324Return a copy of the string S with only its first character\n\
2325capitalized.");
2326
2327static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002328string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002329{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002330 char *s = PyString_AS_STRING(self), *s_new;
2331 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002332 PyObject *newobj;
2333
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002334 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002335 if (newobj == NULL)
2336 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002337 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002338 if (0 < n) {
2339 int c = Py_CHARMASK(*s++);
2340 if (islower(c))
2341 *s_new = toupper(c);
2342 else
2343 *s_new = c;
2344 s_new++;
2345 }
2346 for (i = 1; i < n; i++) {
2347 int c = Py_CHARMASK(*s++);
2348 if (isupper(c))
2349 *s_new = tolower(c);
2350 else
2351 *s_new = c;
2352 s_new++;
2353 }
2354 return newobj;
2355}
2356
2357
2358PyDoc_STRVAR(count__doc__,
2359"S.count(sub[, start[, end]]) -> int\n\
2360\n\
2361Return the number of non-overlapping occurrences of substring sub in\n\
2362string S[start:end]. Optional arguments start and end are interpreted\n\
2363as in slice notation.");
2364
2365static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002366string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002367{
2368 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002369 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002370 Py_ssize_t sub_len;
2371 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2372
2373 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2374 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2375 return NULL;
2376
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002377 if (PyString_Check(sub_obj)) {
2378 sub = PyString_AS_STRING(sub_obj);
2379 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002380 }
2381#ifdef Py_USING_UNICODE
2382 else if (PyUnicode_Check(sub_obj)) {
2383 Py_ssize_t count;
2384 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2385 if (count == -1)
2386 return NULL;
2387 else
2388 return PyInt_FromSsize_t(count);
2389 }
2390#endif
2391 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2392 return NULL;
2393
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002394 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002395
2396 return PyInt_FromSsize_t(
2397 stringlib_count(str + start, end - start, sub, sub_len)
2398 );
2399}
2400
2401PyDoc_STRVAR(swapcase__doc__,
2402"S.swapcase() -> string\n\
2403\n\
2404Return a copy of the string S with uppercase characters\n\
2405converted to lowercase and vice versa.");
2406
2407static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002408string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002409{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410 char *s = PyString_AS_STRING(self), *s_new;
2411 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002412 PyObject *newobj;
2413
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002414 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002415 if (newobj == NULL)
2416 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002417 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002418 for (i = 0; i < n; i++) {
2419 int c = Py_CHARMASK(*s++);
2420 if (islower(c)) {
2421 *s_new = toupper(c);
2422 }
2423 else if (isupper(c)) {
2424 *s_new = tolower(c);
2425 }
2426 else
2427 *s_new = c;
2428 s_new++;
2429 }
2430 return newobj;
2431}
2432
2433
2434PyDoc_STRVAR(translate__doc__,
2435"S.translate(table [,deletechars]) -> string\n\
2436\n\
2437Return a copy of the string S, where all characters occurring\n\
2438in the optional argument deletechars are removed, and the\n\
2439remaining characters have been mapped through the given\n\
2440translation table, which must be a string of length 256.");
2441
2442static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002443string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002444{
2445 register char *input, *output;
2446 const char *table;
2447 register Py_ssize_t i, c, changed = 0;
2448 PyObject *input_obj = (PyObject*)self;
2449 const char *output_start, *del_table=NULL;
2450 Py_ssize_t inlen, tablen, dellen = 0;
2451 PyObject *result;
2452 int trans_table[256];
2453 PyObject *tableobj, *delobj = NULL;
2454
2455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2456 &tableobj, &delobj))
2457 return NULL;
2458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002459 if (PyString_Check(tableobj)) {
2460 table = PyString_AS_STRING(tableobj);
2461 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002462 }
2463 else if (tableobj == Py_None) {
2464 table = NULL;
2465 tablen = 256;
2466 }
2467#ifdef Py_USING_UNICODE
2468 else if (PyUnicode_Check(tableobj)) {
2469 /* Unicode .translate() does not support the deletechars
2470 parameter; instead a mapping to None will cause characters
2471 to be deleted. */
2472 if (delobj != NULL) {
2473 PyErr_SetString(PyExc_TypeError,
2474 "deletions are implemented differently for unicode");
2475 return NULL;
2476 }
2477 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2478 }
2479#endif
2480 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2481 return NULL;
2482
2483 if (tablen != 256) {
2484 PyErr_SetString(PyExc_ValueError,
2485 "translation table must be 256 characters long");
2486 return NULL;
2487 }
2488
2489 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002490 if (PyString_Check(delobj)) {
2491 del_table = PyString_AS_STRING(delobj);
2492 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002493 }
2494#ifdef Py_USING_UNICODE
2495 else if (PyUnicode_Check(delobj)) {
2496 PyErr_SetString(PyExc_TypeError,
2497 "deletions are implemented differently for unicode");
2498 return NULL;
2499 }
2500#endif
2501 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2502 return NULL;
2503 }
2504 else {
2505 del_table = NULL;
2506 dellen = 0;
2507 }
2508
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002509 inlen = PyString_GET_SIZE(input_obj);
2510 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002511 if (result == NULL)
2512 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002513 output_start = output = PyString_AsString(result);
2514 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002515
2516 if (dellen == 0 && table != NULL) {
2517 /* If no deletions are required, use faster code */
2518 for (i = inlen; --i >= 0; ) {
2519 c = Py_CHARMASK(*input++);
2520 if (Py_CHARMASK((*output++ = table[c])) != c)
2521 changed = 1;
2522 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002523 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002524 return result;
2525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529
2530 if (table == NULL) {
2531 for (i = 0; i < 256; i++)
2532 trans_table[i] = Py_CHARMASK(i);
2533 } else {
2534 for (i = 0; i < 256; i++)
2535 trans_table[i] = Py_CHARMASK(table[i]);
2536 }
2537
2538 for (i = 0; i < dellen; i++)
2539 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2540
2541 for (i = inlen; --i >= 0; ) {
2542 c = Py_CHARMASK(*input++);
2543 if (trans_table[c] != -1)
2544 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2545 continue;
2546 changed = 1;
2547 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002548 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002549 Py_DECREF(result);
2550 Py_INCREF(input_obj);
2551 return input_obj;
2552 }
2553 /* Fix the size of the resulting string */
2554 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002555 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002556 return result;
2557}
2558
2559
/* Direction constants passed as the `direction` argument of the
   substring search helpers. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL when no such byte exists.
   Every argument is parenthesized so arbitrary expressions may be passed. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2567
2568/* String ops must return a string. */
2569/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002570Py_LOCAL(PyStringObject *)
2571return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002572{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002573 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002574 Py_INCREF(self);
2575 return self;
2576 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002577 return (PyStringObject *)PyString_FromStringAndSize(
2578 PyString_AS_STRING(self),
2579 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002580}
2581
2582Py_LOCAL_INLINE(Py_ssize_t)
2583countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2584{
2585 Py_ssize_t count=0;
2586 const char *start=target;
2587 const char *end=target+target_len;
2588
2589 while ( (start=findchar(start, end-start, c)) != NULL ) {
2590 count++;
2591 if (count >= maxcount)
2592 break;
2593 start += 1;
2594 }
2595 return count;
2596}
2597
2598Py_LOCAL(Py_ssize_t)
2599findstring(const char *target, Py_ssize_t target_len,
2600 const char *pattern, Py_ssize_t pattern_len,
2601 Py_ssize_t start,
2602 Py_ssize_t end,
2603 int direction)
2604{
2605 if (start < 0) {
2606 start += target_len;
2607 if (start < 0)
2608 start = 0;
2609 }
2610 if (end > target_len) {
2611 end = target_len;
2612 } else if (end < 0) {
2613 end += target_len;
2614 if (end < 0)
2615 end = 0;
2616 }
2617
2618 /* zero-length substrings always match at the first attempt */
2619 if (pattern_len == 0)
2620 return (direction > 0) ? start : end;
2621
2622 end -= pattern_len;
2623
2624 if (direction < 0) {
2625 for (; end >= start; end--)
2626 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2627 return end;
2628 } else {
2629 for (; start <= end; start++)
2630 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2631 return start;
2632 }
2633 return -1;
2634}
2635
2636Py_LOCAL_INLINE(Py_ssize_t)
2637countstring(const char *target, Py_ssize_t target_len,
2638 const char *pattern, Py_ssize_t pattern_len,
2639 Py_ssize_t start,
2640 Py_ssize_t end,
2641 int direction, Py_ssize_t maxcount)
2642{
2643 Py_ssize_t count=0;
2644
2645 if (start < 0) {
2646 start += target_len;
2647 if (start < 0)
2648 start = 0;
2649 }
2650 if (end > target_len) {
2651 end = target_len;
2652 } else if (end < 0) {
2653 end += target_len;
2654 if (end < 0)
2655 end = 0;
2656 }
2657
2658 /* zero-length substrings match everywhere */
2659 if (pattern_len == 0 || maxcount == 0) {
2660 if (target_len+1 < maxcount)
2661 return target_len+1;
2662 return maxcount;
2663 }
2664
2665 end -= pattern_len;
2666 if (direction < 0) {
2667 for (; (end >= start); end--)
2668 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2669 count++;
2670 if (--maxcount <= 0) break;
2671 end -= pattern_len-1;
2672 }
2673 } else {
2674 for (; (start <= end); start++)
2675 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2676 count++;
2677 if (--maxcount <= 0)
2678 break;
2679 start += pattern_len-1;
2680 }
2681 }
2682 return count;
2683}
2684
2685
2686/* Algorithms for different cases of string replacement */
2687
2688/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002689Py_LOCAL(PyStringObject *)
2690replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount)
2693{
2694 char *self_s, *result_s;
2695 Py_ssize_t self_len, result_len;
2696 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002697 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002698
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002699 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002700
2701 /* 1 at the end plus 1 after every character */
2702 count = self_len+1;
2703 if (maxcount < count)
2704 count = maxcount;
2705
2706 /* Check for overflow */
2707 /* result_len = count * to_len + self_len; */
2708 product = count * to_len;
2709 if (product / to_len != count) {
2710 PyErr_SetString(PyExc_OverflowError,
2711 "replace string is too long");
2712 return NULL;
2713 }
2714 result_len = product + self_len;
2715 if (result_len < 0) {
2716 PyErr_SetString(PyExc_OverflowError,
2717 "replace string is too long");
2718 return NULL;
2719 }
2720
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002721 if (! (result = (PyStringObject *)
2722 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002723 return NULL;
2724
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725 self_s = PyString_AS_STRING(self);
2726 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002727
2728 /* TODO: special case single character, which doesn't need memcpy */
2729
2730 /* Lay the first one down (guaranteed this will occur) */
2731 Py_MEMCPY(result_s, to_s, to_len);
2732 result_s += to_len;
2733 count -= 1;
2734
2735 for (i=0; i<count; i++) {
2736 *result_s++ = *self_s++;
2737 Py_MEMCPY(result_s, to_s, to_len);
2738 result_s += to_len;
2739 }
2740
2741 /* Copy the rest of the original string */
2742 Py_MEMCPY(result_s, self_s, self_len-i);
2743
2744 return result;
2745}
2746
2747/* Special case for deleting a single character */
2748/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002749Py_LOCAL(PyStringObject *)
2750replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002751 char from_c, Py_ssize_t maxcount)
2752{
2753 char *self_s, *result_s;
2754 char *start, *next, *end;
2755 Py_ssize_t self_len, result_len;
2756 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002757 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002758
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002759 self_len = PyString_GET_SIZE(self);
2760 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002761
2762 count = countchar(self_s, self_len, from_c, maxcount);
2763 if (count == 0) {
2764 return return_self(self);
2765 }
2766
2767 result_len = self_len - count; /* from_len == 1 */
2768 assert(result_len>=0);
2769
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002770 if ( (result = (PyStringObject *)
2771 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002772 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002773 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002774
2775 start = self_s;
2776 end = self_s + self_len;
2777 while (count-- > 0) {
2778 next = findchar(start, end-start, from_c);
2779 if (next == NULL)
2780 break;
2781 Py_MEMCPY(result_s, start, next-start);
2782 result_s += (next-start);
2783 start = next+1;
2784 }
2785 Py_MEMCPY(result_s, start, end-start);
2786
2787 return result;
2788}
2789
2790/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2791
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002792Py_LOCAL(PyStringObject *)
2793replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002794 const char *from_s, Py_ssize_t from_len,
2795 Py_ssize_t maxcount) {
2796 char *self_s, *result_s;
2797 char *start, *next, *end;
2798 Py_ssize_t self_len, result_len;
2799 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002800 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002801
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002802 self_len = PyString_GET_SIZE(self);
2803 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002804
2805 count = countstring(self_s, self_len,
2806 from_s, from_len,
2807 0, self_len, 1,
2808 maxcount);
2809
2810 if (count == 0) {
2811 /* no matches */
2812 return return_self(self);
2813 }
2814
2815 result_len = self_len - (count * from_len);
2816 assert (result_len>=0);
2817
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002818 if ( (result = (PyStringObject *)
2819 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002820 return NULL;
2821
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002822 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002823
2824 start = self_s;
2825 end = self_s + self_len;
2826 while (count-- > 0) {
2827 offset = findstring(start, end-start,
2828 from_s, from_len,
2829 0, end-start, FORWARD);
2830 if (offset == -1)
2831 break;
2832 next = start + offset;
2833
2834 Py_MEMCPY(result_s, start, next-start);
2835
2836 result_s += (next-start);
2837 start = next+from_len;
2838 }
2839 Py_MEMCPY(result_s, start, end-start);
2840 return result;
2841}
2842
2843/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844Py_LOCAL(PyStringObject *)
2845replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002846 char from_c, char to_c,
2847 Py_ssize_t maxcount)
2848{
2849 char *self_s, *result_s, *start, *end, *next;
2850 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002851 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002852
2853 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002854 self_s = PyString_AS_STRING(self);
2855 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002856
2857 next = findchar(self_s, self_len, from_c);
2858
2859 if (next == NULL) {
2860 /* No matches; return the original string */
2861 return return_self(self);
2862 }
2863
2864 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002866 if (result == NULL)
2867 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002868 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002869 Py_MEMCPY(result_s, self_s, self_len);
2870
2871 /* change everything in-place, starting with this one */
2872 start = result_s + (next-self_s);
2873 *start = to_c;
2874 start++;
2875 end = result_s + self_len;
2876
2877 while (--maxcount > 0) {
2878 next = findchar(start, end-start, from_c);
2879 if (next == NULL)
2880 break;
2881 *next = to_c;
2882 start = next+1;
2883 }
2884
2885 return result;
2886}
2887
2888/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002889Py_LOCAL(PyStringObject *)
2890replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002891 const char *from_s, Py_ssize_t from_len,
2892 const char *to_s, Py_ssize_t to_len,
2893 Py_ssize_t maxcount)
2894{
2895 char *result_s, *start, *end;
2896 char *self_s;
2897 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002898 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002899
2900 /* The result string will be the same size */
2901
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002902 self_s = PyString_AS_STRING(self);
2903 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002904
2905 offset = findstring(self_s, self_len,
2906 from_s, from_len,
2907 0, self_len, FORWARD);
2908 if (offset == -1) {
2909 /* No matches; return the original string */
2910 return return_self(self);
2911 }
2912
2913 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002914 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002915 if (result == NULL)
2916 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002917 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002918 Py_MEMCPY(result_s, self_s, self_len);
2919
2920 /* change everything in-place, starting with this one */
2921 start = result_s + offset;
2922 Py_MEMCPY(start, to_s, from_len);
2923 start += from_len;
2924 end = result_s + self_len;
2925
2926 while ( --maxcount > 0) {
2927 offset = findstring(start, end-start,
2928 from_s, from_len,
2929 0, end-start, FORWARD);
2930 if (offset==-1)
2931 break;
2932 Py_MEMCPY(start+offset, to_s, from_len);
2933 start += offset+from_len;
2934 }
2935
2936 return result;
2937}
2938
2939/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002940Py_LOCAL(PyStringObject *)
2941replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002942 char from_c,
2943 const char *to_s, Py_ssize_t to_len,
2944 Py_ssize_t maxcount)
2945{
2946 char *self_s, *result_s;
2947 char *start, *next, *end;
2948 Py_ssize_t self_len, result_len;
2949 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002950 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002951
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002952 self_s = PyString_AS_STRING(self);
2953 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002954
2955 count = countchar(self_s, self_len, from_c, maxcount);
2956 if (count == 0) {
2957 /* no matches, return unchanged */
2958 return return_self(self);
2959 }
2960
2961 /* use the difference between current and new, hence the "-1" */
2962 /* result_len = self_len + count * (to_len-1) */
2963 product = count * (to_len-1);
2964 if (product / (to_len-1) != count) {
2965 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2966 return NULL;
2967 }
2968 result_len = self_len + product;
2969 if (result_len < 0) {
2970 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2971 return NULL;
2972 }
2973
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002974 if ( (result = (PyStringObject *)
2975 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002976 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002977 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002978
2979 start = self_s;
2980 end = self_s + self_len;
2981 while (count-- > 0) {
2982 next = findchar(start, end-start, from_c);
2983 if (next == NULL)
2984 break;
2985
2986 if (next == start) {
2987 /* replace with the 'to' */
2988 Py_MEMCPY(result_s, to_s, to_len);
2989 result_s += to_len;
2990 start += 1;
2991 } else {
2992 /* copy the unchanged old then the 'to' */
2993 Py_MEMCPY(result_s, start, next-start);
2994 result_s += (next-start);
2995 Py_MEMCPY(result_s, to_s, to_len);
2996 result_s += to_len;
2997 start = next+1;
2998 }
2999 }
3000 /* Copy the remainder of the remaining string */
3001 Py_MEMCPY(result_s, start, end-start);
3002
3003 return result;
3004}
3005
3006/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003007Py_LOCAL(PyStringObject *)
3008replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003009 const char *from_s, Py_ssize_t from_len,
3010 const char *to_s, Py_ssize_t to_len,
3011 Py_ssize_t maxcount) {
3012 char *self_s, *result_s;
3013 char *start, *next, *end;
3014 Py_ssize_t self_len, result_len;
3015 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003016 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003017
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 self_s = PyString_AS_STRING(self);
3019 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003020
3021 count = countstring(self_s, self_len,
3022 from_s, from_len,
3023 0, self_len, FORWARD, maxcount);
3024 if (count == 0) {
3025 /* no matches, return unchanged */
3026 return return_self(self);
3027 }
3028
3029 /* Check for overflow */
3030 /* result_len = self_len + count * (to_len-from_len) */
3031 product = count * (to_len-from_len);
3032 if (product / (to_len-from_len) != count) {
3033 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3034 return NULL;
3035 }
3036 result_len = self_len + product;
3037 if (result_len < 0) {
3038 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3039 return NULL;
3040 }
3041
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003042 if ( (result = (PyStringObject *)
3043 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003046
3047 start = self_s;
3048 end = self_s + self_len;
3049 while (count-- > 0) {
3050 offset = findstring(start, end-start,
3051 from_s, from_len,
3052 0, end-start, FORWARD);
3053 if (offset == -1)
3054 break;
3055 next = start+offset;
3056 if (next == start) {
3057 /* replace with the 'to' */
3058 Py_MEMCPY(result_s, to_s, to_len);
3059 result_s += to_len;
3060 start += from_len;
3061 } else {
3062 /* copy the unchanged old then the 'to' */
3063 Py_MEMCPY(result_s, start, next-start);
3064 result_s += (next-start);
3065 Py_MEMCPY(result_s, to_s, to_len);
3066 result_s += to_len;
3067 start = next+from_len;
3068 }
3069 }
3070 /* Copy the remainder of the remaining string */
3071 Py_MEMCPY(result_s, start, end-start);
3072
3073 return result;
3074}
3075
3076
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003077Py_LOCAL(PyStringObject *)
3078replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003079 const char *from_s, Py_ssize_t from_len,
3080 const char *to_s, Py_ssize_t to_len,
3081 Py_ssize_t maxcount)
3082{
3083 if (maxcount < 0) {
3084 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003085 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003086 /* nothing to do; return the original string */
3087 return return_self(self);
3088 }
3089
3090 if (maxcount == 0 ||
3091 (from_len == 0 && to_len == 0)) {
3092 /* nothing to do; return the original string */
3093 return return_self(self);
3094 }
3095
3096 /* Handle zero-length special cases */
3097
3098 if (from_len == 0) {
3099 /* insert the 'to' string everywhere. */
3100 /* >>> "Python".replace("", ".") */
3101 /* '.P.y.t.h.o.n.' */
3102 return replace_interleave(self, to_s, to_len, maxcount);
3103 }
3104
3105 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3106 /* point for an empty self string to generate a non-empty string */
3107 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003108 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003109 return return_self(self);
3110 }
3111
3112 if (to_len == 0) {
3113 /* delete all occurances of 'from' string */
3114 if (from_len == 1) {
3115 return replace_delete_single_character(
3116 self, from_s[0], maxcount);
3117 } else {
3118 return replace_delete_substring(self, from_s, from_len, maxcount);
3119 }
3120 }
3121
3122 /* Handle special case where both strings have the same length */
3123
3124 if (from_len == to_len) {
3125 if (from_len == 1) {
3126 return replace_single_character_in_place(
3127 self,
3128 from_s[0],
3129 to_s[0],
3130 maxcount);
3131 } else {
3132 return replace_substring_in_place(
3133 self, from_s, from_len, to_s, to_len, maxcount);
3134 }
3135 }
3136
3137 /* Otherwise use the more generic algorithms */
3138 if (from_len == 1) {
3139 return replace_single_character(self, from_s[0],
3140 to_s, to_len, maxcount);
3141 } else {
3142 /* len('from')>=2, len('to')>=1 */
3143 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3144 }
3145}
3146
3147PyDoc_STRVAR(replace__doc__,
3148"S.replace (old, new[, count]) -> string\n\
3149\n\
3150Return a copy of string S with all occurrences of substring\n\
3151old replaced by new. If the optional argument count is\n\
3152given, only the first count occurrences are replaced.");
3153
3154static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003155string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003156{
3157 Py_ssize_t count = -1;
3158 PyObject *from, *to;
3159 const char *from_s, *to_s;
3160 Py_ssize_t from_len, to_len;
3161
3162 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3163 return NULL;
3164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003165 if (PyString_Check(from)) {
3166 from_s = PyString_AS_STRING(from);
3167 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003168 }
3169#ifdef Py_USING_UNICODE
3170 if (PyUnicode_Check(from))
3171 return PyUnicode_Replace((PyObject *)self,
3172 from, to, count);
3173#endif
3174 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3175 return NULL;
3176
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177 if (PyString_Check(to)) {
3178 to_s = PyString_AS_STRING(to);
3179 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003180 }
3181#ifdef Py_USING_UNICODE
3182 else if (PyUnicode_Check(to))
3183 return PyUnicode_Replace((PyObject *)self,
3184 from, to, count);
3185#endif
3186 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3187 return NULL;
3188
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003189 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003190 from_s, from_len,
3191 to_s, to_len, count);
3192}
3193
3194/** End DALKE **/
3195
3196/* Matches the end (direction >= 0) or start (direction < 0) of self
3197 * against substr, using the start and end arguments. Returns
3198 * -1 on error, 0 if not found and 1 if found.
3199 */
3200Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003201_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003202 Py_ssize_t end, int direction)
3203{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003204 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003205 Py_ssize_t slen;
3206 const char* sub;
3207 const char* str;
3208
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003209 if (PyString_Check(substr)) {
3210 sub = PyString_AS_STRING(substr);
3211 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003212 }
3213#ifdef Py_USING_UNICODE
3214 else if (PyUnicode_Check(substr))
3215 return PyUnicode_Tailmatch((PyObject *)self,
3216 substr, start, end, direction);
3217#endif
3218 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3219 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003220 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003221
3222 string_adjust_indices(&start, &end, len);
3223
3224 if (direction < 0) {
3225 /* startswith */
3226 if (start+slen > len)
3227 return 0;
3228 } else {
3229 /* endswith */
3230 if (end-start < slen || start > len)
3231 return 0;
3232
3233 if (end-slen > start)
3234 start = end - slen;
3235 }
3236 if (end-start >= slen)
3237 return ! memcmp(str+start, sub, slen);
3238 return 0;
3239}
3240
3241
3242PyDoc_STRVAR(startswith__doc__,
3243"S.startswith(prefix[, start[, end]]) -> bool\n\
3244\n\
3245Return True if S starts with the specified prefix, False otherwise.\n\
3246With optional start, test S beginning at that position.\n\
3247With optional end, stop comparing S at that position.\n\
3248prefix can also be a tuple of strings to try.");
3249
3250static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003252{
3253 Py_ssize_t start = 0;
3254 Py_ssize_t end = PY_SSIZE_T_MAX;
3255 PyObject *subobj;
3256 int result;
3257
3258 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3259 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3260 return NULL;
3261 if (PyTuple_Check(subobj)) {
3262 Py_ssize_t i;
3263 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3264 result = _string_tailmatch(self,
3265 PyTuple_GET_ITEM(subobj, i),
3266 start, end, -1);
3267 if (result == -1)
3268 return NULL;
3269 else if (result) {
3270 Py_RETURN_TRUE;
3271 }
3272 }
3273 Py_RETURN_FALSE;
3274 }
3275 result = _string_tailmatch(self, subobj, start, end, -1);
3276 if (result == -1)
3277 return NULL;
3278 else
3279 return PyBool_FromLong(result);
3280}
3281
3282
3283PyDoc_STRVAR(endswith__doc__,
3284"S.endswith(suffix[, start[, end]]) -> bool\n\
3285\n\
3286Return True if S ends with the specified suffix, False otherwise.\n\
3287With optional start, test S beginning at that position.\n\
3288With optional end, stop comparing S at that position.\n\
3289suffix can also be a tuple of strings to try.");
3290
3291static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003292string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003293{
3294 Py_ssize_t start = 0;
3295 Py_ssize_t end = PY_SSIZE_T_MAX;
3296 PyObject *subobj;
3297 int result;
3298
3299 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3300 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3301 return NULL;
3302 if (PyTuple_Check(subobj)) {
3303 Py_ssize_t i;
3304 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3305 result = _string_tailmatch(self,
3306 PyTuple_GET_ITEM(subobj, i),
3307 start, end, +1);
3308 if (result == -1)
3309 return NULL;
3310 else if (result) {
3311 Py_RETURN_TRUE;
3312 }
3313 }
3314 Py_RETURN_FALSE;
3315 }
3316 result = _string_tailmatch(self, subobj, start, end, +1);
3317 if (result == -1)
3318 return NULL;
3319 else
3320 return PyBool_FromLong(result);
3321}
3322
3323
3324PyDoc_STRVAR(encode__doc__,
3325"S.encode([encoding[,errors]]) -> object\n\
3326\n\
3327Encodes S using the codec registered for encoding. encoding defaults\n\
3328to the default encoding. errors may be given to set a different error\n\
3329handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3330a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3331'xmlcharrefreplace' as well as any other name registered with\n\
3332codecs.register_error that is able to handle UnicodeEncodeErrors.");
3333
3334static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003335string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003336{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003337 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003338 char *encoding = NULL;
3339 char *errors = NULL;
3340 PyObject *v;
3341
Benjamin Peterson332d7212009-09-18 21:14:55 +00003342 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3343 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003344 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003346 if (v == NULL)
3347 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003349 PyErr_Format(PyExc_TypeError,
3350 "encoder did not return a string/unicode object "
3351 "(type=%.400s)",
3352 Py_TYPE(v)->tp_name);
3353 Py_DECREF(v);
3354 return NULL;
3355 }
3356 return v;
3357
3358 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003359 return NULL;
3360}
3361
Christian Heimes44720832008-05-26 13:01:01 +00003362
3363PyDoc_STRVAR(decode__doc__,
3364"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365\n\
Christian Heimes44720832008-05-26 13:01:01 +00003366Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003367to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003368handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3369a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003370as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003371able to handle UnicodeDecodeErrors.");
3372
3373static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003374string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003375{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003376 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003377 char *encoding = NULL;
3378 char *errors = NULL;
3379 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003380
Benjamin Peterson332d7212009-09-18 21:14:55 +00003381 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3382 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003383 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003385 if (v == NULL)
3386 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003387 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003388 PyErr_Format(PyExc_TypeError,
3389 "decoder did not return a string/unicode object "
3390 "(type=%.400s)",
3391 Py_TYPE(v)->tp_name);
3392 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003393 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003394 }
3395 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003396
Christian Heimes44720832008-05-26 13:01:01 +00003397 onError:
3398 return NULL;
3399}
3400
3401
3402PyDoc_STRVAR(expandtabs__doc__,
3403"S.expandtabs([tabsize]) -> string\n\
3404\n\
3405Return a copy of S where all tab characters are expanded using spaces.\n\
3406If tabsize is not given, a tab size of 8 characters is assumed.");
3407
3408static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003409string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003410{
3411 const char *e, *p, *qe;
3412 char *q;
3413 Py_ssize_t i, j, incr;
3414 PyObject *u;
3415 int tabsize = 8;
3416
3417 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3418 return NULL;
3419
3420 /* First pass: determine size of output string */
3421 i = 0; /* chars up to and including most recent \n or \r */
3422 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003423 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3424 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003425 if (*p == '\t') {
3426 if (tabsize > 0) {
3427 incr = tabsize - (j % tabsize);
3428 if (j > PY_SSIZE_T_MAX - incr)
3429 goto overflow1;
3430 j += incr;
3431 }
3432 }
3433 else {
3434 if (j > PY_SSIZE_T_MAX - 1)
3435 goto overflow1;
3436 j++;
3437 if (*p == '\n' || *p == '\r') {
3438 if (i > PY_SSIZE_T_MAX - j)
3439 goto overflow1;
3440 i += j;
3441 j = 0;
3442 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003443 }
Christian Heimes44720832008-05-26 13:01:01 +00003444
3445 if (i > PY_SSIZE_T_MAX - j)
3446 goto overflow1;
3447
3448 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003450 if (!u)
3451 return NULL;
3452
3453 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003454 q = PyString_AS_STRING(u); /* next output char */
3455 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003456
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003457 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003458 if (*p == '\t') {
3459 if (tabsize > 0) {
3460 i = tabsize - (j % tabsize);
3461 j += i;
3462 while (i--) {
3463 if (q >= qe)
3464 goto overflow2;
3465 *q++ = ' ';
3466 }
3467 }
3468 }
3469 else {
3470 if (q >= qe)
3471 goto overflow2;
3472 *q++ = *p;
3473 j++;
3474 if (*p == '\n' || *p == '\r')
3475 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003476 }
Christian Heimes44720832008-05-26 13:01:01 +00003477
3478 return u;
3479
3480 overflow2:
3481 Py_DECREF(u);
3482 overflow1:
3483 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3484 return NULL;
3485}
3486
3487Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003488pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003489{
3490 PyObject *u;
3491
3492 if (left < 0)
3493 left = 0;
3494 if (right < 0)
3495 right = 0;
3496
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003497 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003498 Py_INCREF(self);
3499 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003500 }
3501
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003502 u = PyString_FromStringAndSize(NULL,
3503 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003504 if (u) {
3505 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003506 memset(PyString_AS_STRING(u), fill, left);
3507 Py_MEMCPY(PyString_AS_STRING(u) + left,
3508 PyString_AS_STRING(self),
3509 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003510 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003511 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003512 fill, right);
3513 }
3514
3515 return u;
3516}
3517
3518PyDoc_STRVAR(ljust__doc__,
3519"S.ljust(width[, fillchar]) -> string\n"
3520"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003521"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003522"done using the specified fill character (default is a space).");
3523
3524static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003525string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003526{
3527 Py_ssize_t width;
3528 char fillchar = ' ';
3529
3530 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3531 return NULL;
3532
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003533 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003534 Py_INCREF(self);
3535 return (PyObject*) self;
3536 }
3537
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003538 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003539}
3540
3541
3542PyDoc_STRVAR(rjust__doc__,
3543"S.rjust(width[, fillchar]) -> string\n"
3544"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003545"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003546"done using the specified fill character (default is a space)");
3547
3548static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003550{
3551 Py_ssize_t width;
3552 char fillchar = ' ';
3553
3554 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3555 return NULL;
3556
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003557 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003558 Py_INCREF(self);
3559 return (PyObject*) self;
3560 }
3561
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003562 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003563}
3564
3565
3566PyDoc_STRVAR(center__doc__,
3567"S.center(width[, fillchar]) -> string\n"
3568"\n"
3569"Return S centered in a string of length width. Padding is\n"
3570"done using the specified fill character (default is a space)");
3571
3572static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003573string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003574{
3575 Py_ssize_t marg, left;
3576 Py_ssize_t width;
3577 char fillchar = ' ';
3578
3579 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3580 return NULL;
3581
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003582 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003583 Py_INCREF(self);
3584 return (PyObject*) self;
3585 }
3586
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003587 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003588 left = marg / 2 + (marg & width & 1);
3589
3590 return pad(self, left, marg - left, fillchar);
3591}
3592
3593PyDoc_STRVAR(zfill__doc__,
3594"S.zfill(width) -> string\n"
3595"\n"
3596"Pad a numeric string S with zeros on the left, to fill a field\n"
3597"of the specified width. The string S is never truncated.");
3598
3599static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003600string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003601{
3602 Py_ssize_t fill;
3603 PyObject *s;
3604 char *p;
3605 Py_ssize_t width;
3606
3607 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3608 return NULL;
3609
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003610 if (PyString_GET_SIZE(self) >= width) {
3611 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003612 Py_INCREF(self);
3613 return (PyObject*) self;
3614 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003615 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003616 return PyString_FromStringAndSize(
3617 PyString_AS_STRING(self),
3618 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003619 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003620 }
3621
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003622 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003623
Christian Heimes44720832008-05-26 13:01:01 +00003624 s = pad(self, fill, 0, '0');
3625
3626 if (s == NULL)
3627 return NULL;
3628
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003629 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003630 if (p[fill] == '+' || p[fill] == '-') {
3631 /* move sign to beginning of string */
3632 p[0] = p[fill];
3633 p[fill] = '0';
3634 }
3635
3636 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637}
3638
Christian Heimes44720832008-05-26 13:01:01 +00003639PyDoc_STRVAR(isspace__doc__,
3640"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003641\n\
Christian Heimes44720832008-05-26 13:01:01 +00003642Return True if all characters in S are whitespace\n\
3643and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003644
Christian Heimes44720832008-05-26 13:01:01 +00003645static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003646string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003647{
Christian Heimes44720832008-05-26 13:01:01 +00003648 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003650 register const unsigned char *e;
3651
3652 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003653 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003654 isspace(*p))
3655 return PyBool_FromLong(1);
3656
3657 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003658 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003659 return PyBool_FromLong(0);
3660
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003661 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003662 for (; p < e; p++) {
3663 if (!isspace(*p))
3664 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003665 }
Christian Heimes44720832008-05-26 13:01:01 +00003666 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003667}
3668
Christian Heimes44720832008-05-26 13:01:01 +00003669
3670PyDoc_STRVAR(isalpha__doc__,
3671"S.isalpha() -> bool\n\
3672\n\
3673Return True if all characters in S are alphabetic\n\
3674and there is at least one character in S, False otherwise.");
3675
3676static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003677string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003678{
Christian Heimes44720832008-05-26 13:01:01 +00003679 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003681 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003682
Christian Heimes44720832008-05-26 13:01:01 +00003683 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003684 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003685 isalpha(*p))
3686 return PyBool_FromLong(1);
3687
3688 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003689 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003690 return PyBool_FromLong(0);
3691
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003692 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003693 for (; p < e; p++) {
3694 if (!isalpha(*p))
3695 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003696 }
Christian Heimes44720832008-05-26 13:01:01 +00003697 return PyBool_FromLong(1);
3698}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003699
Christian Heimes44720832008-05-26 13:01:01 +00003700
3701PyDoc_STRVAR(isalnum__doc__,
3702"S.isalnum() -> bool\n\
3703\n\
3704Return True if all characters in S are alphanumeric\n\
3705and there is at least one character in S, False otherwise.");
3706
3707static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003708string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003709{
3710 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003712 register const unsigned char *e;
3713
3714 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003715 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003716 isalnum(*p))
3717 return PyBool_FromLong(1);
3718
3719 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003720 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003721 return PyBool_FromLong(0);
3722
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003723 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003724 for (; p < e; p++) {
3725 if (!isalnum(*p))
3726 return PyBool_FromLong(0);
3727 }
3728 return PyBool_FromLong(1);
3729}
3730
3731
3732PyDoc_STRVAR(isdigit__doc__,
3733"S.isdigit() -> bool\n\
3734\n\
3735Return True if all characters in S are digits\n\
3736and there is at least one character in S, False otherwise.");
3737
3738static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003739string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003740{
3741 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003743 register const unsigned char *e;
3744
3745 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003746 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003747 isdigit(*p))
3748 return PyBool_FromLong(1);
3749
3750 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003751 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003752 return PyBool_FromLong(0);
3753
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003754 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003755 for (; p < e; p++) {
3756 if (!isdigit(*p))
3757 return PyBool_FromLong(0);
3758 }
3759 return PyBool_FromLong(1);
3760}
3761
3762
3763PyDoc_STRVAR(islower__doc__,
3764"S.islower() -> bool\n\
3765\n\
3766Return True if all cased characters in S are lowercase and there is\n\
3767at least one cased character in S, False otherwise.");
3768
3769static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003770string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003771{
3772 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003773 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003774 register const unsigned char *e;
3775 int cased;
3776
3777 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003778 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003779 return PyBool_FromLong(islower(*p) != 0);
3780
3781 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003782 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003783 return PyBool_FromLong(0);
3784
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003785 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003786 cased = 0;
3787 for (; p < e; p++) {
3788 if (isupper(*p))
3789 return PyBool_FromLong(0);
3790 else if (!cased && islower(*p))
3791 cased = 1;
3792 }
3793 return PyBool_FromLong(cased);
3794}
3795
3796
3797PyDoc_STRVAR(isupper__doc__,
3798"S.isupper() -> bool\n\
3799\n\
3800Return True if all cased characters in S are uppercase and there is\n\
3801at least one cased character in S, False otherwise.");
3802
3803static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003805{
3806 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003807 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003808 register const unsigned char *e;
3809 int cased;
3810
3811 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003812 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003813 return PyBool_FromLong(isupper(*p) != 0);
3814
3815 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003816 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003817 return PyBool_FromLong(0);
3818
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003819 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003820 cased = 0;
3821 for (; p < e; p++) {
3822 if (islower(*p))
3823 return PyBool_FromLong(0);
3824 else if (!cased && isupper(*p))
3825 cased = 1;
3826 }
3827 return PyBool_FromLong(cased);
3828}
3829
3830
3831PyDoc_STRVAR(istitle__doc__,
3832"S.istitle() -> bool\n\
3833\n\
3834Return True if S is a titlecased string and there is at least one\n\
3835character in S, i.e. uppercase characters may only follow uncased\n\
3836characters and lowercase characters only cased ones. Return False\n\
3837otherwise.");
3838
3839static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003840string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003841{
3842 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003843 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003844 register const unsigned char *e;
3845 int cased, previous_is_cased;
3846
3847 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003848 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003849 return PyBool_FromLong(isupper(*p) != 0);
3850
3851 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003852 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003853 return PyBool_FromLong(0);
3854
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003856 cased = 0;
3857 previous_is_cased = 0;
3858 for (; p < e; p++) {
3859 register const unsigned char ch = *p;
3860
3861 if (isupper(ch)) {
3862 if (previous_is_cased)
3863 return PyBool_FromLong(0);
3864 previous_is_cased = 1;
3865 cased = 1;
3866 }
3867 else if (islower(ch)) {
3868 if (!previous_is_cased)
3869 return PyBool_FromLong(0);
3870 previous_is_cased = 1;
3871 cased = 1;
3872 }
3873 else
3874 previous_is_cased = 0;
3875 }
3876 return PyBool_FromLong(cased);
3877}
3878
3879
3880PyDoc_STRVAR(splitlines__doc__,
3881"S.splitlines([keepends]) -> list of strings\n\
3882\n\
3883Return a list of the lines in S, breaking at line boundaries.\n\
3884Line breaks are not included in the resulting list unless keepends\n\
3885is given and true.");
3886
3887static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003888string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003889{
3890 register Py_ssize_t i;
3891 register Py_ssize_t j;
3892 Py_ssize_t len;
3893 int keepends = 0;
3894 PyObject *list;
3895 PyObject *str;
3896 char *data;
3897
3898 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3899 return NULL;
3900
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003901 data = PyString_AS_STRING(self);
3902 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003903
3904 /* This does not use the preallocated list because splitlines is
3905 usually run with hundreds of newlines. The overhead of
3906 switching between PyList_SET_ITEM and append causes about a
3907 2-3% slowdown for that common case. A smarter implementation
3908 could move the if check out, so the SET_ITEMs are done first
3909 and the appends only done when the prealloc buffer is full.
3910 That's too much work for little gain.*/
3911
3912 list = PyList_New(0);
3913 if (!list)
3914 goto onError;
3915
3916 for (i = j = 0; i < len; ) {
3917 Py_ssize_t eol;
3918
3919 /* Find a line and append it */
3920 while (i < len && data[i] != '\n' && data[i] != '\r')
3921 i++;
3922
3923 /* Skip the line break reading CRLF as one line break */
3924 eol = i;
3925 if (i < len) {
3926 if (data[i] == '\r' && i + 1 < len &&
3927 data[i+1] == '\n')
3928 i += 2;
3929 else
3930 i++;
3931 if (keepends)
3932 eol = i;
3933 }
3934 SPLIT_APPEND(data, j, eol);
3935 j = i;
3936 }
3937 if (j < len) {
3938 SPLIT_APPEND(data, j, len);
3939 }
3940
3941 return list;
3942
3943 onError:
3944 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003945 return NULL;
3946}
3947
Robert Schuppenies51df0642008-06-01 16:16:17 +00003948PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003949"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003950
3951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003952string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003953{
3954 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00003955 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003956 return PyInt_FromSsize_t(res);
3957}
3958
Christian Heimes44720832008-05-26 13:01:01 +00003959#undef SPLIT_APPEND
3960#undef SPLIT_ADD
3961#undef MAX_PREALLOC
3962#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003963
3964static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003965string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003966{
Christian Heimes44720832008-05-26 13:01:01 +00003967 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003968}
3969
Christian Heimes1a6387e2008-03-26 12:49:49 +00003970
Christian Heimes44720832008-05-26 13:01:01 +00003971#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003972
Christian Heimes44720832008-05-26 13:01:01 +00003973PyDoc_STRVAR(format__doc__,
3974"S.format(*args, **kwargs) -> unicode\n\
3975\n\
3976");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003977
Eric Smithdc13b792008-05-30 18:10:04 +00003978static PyObject *
3979string__format__(PyObject* self, PyObject* args)
3980{
3981 PyObject *format_spec;
3982 PyObject *result = NULL;
3983 PyObject *tmp = NULL;
3984
3985 /* If 2.x, convert format_spec to the same type as value */
3986 /* This is to allow things like u''.format('') */
3987 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3988 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003989 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003990 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3991 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3992 goto done;
3993 }
3994 tmp = PyObject_Str(format_spec);
3995 if (tmp == NULL)
3996 goto done;
3997 format_spec = tmp;
3998
3999 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004000 PyString_AS_STRING(format_spec),
4001 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004002done:
4003 Py_XDECREF(tmp);
4004 return result;
4005}
4006
Christian Heimes44720832008-05-26 13:01:01 +00004007PyDoc_STRVAR(p_format__doc__,
4008"S.__format__(format_spec) -> unicode\n\
4009\n\
4010");
4011
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004012
Christian Heimes1a6387e2008-03-26 12:49:49 +00004013static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004014string_methods[] = {
4015 /* Counterparts of the obsolete stropmodule functions; except
4016 string.maketrans(). */
4017 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4018 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4019 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4020 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4021 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4022 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4023 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4024 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4025 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4026 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4027 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4028 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4029 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4030 capitalize__doc__},
4031 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4032 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4033 endswith__doc__},
4034 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4035 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4036 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4037 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4038 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4039 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4040 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4041 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4042 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4043 rpartition__doc__},
4044 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4045 startswith__doc__},
4046 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4047 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4048 swapcase__doc__},
4049 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4050 translate__doc__},
4051 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4052 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4053 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4054 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4055 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4056 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4057 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4058 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4059 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Benjamin Peterson332d7212009-09-18 21:14:55 +00004060 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
4061 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004062 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4063 expandtabs__doc__},
4064 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4065 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004066 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4067 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004068 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4069 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004070};
4071
4072static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004073str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004074
Christian Heimes44720832008-05-26 13:01:01 +00004075static PyObject *
4076string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4077{
4078 PyObject *x = NULL;
4079 static char *kwlist[] = {"object", 0};
4080
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004081 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004082 return str_subtype_new(type, args, kwds);
4083 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4084 return NULL;
4085 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004086 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004087 return PyObject_Str(x);
4088}
4089
4090static PyObject *
4091str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4092{
4093 PyObject *tmp, *pnew;
4094 Py_ssize_t n;
4095
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004096 assert(PyType_IsSubtype(type, &PyString_Type));
4097 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004098 if (tmp == NULL)
4099 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004100 assert(PyString_CheckExact(tmp));
4101 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004102 pnew = type->tp_alloc(type, n);
4103 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004104 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4105 ((PyStringObject *)pnew)->ob_shash =
4106 ((PyStringObject *)tmp)->ob_shash;
4107 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004108 }
4109 Py_DECREF(tmp);
4110 return pnew;
4111}
4112
4113static PyObject *
4114basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4115{
4116 PyErr_SetString(PyExc_TypeError,
4117 "The basestring type cannot be instantiated");
4118 return NULL;
4119}
4120
4121static PyObject *
4122string_mod(PyObject *v, PyObject *w)
4123{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004124 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004125 Py_INCREF(Py_NotImplemented);
4126 return Py_NotImplemented;
4127 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004128 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004129}
4130
4131PyDoc_STRVAR(basestring_doc,
4132"Type basestring cannot be instantiated; it is the base for str and unicode.");
4133
4134static PyNumberMethods string_as_number = {
4135 0, /*nb_add*/
4136 0, /*nb_subtract*/
4137 0, /*nb_multiply*/
4138 0, /*nb_divide*/
4139 string_mod, /*nb_remainder*/
4140};
4141
4142
/* Type object for `basestring`.  It derives from object, may be subclassed
   (Py_TPFLAGS_BASETYPE) and cannot be instantiated: its only non-trivial
   slots are the docstring and basestring_new, which always raises. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4184
/* Docstring installed as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
4190
/* Type object for `str`.  It is a variable-size type: the basic size is
   PyStringObject_SIZE and each item is one char.  It derives from
   PyBaseString_Type, may itself be subclassed, and wires in the number
   (for `%`), sequence, mapping and buffer protocol tables defined in this
   file. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4234
4235void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004236PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004237{
4238 register PyObject *v;
4239 if (*pv == NULL)
4240 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004241 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004242 Py_DECREF(*pv);
4243 *pv = NULL;
4244 return;
4245 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004246 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004247 Py_DECREF(*pv);
4248 *pv = v;
4249}
4250
/* Same as PyString_Concat(pv, w), but additionally releases the caller's
   reference to w afterwards.  w may be NULL (Py_XDECREF is used). */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
4257
4258
4259/* The following function breaks the notion that strings are immutable:
4260 it changes the size of a string. We get away with this only if there
4261 is only one module referencing the object. You can also think of it
4262 as creating a new string object and destroying the old one, only
4263 more efficiently. In any case, don't use this if the string may
4264 already be known to some other part of the code...
4265 Note that if there's not enough memory to resize the string, the original
4266 string object at *pv is deallocated, *pv is set to NULL, an "out of
4267 memory" exception is set, and -1 is returned. Else (on success) 0 is
4268 returned, and the value in *pv may or may not be the same as on input.
4269 As always, an extra byte is allocated for a trailing \0 byte (newsize
4270 does *not* include that), and a trailing \0 byte is stored.
4271*/
4272
4273int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004274_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004275{
4276 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004277 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004278 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004279 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4280 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004281 *pv = 0;
4282 Py_DECREF(v);
4283 PyErr_BadInternalCall();
4284 return -1;
4285 }
4286 /* XXX UNREF/NEWREF interface should be more symmetrical */
4287 _Py_DEC_REFTOTAL;
4288 _Py_ForgetReference(v);
4289 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004290 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004291 if (*pv == NULL) {
4292 PyObject_Del(v);
4293 PyErr_NoMemory();
4294 return -1;
4295 }
4296 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004297 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004298 Py_SIZE(sv) = newsize;
4299 sv->ob_sval[newsize] = '\0';
4300 sv->ob_shash = -1; /* invalidate cached hash value */
4301 return 0;
4302}
4303
4304/* Helpers for formatstring */
4305
4306Py_LOCAL_INLINE(PyObject *)
4307getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4308{
4309 Py_ssize_t argidx = *p_argidx;
4310 if (argidx < arglen) {
4311 (*p_argidx)++;
4312 if (arglen < 0)
4313 return args;
4314 else
4315 return PyTuple_GetItem(args, argidx);
4316 }
4317 PyErr_SetString(PyExc_TypeError,
4318 "not enough arguments for format string");
4319 return NULL;
4320}
4321
/* Format codes
 *
 * Bit flags recording which flag characters were seen in a '%' conversion
 * specifier.  They are OR-ed together into the int `flags` bitmask that is
 * handed to the format helpers below.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4334
4335Py_LOCAL_INLINE(int)
4336formatfloat(char *buf, size_t buflen, int flags,
4337 int prec, int type, PyObject *v)
4338{
Eric Smithc1bdf892009-10-26 17:46:17 +00004339 char *tmp;
Christian Heimes44720832008-05-26 13:01:01 +00004340 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00004341 Py_ssize_t len;
4342
Christian Heimes44720832008-05-26 13:01:01 +00004343 x = PyFloat_AsDouble(v);
4344 if (x == -1.0 && PyErr_Occurred()) {
4345 PyErr_Format(PyExc_TypeError, "float argument required, "
4346 "not %.200s", Py_TYPE(v)->tp_name);
4347 return -1;
4348 }
4349 if (prec < 0)
4350 prec = 6;
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004351#if SIZEOF_INT > 4
Mark Dickinson174e9092009-03-29 16:17:16 +00004352 /* make sure that the decimal representation of precision really does
4353 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004354 if (prec > 0x7fffffff) {
Mark Dickinson174e9092009-03-29 16:17:16 +00004355 PyErr_SetString(PyExc_OverflowError,
4356 "outrageously large precision "
4357 "for formatted float");
4358 return -1;
4359 }
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004360#endif
Mark Dickinson174e9092009-03-29 16:17:16 +00004361
Mark Dickinson2e648ec2009-03-29 14:37:51 +00004362 if (type == 'f' && fabs(x) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +00004363 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004364 /* Worst case length calc to ensure no buffer overrun:
4365
4366 'g' formats:
4367 fmt = %#.<prec>g
4368 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4369 for any double rep.)
4370 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4371
4372 'f' formats:
4373 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4374 len = 1 + 50 + 1 + prec = 52 + prec
4375
4376 If prec=0 the effective precision is 1 (the leading digit is
4377 always given), therefore increase the length by one.
4378
4379 */
4380 if (((type == 'g' || type == 'G') &&
4381 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004382 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004383 PyErr_SetString(PyExc_OverflowError,
4384 "formatted float is too long (precision too large?)");
4385 return -1;
4386 }
Eric Smithc1bdf892009-10-26 17:46:17 +00004387 tmp = PyOS_double_to_string(x, type, prec,
4388 (flags&F_ALT)?Py_DTSF_ALT:0, NULL);
4389 if (!tmp)
4390 return -1;
4391 len = strlen(tmp);
4392 if (len >= buflen) {
4393 PyErr_SetString(PyExc_OverflowError,
4394 "formatted float is too long (precision too large?)");
4395 PyMem_Free(tmp);
4396 return -1;
4397 }
4398 strcpy(buf, tmp);
4399 PyMem_Free(tmp);
4400 return (int)len;
Christian Heimes44720832008-05-26 13:01:01 +00004401}
4402
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004403/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004404 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4405 * Python's regular ints.
4406 * Return value: a new PyString*, or NULL if error.
4407 * . *pbuf is set to point into it,
4408 * *plen set to the # of chars following that.
4409 * Caller must decref it when done using pbuf.
4410 * The string starting at *pbuf is of the form
4411 * "-"? ("0x" | "0X")? digit+
4412 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4413 * set in flags. The case of hex digits will be correct,
4414 * There will be at least prec digits, zero-filled on the left if
4415 * necessary to get that many.
4416 * val object to be converted
4417 * flags bitmask of format flags; only F_ALT is looked at
4418 * prec minimum number of digits; 0-fill on left if needed
4419 * type a character in [duoxX]; u acts the same as d
4420 *
4421 * CAUTION: o, x and X conversions on regular ints can never
4422 * produce a '-' sign, but can for Python's unbounded ints.
4423 */
4424PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004425_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004426 char **pbuf, int *plen)
4427{
4428 PyObject *result = NULL;
4429 char *buf;
4430 Py_ssize_t i;
4431 int sign; /* 1 if '-', else 0 */
4432 int len; /* number of characters */
4433 Py_ssize_t llen;
4434 int numdigits; /* len == numnondigits + numdigits */
4435 int numnondigits = 0;
4436
4437 switch (type) {
4438 case 'd':
4439 case 'u':
4440 result = Py_TYPE(val)->tp_str(val);
4441 break;
4442 case 'o':
4443 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4444 break;
4445 case 'x':
4446 case 'X':
4447 numnondigits = 2;
4448 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4449 break;
4450 default:
4451 assert(!"'type' not in [duoxX]");
4452 }
4453 if (!result)
4454 return NULL;
4455
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004456 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004457 if (!buf) {
4458 Py_DECREF(result);
4459 return NULL;
4460 }
4461
4462 /* To modify the string in-place, there can only be one reference. */
4463 if (Py_REFCNT(result) != 1) {
4464 PyErr_BadInternalCall();
4465 return NULL;
4466 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004467 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004468 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004469 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004470 return NULL;
4471 }
4472 len = (int)llen;
4473 if (buf[len-1] == 'L') {
4474 --len;
4475 buf[len] = '\0';
4476 }
4477 sign = buf[0] == '-';
4478 numnondigits += sign;
4479 numdigits = len - numnondigits;
4480 assert(numdigits > 0);
4481
4482 /* Get rid of base marker unless F_ALT */
4483 if ((flags & F_ALT) == 0) {
4484 /* Need to skip 0x, 0X or 0. */
4485 int skipped = 0;
4486 switch (type) {
4487 case 'o':
4488 assert(buf[sign] == '0');
4489 /* If 0 is only digit, leave it alone. */
4490 if (numdigits > 1) {
4491 skipped = 1;
4492 --numdigits;
4493 }
4494 break;
4495 case 'x':
4496 case 'X':
4497 assert(buf[sign] == '0');
4498 assert(buf[sign + 1] == 'x');
4499 skipped = 2;
4500 numnondigits -= 2;
4501 break;
4502 }
4503 if (skipped) {
4504 buf += skipped;
4505 len -= skipped;
4506 if (sign)
4507 buf[0] = '-';
4508 }
4509 assert(len == numnondigits + numdigits);
4510 assert(numdigits > 0);
4511 }
4512
4513 /* Fill with leading zeroes to meet minimum width. */
4514 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004515 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004516 numnondigits + prec);
4517 char *b1;
4518 if (!r1) {
4519 Py_DECREF(result);
4520 return NULL;
4521 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004522 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004523 for (i = 0; i < numnondigits; ++i)
4524 *b1++ = *buf++;
4525 for (i = 0; i < prec - numdigits; i++)
4526 *b1++ = '0';
4527 for (i = 0; i < numdigits; i++)
4528 *b1++ = *buf++;
4529 *b1 = '\0';
4530 Py_DECREF(result);
4531 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004532 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004533 len = numnondigits + prec;
4534 }
4535
4536 /* Fix up case for hex conversions. */
4537 if (type == 'X') {
4538 /* Need to convert all lower case letters to upper case.
4539 and need to convert 0x to 0X (and -0x to -0X). */
4540 for (i = 0; i < len; i++)
4541 if (buf[i] >= 'a' && buf[i] <= 'x')
4542 buf[i] -= 'a'-'A';
4543 }
4544 *pbuf = buf;
4545 *plen = len;
4546 return result;
4547}
4548
4549Py_LOCAL_INLINE(int)
4550formatint(char *buf, size_t buflen, int flags,
4551 int prec, int type, PyObject *v)
4552{
4553 /* fmt = '%#.' + `prec` + 'l' + `type`
4554 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4555 + 1 + 1 = 24 */
4556 char fmt[64]; /* plenty big enough! */
4557 char *sign;
4558 long x;
4559
4560 x = PyInt_AsLong(v);
4561 if (x == -1 && PyErr_Occurred()) {
4562 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4563 Py_TYPE(v)->tp_name);
4564 return -1;
4565 }
4566 if (x < 0 && type == 'u') {
4567 type = 'd';
4568 }
4569 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4570 sign = "-";
4571 else
4572 sign = "";
4573 if (prec < 0)
4574 prec = 1;
4575
4576 if ((flags & F_ALT) &&
4577 (type == 'x' || type == 'X')) {
4578 /* When converting under %#x or %#X, there are a number
4579 * of issues that cause pain:
4580 * - when 0 is being converted, the C standard leaves off
4581 * the '0x' or '0X', which is inconsistent with other
4582 * %#x/%#X conversions and inconsistent with Python's
4583 * hex() function
4584 * - there are platforms that violate the standard and
4585 * convert 0 with the '0x' or '0X'
4586 * (Metrowerks, Compaq Tru64)
4587 * - there are platforms that give '0x' when converting
4588 * under %#X, but convert 0 in accordance with the
4589 * standard (OS/2 EMX)
4590 *
4591 * We can achieve the desired consistency by inserting our
4592 * own '0x' or '0X' prefix, and substituting %x/%X in place
4593 * of %#x/%#X.
4594 *
4595 * Note that this is the same approach as used in
4596 * formatint() in unicodeobject.c
4597 */
4598 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4599 sign, type, prec, type);
4600 }
4601 else {
4602 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4603 sign, (flags&F_ALT) ? "#" : "",
4604 prec, type);
4605 }
4606
4607 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4608 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4609 */
4610 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4611 PyErr_SetString(PyExc_OverflowError,
4612 "formatted integer is too long (precision too large?)");
4613 return -1;
4614 }
4615 if (sign[0])
4616 PyOS_snprintf(buf, buflen, fmt, -x);
4617 else
4618 PyOS_snprintf(buf, buflen, fmt, x);
4619 return (int)strlen(buf);
4620}
4621
4622Py_LOCAL_INLINE(int)
4623formatchar(char *buf, size_t buflen, PyObject *v)
4624{
4625 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004626 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004627 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4628 return -1;
4629 }
4630 else {
4631 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4632 return -1;
4633 }
4634 buf[1] = '\0';
4635 return 1;
4636}
4637
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted (the per-specifier `formatbuf` array inside
   PyString_Format).  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
4647
4648PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004649PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004650{
4651 char *fmt, *res;
4652 Py_ssize_t arglen, argidx;
4653 Py_ssize_t reslen, rescnt, fmtcnt;
4654 int args_owned = 0;
4655 PyObject *result, *orig_args;
4656#ifdef Py_USING_UNICODE
4657 PyObject *v, *w;
4658#endif
4659 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004660 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004661 PyErr_BadInternalCall();
4662 return NULL;
4663 }
4664 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004665 fmt = PyString_AS_STRING(format);
4666 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004667 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004668 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004669 if (result == NULL)
4670 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004671 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004672 if (PyTuple_Check(args)) {
4673 arglen = PyTuple_GET_SIZE(args);
4674 argidx = 0;
4675 }
4676 else {
4677 arglen = -1;
4678 argidx = -2;
4679 }
4680 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4681 !PyObject_TypeCheck(args, &PyBaseString_Type))
4682 dict = args;
4683 while (--fmtcnt >= 0) {
4684 if (*fmt != '%') {
4685 if (--rescnt < 0) {
4686 rescnt = fmtcnt + 100;
4687 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004688 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004689 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004690 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004691 + reslen - rescnt;
4692 --rescnt;
4693 }
4694 *res++ = *fmt++;
4695 }
4696 else {
4697 /* Got a format specifier */
4698 int flags = 0;
4699 Py_ssize_t width = -1;
4700 int prec = -1;
4701 int c = '\0';
4702 int fill;
4703 int isnumok;
4704 PyObject *v = NULL;
4705 PyObject *temp = NULL;
4706 char *pbuf;
4707 int sign;
4708 Py_ssize_t len;
4709 char formatbuf[FORMATBUFLEN];
4710 /* For format{float,int,char}() */
4711#ifdef Py_USING_UNICODE
4712 char *fmt_start = fmt;
4713 Py_ssize_t argidx_start = argidx;
4714#endif
4715
4716 fmt++;
4717 if (*fmt == '(') {
4718 char *keystart;
4719 Py_ssize_t keylen;
4720 PyObject *key;
4721 int pcount = 1;
4722
4723 if (dict == NULL) {
4724 PyErr_SetString(PyExc_TypeError,
4725 "format requires a mapping");
4726 goto error;
4727 }
4728 ++fmt;
4729 --fmtcnt;
4730 keystart = fmt;
4731 /* Skip over balanced parentheses */
4732 while (pcount > 0 && --fmtcnt >= 0) {
4733 if (*fmt == ')')
4734 --pcount;
4735 else if (*fmt == '(')
4736 ++pcount;
4737 fmt++;
4738 }
4739 keylen = fmt - keystart - 1;
4740 if (fmtcnt < 0 || pcount > 0) {
4741 PyErr_SetString(PyExc_ValueError,
4742 "incomplete format key");
4743 goto error;
4744 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004745 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004746 keylen);
4747 if (key == NULL)
4748 goto error;
4749 if (args_owned) {
4750 Py_DECREF(args);
4751 args_owned = 0;
4752 }
4753 args = PyObject_GetItem(dict, key);
4754 Py_DECREF(key);
4755 if (args == NULL) {
4756 goto error;
4757 }
4758 args_owned = 1;
4759 arglen = -1;
4760 argidx = -2;
4761 }
4762 while (--fmtcnt >= 0) {
4763 switch (c = *fmt++) {
4764 case '-': flags |= F_LJUST; continue;
4765 case '+': flags |= F_SIGN; continue;
4766 case ' ': flags |= F_BLANK; continue;
4767 case '#': flags |= F_ALT; continue;
4768 case '0': flags |= F_ZERO; continue;
4769 }
4770 break;
4771 }
4772 if (c == '*') {
4773 v = getnextarg(args, arglen, &argidx);
4774 if (v == NULL)
4775 goto error;
4776 if (!PyInt_Check(v)) {
4777 PyErr_SetString(PyExc_TypeError,
4778 "* wants int");
4779 goto error;
4780 }
4781 width = PyInt_AsLong(v);
4782 if (width < 0) {
4783 flags |= F_LJUST;
4784 width = -width;
4785 }
4786 if (--fmtcnt >= 0)
4787 c = *fmt++;
4788 }
4789 else if (c >= 0 && isdigit(c)) {
4790 width = c - '0';
4791 while (--fmtcnt >= 0) {
4792 c = Py_CHARMASK(*fmt++);
4793 if (!isdigit(c))
4794 break;
4795 if ((width*10) / 10 != width) {
4796 PyErr_SetString(
4797 PyExc_ValueError,
4798 "width too big");
4799 goto error;
4800 }
4801 width = width*10 + (c - '0');
4802 }
4803 }
4804 if (c == '.') {
4805 prec = 0;
4806 if (--fmtcnt >= 0)
4807 c = *fmt++;
4808 if (c == '*') {
4809 v = getnextarg(args, arglen, &argidx);
4810 if (v == NULL)
4811 goto error;
4812 if (!PyInt_Check(v)) {
4813 PyErr_SetString(
4814 PyExc_TypeError,
4815 "* wants int");
4816 goto error;
4817 }
4818 prec = PyInt_AsLong(v);
4819 if (prec < 0)
4820 prec = 0;
4821 if (--fmtcnt >= 0)
4822 c = *fmt++;
4823 }
4824 else if (c >= 0 && isdigit(c)) {
4825 prec = c - '0';
4826 while (--fmtcnt >= 0) {
4827 c = Py_CHARMASK(*fmt++);
4828 if (!isdigit(c))
4829 break;
4830 if ((prec*10) / 10 != prec) {
4831 PyErr_SetString(
4832 PyExc_ValueError,
4833 "prec too big");
4834 goto error;
4835 }
4836 prec = prec*10 + (c - '0');
4837 }
4838 }
4839 } /* prec */
4840 if (fmtcnt >= 0) {
4841 if (c == 'h' || c == 'l' || c == 'L') {
4842 if (--fmtcnt >= 0)
4843 c = *fmt++;
4844 }
4845 }
4846 if (fmtcnt < 0) {
4847 PyErr_SetString(PyExc_ValueError,
4848 "incomplete format");
4849 goto error;
4850 }
4851 if (c != '%') {
4852 v = getnextarg(args, arglen, &argidx);
4853 if (v == NULL)
4854 goto error;
4855 }
4856 sign = 0;
4857 fill = ' ';
4858 switch (c) {
4859 case '%':
4860 pbuf = "%";
4861 len = 1;
4862 break;
4863 case 's':
4864#ifdef Py_USING_UNICODE
4865 if (PyUnicode_Check(v)) {
4866 fmt = fmt_start;
4867 argidx = argidx_start;
4868 goto unicode;
4869 }
4870#endif
4871 temp = _PyObject_Str(v);
4872#ifdef Py_USING_UNICODE
4873 if (temp != NULL && PyUnicode_Check(temp)) {
4874 Py_DECREF(temp);
4875 fmt = fmt_start;
4876 argidx = argidx_start;
4877 goto unicode;
4878 }
4879#endif
4880 /* Fall through */
4881 case 'r':
4882 if (c == 'r')
4883 temp = PyObject_Repr(v);
4884 if (temp == NULL)
4885 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004886 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004887 PyErr_SetString(PyExc_TypeError,
4888 "%s argument has non-string str()");
4889 Py_DECREF(temp);
4890 goto error;
4891 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004892 pbuf = PyString_AS_STRING(temp);
4893 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004894 if (prec >= 0 && len > prec)
4895 len = prec;
4896 break;
4897 case 'i':
4898 case 'd':
4899 case 'u':
4900 case 'o':
4901 case 'x':
4902 case 'X':
4903 if (c == 'i')
4904 c = 'd';
4905 isnumok = 0;
4906 if (PyNumber_Check(v)) {
4907 PyObject *iobj=NULL;
4908
4909 if (PyInt_Check(v) || (PyLong_Check(v))) {
4910 iobj = v;
4911 Py_INCREF(iobj);
4912 }
4913 else {
4914 iobj = PyNumber_Int(v);
4915 if (iobj==NULL) iobj = PyNumber_Long(v);
4916 }
4917 if (iobj!=NULL) {
4918 if (PyInt_Check(iobj)) {
4919 isnumok = 1;
4920 pbuf = formatbuf;
4921 len = formatint(pbuf,
4922 sizeof(formatbuf),
4923 flags, prec, c, iobj);
4924 Py_DECREF(iobj);
4925 if (len < 0)
4926 goto error;
4927 sign = 1;
4928 }
4929 else if (PyLong_Check(iobj)) {
4930 int ilen;
4931
4932 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004933 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004934 prec, c, &pbuf, &ilen);
4935 Py_DECREF(iobj);
4936 len = ilen;
4937 if (!temp)
4938 goto error;
4939 sign = 1;
4940 }
4941 else {
4942 Py_DECREF(iobj);
4943 }
4944 }
4945 }
4946 if (!isnumok) {
4947 PyErr_Format(PyExc_TypeError,
4948 "%%%c format: a number is required, "
4949 "not %.200s", c, Py_TYPE(v)->tp_name);
4950 goto error;
4951 }
4952 if (flags & F_ZERO)
4953 fill = '0';
4954 break;
4955 case 'e':
4956 case 'E':
4957 case 'f':
4958 case 'F':
4959 case 'g':
4960 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00004961 if (c == 'F')
4962 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00004963 pbuf = formatbuf;
4964 len = formatfloat(pbuf, sizeof(formatbuf),
4965 flags, prec, c, v);
4966 if (len < 0)
4967 goto error;
4968 sign = 1;
4969 if (flags & F_ZERO)
4970 fill = '0';
4971 break;
4972 case 'c':
4973#ifdef Py_USING_UNICODE
4974 if (PyUnicode_Check(v)) {
4975 fmt = fmt_start;
4976 argidx = argidx_start;
4977 goto unicode;
4978 }
4979#endif
4980 pbuf = formatbuf;
4981 len = formatchar(pbuf, sizeof(formatbuf), v);
4982 if (len < 0)
4983 goto error;
4984 break;
4985 default:
4986 PyErr_Format(PyExc_ValueError,
4987 "unsupported format character '%c' (0x%x) "
4988 "at index %zd",
4989 c, c,
4990 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004991 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004992 goto error;
4993 }
4994 if (sign) {
4995 if (*pbuf == '-' || *pbuf == '+') {
4996 sign = *pbuf++;
4997 len--;
4998 }
4999 else if (flags & F_SIGN)
5000 sign = '+';
5001 else if (flags & F_BLANK)
5002 sign = ' ';
5003 else
5004 sign = 0;
5005 }
5006 if (width < len)
5007 width = len;
5008 if (rescnt - (sign != 0) < width) {
5009 reslen -= rescnt;
5010 rescnt = width + fmtcnt + 100;
5011 reslen += rescnt;
5012 if (reslen < 0) {
5013 Py_DECREF(result);
5014 Py_XDECREF(temp);
5015 return PyErr_NoMemory();
5016 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005017 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00005018 Py_XDECREF(temp);
5019 return NULL;
5020 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005021 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00005022 + reslen - rescnt;
5023 }
5024 if (sign) {
5025 if (fill != ' ')
5026 *res++ = sign;
5027 rescnt--;
5028 if (width > len)
5029 width--;
5030 }
5031 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5032 assert(pbuf[0] == '0');
5033 assert(pbuf[1] == c);
5034 if (fill != ' ') {
5035 *res++ = *pbuf++;
5036 *res++ = *pbuf++;
5037 }
5038 rescnt -= 2;
5039 width -= 2;
5040 if (width < 0)
5041 width = 0;
5042 len -= 2;
5043 }
5044 if (width > len && !(flags & F_LJUST)) {
5045 do {
5046 --rescnt;
5047 *res++ = fill;
5048 } while (--width > len);
5049 }
5050 if (fill == ' ') {
5051 if (sign)
5052 *res++ = sign;
5053 if ((flags & F_ALT) &&
5054 (c == 'x' || c == 'X')) {
5055 assert(pbuf[0] == '0');
5056 assert(pbuf[1] == c);
5057 *res++ = *pbuf++;
5058 *res++ = *pbuf++;
5059 }
5060 }
5061 Py_MEMCPY(res, pbuf, len);
5062 res += len;
5063 rescnt -= len;
5064 while (--width >= len) {
5065 --rescnt;
5066 *res++ = ' ';
5067 }
5068 if (dict && (argidx < arglen) && c != '%') {
5069 PyErr_SetString(PyExc_TypeError,
5070 "not all arguments converted during string formatting");
5071 Py_XDECREF(temp);
5072 goto error;
5073 }
5074 Py_XDECREF(temp);
5075 } /* '%' */
5076 } /* until end */
5077 if (argidx < arglen && !dict) {
5078 PyErr_SetString(PyExc_TypeError,
5079 "not all arguments converted during string formatting");
5080 goto error;
5081 }
5082 if (args_owned) {
5083 Py_DECREF(args);
5084 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005085 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005086 return result;
5087
5088#ifdef Py_USING_UNICODE
5089 unicode:
5090 if (args_owned) {
5091 Py_DECREF(args);
5092 args_owned = 0;
5093 }
5094 /* Fiddle args right (remove the first argidx arguments) */
5095 if (PyTuple_Check(orig_args) && argidx > 0) {
5096 PyObject *v;
5097 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5098 v = PyTuple_New(n);
5099 if (v == NULL)
5100 goto error;
5101 while (--n >= 0) {
5102 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5103 Py_INCREF(w);
5104 PyTuple_SET_ITEM(v, n, w);
5105 }
5106 args = v;
5107 } else {
5108 Py_INCREF(orig_args);
5109 args = orig_args;
5110 }
5111 args_owned = 1;
5112 /* Take what we have of the result and let the Unicode formatting
5113 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005114 rescnt = res - PyString_AS_STRING(result);
5115 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005116 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005117 fmtcnt = PyString_GET_SIZE(format) - \
5118 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005119 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5120 if (format == NULL)
5121 goto error;
5122 v = PyUnicode_Format(format, args);
5123 Py_DECREF(format);
5124 if (v == NULL)
5125 goto error;
5126 /* Paste what we have (result) to what the Unicode formatting
5127 function returned (v) and return the result (or error) */
5128 w = PyUnicode_Concat(result, v);
5129 Py_DECREF(result);
5130 Py_DECREF(v);
5131 Py_DECREF(args);
5132 return w;
5133#endif /* Py_USING_UNICODE */
5134
5135 error:
5136 Py_DECREF(result);
5137 if (args_owned) {
5138 Py_DECREF(args);
5139 }
5140 return NULL;
5141}
5142
5143void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005144PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005145{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005146 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005147 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005148 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005149 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005150 /* If it's a string subclass, we don't really know what putting
5151 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005152 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005153 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005154 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005155 return;
5156 if (interned == NULL) {
5157 interned = PyDict_New();
5158 if (interned == NULL) {
5159 PyErr_Clear(); /* Don't leave an exception */
5160 return;
5161 }
5162 }
5163 t = PyDict_GetItem(interned, (PyObject *)s);
5164 if (t) {
5165 Py_INCREF(t);
5166 Py_DECREF(*p);
5167 *p = t;
5168 return;
5169 }
5170
5171 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5172 PyErr_Clear();
5173 return;
5174 }
5175 /* The two references in interned are not counted by refcnt.
5176 The string deallocator will take care of this */
5177 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005178 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005179}
5180
5181void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005182PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005183{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005184 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005185 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5186 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005187 Py_INCREF(*p);
5188 }
5189}
5190
5191
5192PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005193PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005194{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005195 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005196 if (s == NULL)
5197 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005198 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005199 return s;
5200}
5201
5202void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005203PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005204{
5205 int i;
5206 for (i = 0; i < UCHAR_MAX + 1; i++) {
5207 Py_XDECREF(characters[i]);
5208 characters[i] = NULL;
5209 }
5210 Py_XDECREF(nullstring);
5211 nullstring = NULL;
5212}
5213
5214void _Py_ReleaseInternedStrings(void)
5215{
5216 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005217 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005218 Py_ssize_t i, n;
5219 Py_ssize_t immortal_size = 0, mortal_size = 0;
5220
5221 if (interned == NULL || !PyDict_Check(interned))
5222 return;
5223 keys = PyDict_Keys(interned);
5224 if (keys == NULL || !PyList_Check(keys)) {
5225 PyErr_Clear();
5226 return;
5227 }
5228
5229 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5230 detector, interned strings are not forcibly deallocated; rather, we
5231 give them their stolen references back, and then clear and DECREF
5232 the interned dict. */
5233
5234 n = PyList_GET_SIZE(keys);
5235 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5236 n);
5237 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005238 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005239 switch (s->ob_sstate) {
5240 case SSTATE_NOT_INTERNED:
5241 /* XXX Shouldn't happen */
5242 break;
5243 case SSTATE_INTERNED_IMMORTAL:
5244 Py_REFCNT(s) += 1;
5245 immortal_size += Py_SIZE(s);
5246 break;
5247 case SSTATE_INTERNED_MORTAL:
5248 Py_REFCNT(s) += 2;
5249 mortal_size += Py_SIZE(s);
5250 break;
5251 default:
5252 Py_FatalError("Inconsistent interned string state.");
5253 }
5254 s->ob_sstate = SSTATE_NOT_INTERNED;
5255 }
5256 fprintf(stderr, "total size of all interned strings: "
5257 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5258 "mortal/immortal\n", mortal_size, immortal_size);
5259 Py_DECREF(keys);
5260 PyDict_Clear(interned);
5261 Py_DECREF(interned);
5262 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005263}