blob: 70d90d4ee75896c4c88645bbd48b15c767825017 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000043 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 const char* p = f;
193 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
194 ;
195
196 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
197 * they don't affect the amount of space we reserve.
198 */
199 if ((*f == 'l' || *f == 'z') &&
200 (f[1] == 'd' || f[1] == 'u'))
201 ++f;
202
203 switch (*f) {
204 case 'c':
205 (void)va_arg(count, int);
206 /* fall through... */
207 case '%':
208 n++;
209 break;
210 case 'd': case 'u': case 'i': case 'x':
211 (void) va_arg(count, int);
212 /* 20 bytes is enough to hold a 64-bit
213 integer. Decimal takes the most space.
214 This isn't enough for octal. */
215 n += 20;
216 break;
217 case 's':
218 s = va_arg(count, char*);
219 n += strlen(s);
220 break;
221 case 'p':
222 (void) va_arg(count, int);
223 /* maximum 64-bit pointer representation:
224 * 0xffffffffffffffff
225 * so 19 characters is enough.
226 * XXX I count 18 -- what's the extra for?
227 */
228 n += 19;
229 break;
230 default:
231 /* if we stumble upon an unknown
232 formatting code, copy the rest of
233 the format string to the output
234 string. (we cannot just skip the
235 code, since there's no way to know
236 what's in the argument list) */
237 n += strlen(p);
238 goto expand;
239 }
240 } else
241 n++;
242 }
243 expand:
244 /* step 2: fill the buffer */
245 /* Since we've analyzed how much space we need for the worst case,
246 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000247 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000248 if (!string)
249 return NULL;
250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000251 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000252
253 for (f = format; *f; f++) {
254 if (*f == '%') {
255 const char* p = f++;
256 Py_ssize_t i;
257 int longflag = 0;
258 int size_tflag = 0;
259 /* parse the width.precision part (we're only
260 interested in the precision value, if any) */
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 if (*f == '.') {
265 f++;
266 n = 0;
267 while (isdigit(Py_CHARMASK(*f)))
268 n = (n*10) + *f++ - '0';
269 }
270 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
271 f++;
272 /* handle the long flag, but only for %ld and %lu.
273 others can be added when necessary. */
274 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
275 longflag = 1;
276 ++f;
277 }
278 /* handle the size_t flag. */
279 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
280 size_tflag = 1;
281 ++f;
282 }
283
284 switch (*f) {
285 case 'c':
286 *s++ = va_arg(vargs, int);
287 break;
288 case 'd':
289 if (longflag)
290 sprintf(s, "%ld", va_arg(vargs, long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
293 va_arg(vargs, Py_ssize_t));
294 else
295 sprintf(s, "%d", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'u':
299 if (longflag)
300 sprintf(s, "%lu",
301 va_arg(vargs, unsigned long));
302 else if (size_tflag)
303 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
304 va_arg(vargs, size_t));
305 else
306 sprintf(s, "%u",
307 va_arg(vargs, unsigned int));
308 s += strlen(s);
309 break;
310 case 'i':
311 sprintf(s, "%i", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 'x':
315 sprintf(s, "%x", va_arg(vargs, int));
316 s += strlen(s);
317 break;
318 case 's':
319 p = va_arg(vargs, char*);
320 i = strlen(p);
321 if (n > 0 && i > n)
322 i = n;
323 Py_MEMCPY(s, p, i);
324 s += i;
325 break;
326 case 'p':
327 sprintf(s, "%p", va_arg(vargs, void*));
328 /* %p is ill-defined: ensure leading 0x. */
329 if (s[1] == 'X')
330 s[1] = 'x';
331 else if (s[1] != 'x') {
332 memmove(s+2, s, strlen(s)+1);
333 s[0] = '0';
334 s[1] = 'x';
335 }
336 s += strlen(s);
337 break;
338 case '%':
339 *s++ = '%';
340 break;
341 default:
342 strcpy(s, p);
343 s += strlen(s);
344 goto end;
345 }
346 } else
347 *s++ = *f;
348 }
349
350 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000351 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000352 return string;
353}
354
355PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000356PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000357{
358 PyObject* ret;
359 va_list vargs;
360
361#ifdef HAVE_STDARG_PROTOTYPES
362 va_start(vargs, format);
363#else
364 va_start(vargs);
365#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000366 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000367 va_end(vargs);
368 return ret;
369}
370
371
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000372PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000373 Py_ssize_t size,
374 const char *encoding,
375 const char *errors)
376{
377 PyObject *v, *str;
378
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000379 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000380 if (str == NULL)
381 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000382 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000383 Py_DECREF(str);
384 return v;
385}
386
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000387PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000388 const char *encoding,
389 const char *errors)
390{
391 PyObject *v;
392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000393 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000394 PyErr_BadArgument();
395 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
397
Christian Heimes44720832008-05-26 13:01:01 +0000398 if (encoding == NULL) {
399#ifdef Py_USING_UNICODE
400 encoding = PyUnicode_GetDefaultEncoding();
401#else
402 PyErr_SetString(PyExc_ValueError, "no encoding specified");
403 goto onError;
404#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000405 }
Christian Heimes44720832008-05-26 13:01:01 +0000406
407 /* Decode via the codec registry */
408 v = PyCodec_Decode(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
412 return v;
413
414 onError:
415 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000416}
417
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000418PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000419 const char *encoding,
420 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000421{
Christian Heimes44720832008-05-26 13:01:01 +0000422 PyObject *v;
423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000424 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000425 if (v == NULL)
426 goto onError;
427
428#ifdef Py_USING_UNICODE
429 /* Convert Unicode to a string using the default encoding */
430 if (PyUnicode_Check(v)) {
431 PyObject *temp = v;
432 v = PyUnicode_AsEncodedString(v, NULL, NULL);
433 Py_DECREF(temp);
434 if (v == NULL)
435 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000436 }
Christian Heimes44720832008-05-26 13:01:01 +0000437#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000438 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000439 PyErr_Format(PyExc_TypeError,
440 "decoder did not return a string object (type=%.400s)",
441 Py_TYPE(v)->tp_name);
442 Py_DECREF(v);
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
Christian Heimes44720832008-05-26 13:01:01 +0000445
446 return v;
447
448 onError:
449 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000450}
451
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000452PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000453 Py_ssize_t size,
454 const char *encoding,
455 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000456{
Christian Heimes44720832008-05-26 13:01:01 +0000457 PyObject *v, *str;
458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000459 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000460 if (str == NULL)
461 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000463 Py_DECREF(str);
464 return v;
465}
466
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000467PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000468 const char *encoding,
469 const char *errors)
470{
471 PyObject *v;
472
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000473 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000474 PyErr_BadArgument();
475 goto onError;
476 }
477
478 if (encoding == NULL) {
479#ifdef Py_USING_UNICODE
480 encoding = PyUnicode_GetDefaultEncoding();
481#else
482 PyErr_SetString(PyExc_ValueError, "no encoding specified");
483 goto onError;
484#endif
485 }
486
487 /* Encode via the codec registry */
488 v = PyCodec_Encode(str, encoding, errors);
489 if (v == NULL)
490 goto onError;
491
492 return v;
493
494 onError:
495 return NULL;
496}
497
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000498PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000499 const char *encoding,
500 const char *errors)
501{
502 PyObject *v;
503
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000504 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000505 if (v == NULL)
506 goto onError;
507
508#ifdef Py_USING_UNICODE
509 /* Convert Unicode to a string using the default encoding */
510 if (PyUnicode_Check(v)) {
511 PyObject *temp = v;
512 v = PyUnicode_AsEncodedString(v, NULL, NULL);
513 Py_DECREF(temp);
514 if (v == NULL)
515 goto onError;
516 }
517#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000518 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000519 PyErr_Format(PyExc_TypeError,
520 "encoder did not return a string object (type=%.400s)",
521 Py_TYPE(v)->tp_name);
522 Py_DECREF(v);
523 goto onError;
524 }
525
526 return v;
527
528 onError:
529 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000530}
531
532static void
Christian Heimes44720832008-05-26 13:01:01 +0000533string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000534{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000535 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000536 case SSTATE_NOT_INTERNED:
537 break;
538
539 case SSTATE_INTERNED_MORTAL:
540 /* revive dead object temporarily for DelItem */
541 Py_REFCNT(op) = 3;
542 if (PyDict_DelItem(interned, op) != 0)
543 Py_FatalError(
544 "deletion of interned string failed");
545 break;
546
547 case SSTATE_INTERNED_IMMORTAL:
548 Py_FatalError("Immortal interned string died.");
549
550 default:
551 Py_FatalError("Inconsistent interned string state.");
552 }
553 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000554}
555
Christian Heimes44720832008-05-26 13:01:01 +0000556/* Unescape a backslash-escaped string. If unicode is non-zero,
557 the string is a u-literal. If recode_encoding is non-zero,
558 the string is UTF-8 encoded and should be re-encoded in the
559 specified encoding. */
560
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000561PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000562 Py_ssize_t len,
563 const char *errors,
564 Py_ssize_t unicode,
565 const char *recode_encoding)
566{
567 int c;
568 char *p, *buf;
569 const char *end;
570 PyObject *v;
571 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000572 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000573 if (v == NULL)
574 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000575 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000576 end = s + len;
577 while (s < end) {
578 if (*s != '\\') {
579 non_esc:
580#ifdef Py_USING_UNICODE
581 if (recode_encoding && (*s & 0x80)) {
582 PyObject *u, *w;
583 char *r;
584 const char* t;
585 Py_ssize_t rn;
586 t = s;
587 /* Decode non-ASCII bytes as UTF-8. */
588 while (t < end && (*t & 0x80)) t++;
589 u = PyUnicode_DecodeUTF8(s, t - s, errors);
590 if(!u) goto failed;
591
592 /* Recode them in target encoding. */
593 w = PyUnicode_AsEncodedString(
594 u, recode_encoding, errors);
595 Py_DECREF(u);
596 if (!w) goto failed;
597
598 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000599 assert(PyString_Check(w));
600 r = PyString_AS_STRING(w);
601 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000602 Py_MEMCPY(p, r, rn);
603 p += rn;
604 Py_DECREF(w);
605 s = t;
606 } else {
607 *p++ = *s++;
608 }
609#else
610 *p++ = *s++;
611#endif
612 continue;
613 }
614 s++;
615 if (s==end) {
616 PyErr_SetString(PyExc_ValueError,
617 "Trailing \\ in string");
618 goto failed;
619 }
620 switch (*s++) {
621 /* XXX This assumes ASCII! */
622 case '\n': break;
623 case '\\': *p++ = '\\'; break;
624 case '\'': *p++ = '\''; break;
625 case '\"': *p++ = '\"'; break;
626 case 'b': *p++ = '\b'; break;
627 case 'f': *p++ = '\014'; break; /* FF */
628 case 't': *p++ = '\t'; break;
629 case 'n': *p++ = '\n'; break;
630 case 'r': *p++ = '\r'; break;
631 case 'v': *p++ = '\013'; break; /* VT */
632 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
635 c = s[-1] - '0';
636 if (s < end && '0' <= *s && *s <= '7') {
637 c = (c<<3) + *s++ - '0';
638 if (s < end && '0' <= *s && *s <= '7')
639 c = (c<<3) + *s++ - '0';
640 }
641 *p++ = c;
642 break;
643 case 'x':
644 if (s+1 < end &&
645 isxdigit(Py_CHARMASK(s[0])) &&
646 isxdigit(Py_CHARMASK(s[1])))
647 {
648 unsigned int x = 0;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x = c - '0';
653 else if (islower(c))
654 x = 10 + c - 'a';
655 else
656 x = 10 + c - 'A';
657 x = x << 4;
658 c = Py_CHARMASK(*s);
659 s++;
660 if (isdigit(c))
661 x += c - '0';
662 else if (islower(c))
663 x += 10 + c - 'a';
664 else
665 x += 10 + c - 'A';
666 *p++ = x;
667 break;
668 }
669 if (!errors || strcmp(errors, "strict") == 0) {
670 PyErr_SetString(PyExc_ValueError,
671 "invalid \\x escape");
672 goto failed;
673 }
674 if (strcmp(errors, "replace") == 0) {
675 *p++ = '?';
676 } else if (strcmp(errors, "ignore") == 0)
677 /* do nothing */;
678 else {
679 PyErr_Format(PyExc_ValueError,
680 "decoding error; "
681 "unknown error handling code: %.400s",
682 errors);
683 goto failed;
684 }
685#ifndef Py_USING_UNICODE
686 case 'u':
687 case 'U':
688 case 'N':
689 if (unicode) {
690 PyErr_SetString(PyExc_ValueError,
691 "Unicode escapes not legal "
692 "when Unicode disabled");
693 goto failed;
694 }
695#endif
696 default:
697 *p++ = '\\';
698 s--;
699 goto non_esc; /* an arbitry number of unescaped
700 UTF-8 bytes may follow. */
701 }
702 }
703 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000704 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000705 return v;
706 failed:
707 Py_DECREF(v);
708 return NULL;
709}
710
711/* -------------------------------------------------------------------- */
712/* object api */
713
Christian Heimes1a6387e2008-03-26 12:49:49 +0000714static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000715string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000716{
Christian Heimes44720832008-05-26 13:01:01 +0000717 char *s;
718 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000720 return -1;
721 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000722}
723
Christian Heimes44720832008-05-26 13:01:01 +0000724static /*const*/ char *
725string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000726{
Christian Heimes44720832008-05-26 13:01:01 +0000727 char *s;
728 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000729 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000730 return NULL;
731 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000732}
733
734Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000735PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000736{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000737 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000738 return string_getsize(op);
739 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000740}
741
Christian Heimes44720832008-05-26 13:01:01 +0000742/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000743PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000744{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000745 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000746 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000747 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000748}
749
750int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000752 register char **s,
753 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000754{
Christian Heimes44720832008-05-26 13:01:01 +0000755 if (s == NULL) {
756 PyErr_BadInternalCall();
757 return -1;
758 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000760 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000761#ifdef Py_USING_UNICODE
762 if (PyUnicode_Check(obj)) {
763 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
764 if (obj == NULL)
765 return -1;
766 }
767 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000768#endif
Christian Heimes44720832008-05-26 13:01:01 +0000769 {
770 PyErr_Format(PyExc_TypeError,
771 "expected string or Unicode object, "
772 "%.200s found", Py_TYPE(obj)->tp_name);
773 return -1;
774 }
775 }
776
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000777 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000778 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779 *len = PyString_GET_SIZE(obj);
780 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000781 PyErr_SetString(PyExc_TypeError,
782 "expected string without null bytes");
783 return -1;
784 }
785 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786}
787
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788/* -------------------------------------------------------------------- */
789/* Methods */
790
Christian Heimes44720832008-05-26 13:01:01 +0000791#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000793
Christian Heimes1a6387e2008-03-26 12:49:49 +0000794#include "stringlib/count.h"
795#include "stringlib/find.h"
796#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000797
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000799#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801
802
803static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000804string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000805{
Christian Heimes44720832008-05-26 13:01:01 +0000806 Py_ssize_t i, str_len;
807 char c;
808 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000809
Christian Heimes44720832008-05-26 13:01:01 +0000810 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000811 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000812 int ret;
813 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000814 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000815 if (op == NULL)
816 return -1;
817 ret = string_print(op, fp, flags);
818 Py_DECREF(op);
819 return ret;
820 }
821 if (flags & Py_PRINT_RAW) {
822 char *data = op->ob_sval;
823 Py_ssize_t size = Py_SIZE(op);
824 Py_BEGIN_ALLOW_THREADS
825 while (size > INT_MAX) {
826 /* Very long strings cannot be written atomically.
827 * But don't write exactly INT_MAX bytes at a time
828 * to avoid memory aligment issues.
829 */
830 const int chunk_size = INT_MAX & ~0x3FFF;
831 fwrite(data, 1, chunk_size, fp);
832 data += chunk_size;
833 size -= chunk_size;
834 }
835#ifdef __VMS
836 if (size) fwrite(data, (int)size, 1, fp);
837#else
838 fwrite(data, 1, (int)size, fp);
839#endif
840 Py_END_ALLOW_THREADS
841 return 0;
842 }
843
844 /* figure out which quote to use; single is preferred */
845 quote = '\'';
846 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
847 !memchr(op->ob_sval, '"', Py_SIZE(op)))
848 quote = '"';
849
850 str_len = Py_SIZE(op);
851 Py_BEGIN_ALLOW_THREADS
852 fputc(quote, fp);
853 for (i = 0; i < str_len; i++) {
854 /* Since strings are immutable and the caller should have a
855 reference, accessing the interal buffer should not be an issue
856 with the GIL released. */
857 c = op->ob_sval[i];
858 if (c == quote || c == '\\')
859 fprintf(fp, "\\%c", c);
860 else if (c == '\t')
861 fprintf(fp, "\\t");
862 else if (c == '\n')
863 fprintf(fp, "\\n");
864 else if (c == '\r')
865 fprintf(fp, "\\r");
866 else if (c < ' ' || c >= 0x7f)
867 fprintf(fp, "\\x%02x", c & 0xff);
868 else
869 fputc(c, fp);
870 }
871 fputc(quote, fp);
872 Py_END_ALLOW_THREADS
873 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000874}
875
Christian Heimes44720832008-05-26 13:01:01 +0000876PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000877PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000878{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000879 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000880 size_t newsize = 2 + 4 * Py_SIZE(op);
881 PyObject *v;
882 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
883 PyErr_SetString(PyExc_OverflowError,
884 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000885 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000886 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000887 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000888 if (v == NULL) {
889 return NULL;
890 }
891 else {
892 register Py_ssize_t i;
893 register char c;
894 register char *p;
895 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000896
Christian Heimes44720832008-05-26 13:01:01 +0000897 /* figure out which quote to use; single is preferred */
898 quote = '\'';
899 if (smartquotes &&
900 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
901 !memchr(op->ob_sval, '"', Py_SIZE(op)))
902 quote = '"';
903
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000904 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000905 *p++ = quote;
906 for (i = 0; i < Py_SIZE(op); i++) {
907 /* There's at least enough room for a hex escape
908 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000909 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000910 c = op->ob_sval[i];
911 if (c == quote || c == '\\')
912 *p++ = '\\', *p++ = c;
913 else if (c == '\t')
914 *p++ = '\\', *p++ = 't';
915 else if (c == '\n')
916 *p++ = '\\', *p++ = 'n';
917 else if (c == '\r')
918 *p++ = '\\', *p++ = 'r';
919 else if (c < ' ' || c >= 0x7f) {
920 /* For performance, we don't want to call
921 PyOS_snprintf here (extra layers of
922 function call). */
923 sprintf(p, "\\x%02x", c & 0xff);
924 p += 4;
925 }
926 else
927 *p++ = c;
928 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000929 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000930 *p++ = quote;
931 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000932 _PyString_Resize(
933 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000934 return v;
935 }
936}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937
938static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000939string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000940{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000941 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000942}
943
Christian Heimes1a6387e2008-03-26 12:49:49 +0000944static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000945string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000946{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000947 assert(PyString_Check(s));
948 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000949 Py_INCREF(s);
950 return s;
951 }
952 else {
953 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000954 PyStringObject *t = (PyStringObject *) s;
955 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +0000956 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000957}
958
Christian Heimes44720832008-05-26 13:01:01 +0000959static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000960string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000961{
962 return Py_SIZE(a);
963}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000964
Christian Heimes44720832008-05-26 13:01:01 +0000965static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000966string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000967{
968 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000969 register PyStringObject *op;
970 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000971#ifdef Py_USING_UNICODE
972 if (PyUnicode_Check(bb))
973 return PyUnicode_Concat((PyObject *)a, bb);
974#endif
975 if (PyByteArray_Check(bb))
976 return PyByteArray_Concat((PyObject *)a, bb);
977 PyErr_Format(PyExc_TypeError,
978 "cannot concatenate 'str' and '%.200s' objects",
979 Py_TYPE(bb)->tp_name);
980 return NULL;
981 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000982#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +0000983 /* Optimize cases with empty left or right operand */
984 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000985 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +0000986 if (Py_SIZE(a) == 0) {
987 Py_INCREF(bb);
988 return bb;
989 }
990 Py_INCREF(a);
991 return (PyObject *)a;
992 }
993 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +0000994 /* Check that string sizes are not negative, to prevent an
995 overflow in cases where we are passed incorrectly-created
996 strings with negative lengths (due to a bug in other code).
997 */
998 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
999 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001000 PyErr_SetString(PyExc_OverflowError,
1001 "strings are too large to concat");
1002 return NULL;
1003 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001004
Christian Heimes44720832008-05-26 13:01:01 +00001005 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001006 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001007 PyErr_SetString(PyExc_OverflowError,
1008 "strings are too large to concat");
1009 return NULL;
1010 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001011 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001012 if (op == NULL)
1013 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001014 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001015 op->ob_shash = -1;
1016 op->ob_sstate = SSTATE_NOT_INTERNED;
1017 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1018 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1019 op->ob_sval[size] = '\0';
1020 return (PyObject *) op;
1021#undef b
1022}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001023
Christian Heimes44720832008-05-26 13:01:01 +00001024static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001025string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001026{
1027 register Py_ssize_t i;
1028 register Py_ssize_t j;
1029 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001030 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001031 size_t nbytes;
1032 if (n < 0)
1033 n = 0;
1034 /* watch out for overflows: the size can overflow int,
1035 * and the # of bytes needed can overflow size_t
1036 */
1037 size = Py_SIZE(a) * n;
1038 if (n && size / n != Py_SIZE(a)) {
1039 PyErr_SetString(PyExc_OverflowError,
1040 "repeated string is too long");
1041 return NULL;
1042 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001043 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001044 Py_INCREF(a);
1045 return (PyObject *)a;
1046 }
1047 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001048 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001049 PyErr_SetString(PyExc_OverflowError,
1050 "repeated string is too long");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001054 if (op == NULL)
1055 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001056 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001057 op->ob_shash = -1;
1058 op->ob_sstate = SSTATE_NOT_INTERNED;
1059 op->ob_sval[size] = '\0';
1060 if (Py_SIZE(a) == 1 && n > 0) {
1061 memset(op->ob_sval, a->ob_sval[0] , n);
1062 return (PyObject *) op;
1063 }
1064 i = 0;
1065 if (i < size) {
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 i = Py_SIZE(a);
1068 }
1069 while (i < size) {
1070 j = (i <= size-i) ? i : size-i;
1071 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1072 i += j;
1073 }
1074 return (PyObject *) op;
1075}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001076
Christian Heimes44720832008-05-26 13:01:01 +00001077/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1078
1079static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001080string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001081 register Py_ssize_t j)
1082 /* j -- may be negative! */
1083{
1084 if (i < 0)
1085 i = 0;
1086 if (j < 0)
1087 j = 0; /* Avoid signed/unsigned bug in next line */
1088 if (j > Py_SIZE(a))
1089 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001090 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001091 /* It's the same as a */
1092 Py_INCREF(a);
1093 return (PyObject *)a;
1094 }
1095 if (j < i)
1096 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001097 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001098}
1099
1100static int
1101string_contains(PyObject *str_obj, PyObject *sub_obj)
1102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001103 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001104#ifdef Py_USING_UNICODE
1105 if (PyUnicode_Check(sub_obj))
1106 return PyUnicode_Contains(str_obj, sub_obj);
1107#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001108 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001109 PyErr_Format(PyExc_TypeError,
1110 "'in <string>' requires string as left operand, "
1111 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1112 return -1;
1113 }
1114 }
1115
1116 return stringlib_contains_obj(str_obj, sub_obj);
1117}
1118
1119static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001120string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001121{
1122 char pchar;
1123 PyObject *v;
1124 if (i < 0 || i >= Py_SIZE(a)) {
1125 PyErr_SetString(PyExc_IndexError, "string index out of range");
1126 return NULL;
1127 }
1128 pchar = a->ob_sval[i];
1129 v = (PyObject *)characters[pchar & UCHAR_MAX];
1130 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001131 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001132 else {
1133#ifdef COUNT_ALLOCS
1134 one_strings++;
1135#endif
1136 Py_INCREF(v);
1137 }
1138 return v;
1139}
1140
1141static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001142string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001143{
1144 int c;
1145 Py_ssize_t len_a, len_b;
1146 Py_ssize_t min_len;
1147 PyObject *result;
1148
1149 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001150 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001151 result = Py_NotImplemented;
1152 goto out;
1153 }
1154 if (a == b) {
1155 switch (op) {
1156 case Py_EQ:case Py_LE:case Py_GE:
1157 result = Py_True;
1158 goto out;
1159 case Py_NE:case Py_LT:case Py_GT:
1160 result = Py_False;
1161 goto out;
1162 }
1163 }
1164 if (op == Py_EQ) {
1165 /* Supporting Py_NE here as well does not save
1166 much time, since Py_NE is rarely used. */
1167 if (Py_SIZE(a) == Py_SIZE(b)
1168 && (a->ob_sval[0] == b->ob_sval[0]
1169 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1170 result = Py_True;
1171 } else {
1172 result = Py_False;
1173 }
1174 goto out;
1175 }
1176 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1177 min_len = (len_a < len_b) ? len_a : len_b;
1178 if (min_len > 0) {
1179 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1180 if (c==0)
1181 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1182 } else
1183 c = 0;
1184 if (c == 0)
1185 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1186 switch (op) {
1187 case Py_LT: c = c < 0; break;
1188 case Py_LE: c = c <= 0; break;
1189 case Py_EQ: assert(0); break; /* unreachable */
1190 case Py_NE: c = c != 0; break;
1191 case Py_GT: c = c > 0; break;
1192 case Py_GE: c = c >= 0; break;
1193 default:
1194 result = Py_NotImplemented;
1195 goto out;
1196 }
1197 result = c ? Py_True : Py_False;
1198 out:
1199 Py_INCREF(result);
1200 return result;
1201}
1202
1203int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001204_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001205{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206 PyStringObject *a = (PyStringObject*) o1;
1207 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001208 return Py_SIZE(a) == Py_SIZE(b)
1209 && *a->ob_sval == *b->ob_sval
1210 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1211}
1212
1213static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001214string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001215{
1216 register Py_ssize_t len;
1217 register unsigned char *p;
1218 register long x;
1219
1220 if (a->ob_shash != -1)
1221 return a->ob_shash;
1222 len = Py_SIZE(a);
1223 p = (unsigned char *) a->ob_sval;
1224 x = *p << 7;
1225 while (--len >= 0)
1226 x = (1000003*x) ^ *p++;
1227 x ^= Py_SIZE(a);
1228 if (x == -1)
1229 x = -2;
1230 a->ob_shash = x;
1231 return x;
1232}
1233
1234static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001235string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001236{
1237 if (PyIndex_Check(item)) {
1238 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1239 if (i == -1 && PyErr_Occurred())
1240 return NULL;
1241 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001242 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001243 return string_item(self, i);
1244 }
1245 else if (PySlice_Check(item)) {
1246 Py_ssize_t start, stop, step, slicelength, cur, i;
1247 char* source_buf;
1248 char* result_buf;
1249 PyObject* result;
1250
1251 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001252 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001253 &start, &stop, &step, &slicelength) < 0) {
1254 return NULL;
1255 }
1256
1257 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001258 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001259 }
1260 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001261 slicelength == PyString_GET_SIZE(self) &&
1262 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001263 Py_INCREF(self);
1264 return (PyObject *)self;
1265 }
1266 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001267 return PyString_FromStringAndSize(
1268 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001269 slicelength);
1270 }
1271 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001272 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001273 result_buf = (char *)PyMem_Malloc(slicelength);
1274 if (result_buf == NULL)
1275 return PyErr_NoMemory();
1276
1277 for (cur = start, i = 0; i < slicelength;
1278 cur += step, i++) {
1279 result_buf[i] = source_buf[cur];
1280 }
1281
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001282 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001283 slicelength);
1284 PyMem_Free(result_buf);
1285 return result;
1286 }
1287 }
1288 else {
1289 PyErr_Format(PyExc_TypeError,
1290 "string indices must be integers, not %.200s",
1291 Py_TYPE(item)->tp_name);
1292 return NULL;
1293 }
1294}
1295
1296static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001297string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001298{
1299 if ( index != 0 ) {
1300 PyErr_SetString(PyExc_SystemError,
1301 "accessing non-existent string segment");
1302 return -1;
1303 }
1304 *ptr = (void *)self->ob_sval;
1305 return Py_SIZE(self);
1306}
1307
1308static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001310{
1311 PyErr_SetString(PyExc_TypeError,
1312 "Cannot use string as modifiable buffer");
1313 return -1;
1314}
1315
1316static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001317string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001318{
1319 if ( lenp )
1320 *lenp = Py_SIZE(self);
1321 return 1;
1322}
1323
1324static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001325string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001326{
1327 if ( index != 0 ) {
1328 PyErr_SetString(PyExc_SystemError,
1329 "accessing non-existent string segment");
1330 return -1;
1331 }
1332 *ptr = self->ob_sval;
1333 return Py_SIZE(self);
1334}
1335
1336static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001337string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001338{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001339 return PyBuffer_FillInfo(view, (PyObject*)self,
1340 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001341 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001342}
1343
1344static PySequenceMethods string_as_sequence = {
1345 (lenfunc)string_length, /*sq_length*/
1346 (binaryfunc)string_concat, /*sq_concat*/
1347 (ssizeargfunc)string_repeat, /*sq_repeat*/
1348 (ssizeargfunc)string_item, /*sq_item*/
1349 (ssizessizeargfunc)string_slice, /*sq_slice*/
1350 0, /*sq_ass_item*/
1351 0, /*sq_ass_slice*/
1352 (objobjproc)string_contains /*sq_contains*/
1353};
1354
1355static PyMappingMethods string_as_mapping = {
1356 (lenfunc)string_length,
1357 (binaryfunc)string_subscript,
1358 0,
1359};
1360
1361static PyBufferProcs string_as_buffer = {
1362 (readbufferproc)string_buffer_getreadbuf,
1363 (writebufferproc)string_buffer_getwritebuf,
1364 (segcountproc)string_buffer_getsegcount,
1365 (charbufferproc)string_buffer_getcharbuf,
1366 (getbufferproc)string_buffer_getbuffer,
1367 0, /* XXX */
1368};
1369
1370
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001371
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip type i: the format string minus its three
   leading characters ("|O:"). */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1380
Christian Heimes1a6387e2008-03-26 12:49:49 +00001381
/* True when `pattern` (of `length` bytes) occurs in `target` at `offset`.
   The first and last bytes are compared before memcmp is called on the
   interior.  Don't call if length < 2.
   Every argument is parenthesized so that expression arguments
   (e.g. `buf + 1`, `n - 1`) expand correctly. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new string.
   Uses the caller's `str` and `list` variables and jumps to the caller's
   `onError` label on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list`: directly
   into a slot while count < MAX_PREALLOC, by appending afterwards.
   Uses the caller's `str`, `list` and `count` variables and jumps to the
   caller's `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers: `i` must be a modifiable lvalue and is advanced
   (SKIP_*) or moved back (RSKIP_*) past whitespace / non-whitespace. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001442 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001443 Py_ssize_t i, j, count=0;
1444 PyObject *str;
1445 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Christian Heimes44720832008-05-26 13:01:01 +00001447 if (list == NULL)
1448 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001449
Christian Heimes44720832008-05-26 13:01:01 +00001450 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
Christian Heimes44720832008-05-26 13:01:01 +00001452 while (maxsplit-- > 0) {
1453 SKIP_SPACE(s, i, len);
1454 if (i==len) break;
1455 j = i; i++;
1456 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001457 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001458 /* No whitespace in self, so just use it as list[0] */
1459 Py_INCREF(self);
1460 PyList_SET_ITEM(list, 0, (PyObject *)self);
1461 count++;
1462 break;
1463 }
1464 SPLIT_ADD(s, j, i);
1465 }
1466
1467 if (i < len) {
1468 /* Only occurs when maxsplit was reached */
1469 /* Skip any remaining whitespace and copy to end of string */
1470 SKIP_SPACE(s, i, len);
1471 if (i != len)
1472 SPLIT_ADD(s, i, len);
1473 }
1474 FIX_PREALLOC_SIZE(list);
1475 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001477 Py_DECREF(list);
1478 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479}
1480
Christian Heimes1a6387e2008-03-26 12:49:49 +00001481Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001482split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001484 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001485 register Py_ssize_t i, j, count=0;
1486 PyObject *str;
1487 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488
Christian Heimes44720832008-05-26 13:01:01 +00001489 if (list == NULL)
1490 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001491
Christian Heimes44720832008-05-26 13:01:01 +00001492 i = j = 0;
1493 while ((j < len) && (maxcount-- > 0)) {
1494 for(; j<len; j++) {
1495 /* I found that using memchr makes no difference */
1496 if (s[j] == ch) {
1497 SPLIT_ADD(s, i, j);
1498 i = j = j + 1;
1499 break;
1500 }
1501 }
1502 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001503 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001504 /* ch not in self, so just use self as list[0] */
1505 Py_INCREF(self);
1506 PyList_SET_ITEM(list, 0, (PyObject *)self);
1507 count++;
1508 }
1509 else if (i <= len) {
1510 SPLIT_ADD(s, i, len);
1511 }
1512 FIX_PREALLOC_SIZE(list);
1513 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514
1515 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001516 Py_DECREF(list);
1517 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001518}
1519
1520PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001521"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001522\n\
Christian Heimes44720832008-05-26 13:01:01 +00001523Return a list of the words in the string S, using sep as the\n\
1524delimiter string. If maxsplit is given, at most maxsplit\n\
1525splits are done. If sep is not specified or is None, any\n\
1526whitespace string is a separator and empty strings are removed\n\
1527from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528
1529static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001530string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001532 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001533 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001534 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001535 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001537 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538#endif
1539
Christian Heimes44720832008-05-26 13:01:01 +00001540 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1541 return NULL;
1542 if (maxsplit < 0)
1543 maxsplit = PY_SSIZE_T_MAX;
1544 if (subobj == Py_None)
1545 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001549 }
1550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj))
1552 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1553#endif
1554 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1555 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001556
Christian Heimes44720832008-05-26 13:01:01 +00001557 if (n == 0) {
1558 PyErr_SetString(PyExc_ValueError, "empty separator");
1559 return NULL;
1560 }
1561 else if (n == 1)
1562 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001563
Christian Heimes44720832008-05-26 13:01:01 +00001564 list = PyList_New(PREALLOC_SIZE(maxsplit));
1565 if (list == NULL)
1566 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001567
1568#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001569 i = j = 0;
1570 while (maxsplit-- > 0) {
1571 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1572 if (pos < 0)
1573 break;
1574 j = i+pos;
1575 SPLIT_ADD(s, i, j);
1576 i = j + n;
1577 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001578#else
Christian Heimes44720832008-05-26 13:01:01 +00001579 i = j = 0;
1580 while ((j+n <= len) && (maxsplit-- > 0)) {
1581 for (; j+n <= len; j++) {
1582 if (Py_STRING_MATCH(s, j, sub, n)) {
1583 SPLIT_ADD(s, i, j);
1584 i = j = j + n;
1585 break;
1586 }
1587 }
1588 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001589#endif
Christian Heimes44720832008-05-26 13:01:01 +00001590 SPLIT_ADD(s, i, len);
1591 FIX_PREALLOC_SIZE(list);
1592 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001593
Christian Heimes44720832008-05-26 13:01:01 +00001594 onError:
1595 Py_DECREF(list);
1596 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001597}
1598
1599PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001600"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001601\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001602Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001604found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001605
1606static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001607string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608{
Christian Heimes44720832008-05-26 13:01:01 +00001609 const char *sep;
1610 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001612 if (PyString_Check(sep_obj)) {
1613 sep = PyString_AS_STRING(sep_obj);
1614 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001615 }
1616#ifdef Py_USING_UNICODE
1617 else if (PyUnicode_Check(sep_obj))
1618 return PyUnicode_Partition((PyObject *) self, sep_obj);
1619#endif
1620 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1621 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001622
Christian Heimes44720832008-05-26 13:01:01 +00001623 return stringlib_partition(
1624 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001625 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001626 sep_obj, sep, sep_len
1627 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001628}
1629
1630PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001631"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001632\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001633Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001634the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001635separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001636
1637static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Christian Heimes44720832008-05-26 13:01:01 +00001640 const char *sep;
1641 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001643 if (PyString_Check(sep_obj)) {
1644 sep = PyString_AS_STRING(sep_obj);
1645 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001646 }
1647#ifdef Py_USING_UNICODE
1648 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001649 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001650#endif
1651 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1652 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001653
Christian Heimes44720832008-05-26 13:01:01 +00001654 return stringlib_rpartition(
1655 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001656 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001657 sep_obj, sep, sep_len
1658 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001659}
1660
1661Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001662rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001664 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001665 Py_ssize_t i, j, count=0;
1666 PyObject *str;
1667 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001668
Christian Heimes44720832008-05-26 13:01:01 +00001669 if (list == NULL)
1670 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001671
Christian Heimes44720832008-05-26 13:01:01 +00001672 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Christian Heimes44720832008-05-26 13:01:01 +00001674 while (maxsplit-- > 0) {
1675 RSKIP_SPACE(s, i);
1676 if (i<0) break;
1677 j = i; i--;
1678 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001679 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001680 /* No whitespace in self, so just use it as list[0] */
1681 Py_INCREF(self);
1682 PyList_SET_ITEM(list, 0, (PyObject *)self);
1683 count++;
1684 break;
1685 }
1686 SPLIT_ADD(s, i + 1, j + 1);
1687 }
1688 if (i >= 0) {
1689 /* Only occurs when maxsplit was reached */
1690 /* Skip any remaining whitespace and copy to beginning of string */
1691 RSKIP_SPACE(s, i);
1692 if (i >= 0)
1693 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694
Christian Heimes44720832008-05-26 13:01:01 +00001695 }
1696 FIX_PREALLOC_SIZE(list);
1697 if (PyList_Reverse(list) < 0)
1698 goto onError;
1699 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001700 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001701 Py_DECREF(list);
1702 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001703}
1704
1705Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001706rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001707{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001708 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001709 register Py_ssize_t i, j, count=0;
1710 PyObject *str;
1711 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001712
Christian Heimes44720832008-05-26 13:01:01 +00001713 if (list == NULL)
1714 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 i = j = len - 1;
1717 while ((i >= 0) && (maxcount-- > 0)) {
1718 for (; i >= 0; i--) {
1719 if (s[i] == ch) {
1720 SPLIT_ADD(s, i + 1, j + 1);
1721 j = i = i - 1;
1722 break;
1723 }
1724 }
1725 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001726 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001727 /* ch not in self, so just use self as list[0] */
1728 Py_INCREF(self);
1729 PyList_SET_ITEM(list, 0, (PyObject *)self);
1730 count++;
1731 }
1732 else if (j >= -1) {
1733 SPLIT_ADD(s, 0, j + 1);
1734 }
1735 FIX_PREALLOC_SIZE(list);
1736 if (PyList_Reverse(list) < 0)
1737 goto onError;
1738 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001739
Christian Heimes44720832008-05-26 13:01:01 +00001740 onError:
1741 Py_DECREF(list);
1742 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001743}
1744
1745PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001746"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001747\n\
Christian Heimes44720832008-05-26 13:01:01 +00001748Return a list of the words in the string S, using sep as the\n\
1749delimiter string, starting at the end of the string and working\n\
1750to the front. If maxsplit is given, at most maxsplit splits are\n\
1751done. If sep is not specified or is None, any whitespace string\n\
1752is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001753
1754static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001755string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001756{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001757 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001758 Py_ssize_t maxsplit = -1, count=0;
1759 const char *s, *sub;
1760 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001761
Christian Heimes44720832008-05-26 13:01:01 +00001762 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1763 return NULL;
1764 if (maxsplit < 0)
1765 maxsplit = PY_SSIZE_T_MAX;
1766 if (subobj == Py_None)
1767 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001768 if (PyString_Check(subobj)) {
1769 sub = PyString_AS_STRING(subobj);
1770 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001771 }
1772#ifdef Py_USING_UNICODE
1773 else if (PyUnicode_Check(subobj))
1774 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1775#endif
1776 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1777 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001778
Christian Heimes44720832008-05-26 13:01:01 +00001779 if (n == 0) {
1780 PyErr_SetString(PyExc_ValueError, "empty separator");
1781 return NULL;
1782 }
1783 else if (n == 1)
1784 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001785
Christian Heimes44720832008-05-26 13:01:01 +00001786 list = PyList_New(PREALLOC_SIZE(maxsplit));
1787 if (list == NULL)
1788 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001789
Christian Heimes44720832008-05-26 13:01:01 +00001790 j = len;
1791 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001792
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001793 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001794 while ( (i >= 0) && (maxsplit-- > 0) ) {
1795 for (; i>=0; i--) {
1796 if (Py_STRING_MATCH(s, i, sub, n)) {
1797 SPLIT_ADD(s, i + n, j);
1798 j = i;
1799 i -= n;
1800 break;
1801 }
1802 }
1803 }
1804 SPLIT_ADD(s, 0, j);
1805 FIX_PREALLOC_SIZE(list);
1806 if (PyList_Reverse(list) < 0)
1807 goto onError;
1808 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001809
1810onError:
Christian Heimes44720832008-05-26 13:01:01 +00001811 Py_DECREF(list);
1812 return NULL;
1813}
1814
1815
1816PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001817"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001818\n\
1819Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001820iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001821
1822static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001823string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001824{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001825 char *sep = PyString_AS_STRING(self);
1826 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001827 PyObject *res = NULL;
1828 char *p;
1829 Py_ssize_t seqlen = 0;
1830 size_t sz = 0;
1831 Py_ssize_t i;
1832 PyObject *seq, *item;
1833
1834 seq = PySequence_Fast(orig, "");
1835 if (seq == NULL) {
1836 return NULL;
1837 }
1838
1839 seqlen = PySequence_Size(seq);
1840 if (seqlen == 0) {
1841 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001842 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001843 }
1844 if (seqlen == 1) {
1845 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001846 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001847 Py_INCREF(item);
1848 Py_DECREF(seq);
1849 return item;
1850 }
1851 }
1852
1853 /* There are at least two things to join, or else we have a subclass
1854 * of the builtin types in the sequence.
1855 * Do a pre-pass to figure out the total amount of space we'll
1856 * need (sz), see whether any argument is absurd, and defer to
1857 * the Unicode join if appropriate.
1858 */
1859 for (i = 0; i < seqlen; i++) {
1860 const size_t old_sz = sz;
1861 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001862 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001863#ifdef Py_USING_UNICODE
1864 if (PyUnicode_Check(item)) {
1865 /* Defer to Unicode join.
1866 * CAUTION: There's no gurantee that the
1867 * original sequence can be iterated over
1868 * again, so we must pass seq here.
1869 */
1870 PyObject *result;
1871 result = PyUnicode_Join((PyObject *)self, seq);
1872 Py_DECREF(seq);
1873 return result;
1874 }
1875#endif
1876 PyErr_Format(PyExc_TypeError,
1877 "sequence item %zd: expected string,"
1878 " %.80s found",
1879 i, Py_TYPE(item)->tp_name);
1880 Py_DECREF(seq);
1881 return NULL;
1882 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001883 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001884 if (i != 0)
1885 sz += seplen;
1886 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1887 PyErr_SetString(PyExc_OverflowError,
1888 "join() result is too long for a Python string");
1889 Py_DECREF(seq);
1890 return NULL;
1891 }
1892 }
1893
1894 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001895 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001896 if (res == NULL) {
1897 Py_DECREF(seq);
1898 return NULL;
1899 }
1900
1901 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001902 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001903 for (i = 0; i < seqlen; ++i) {
1904 size_t n;
1905 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001906 n = PyString_GET_SIZE(item);
1907 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001908 p += n;
1909 if (i < seqlen - 1) {
1910 Py_MEMCPY(p, sep, seplen);
1911 p += seplen;
1912 }
1913 }
1914
1915 Py_DECREF(seq);
1916 return res;
1917}
1918
1919PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001920_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001921{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001922 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001923 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001924 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001925}
1926
1927Py_LOCAL_INLINE(void)
1928string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1929{
1930 if (*end > len)
1931 *end = len;
1932 else if (*end < 0)
1933 *end += len;
1934 if (*end < 0)
1935 *end = 0;
1936 if (*start < 0)
1937 *start += len;
1938 if (*start < 0)
1939 *start = 0;
1940}
1941
1942Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001943string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001944{
1945 PyObject *subobj;
1946 const char *sub;
1947 Py_ssize_t sub_len;
1948 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1949 PyObject *obj_start=Py_None, *obj_end=Py_None;
1950
1951 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1952 &obj_start, &obj_end))
1953 return -2;
1954 /* To support None in "start" and "end" arguments, meaning
1955 the same as if they were not passed.
1956 */
1957 if (obj_start != Py_None)
1958 if (!_PyEval_SliceIndex(obj_start, &start))
1959 return -2;
1960 if (obj_end != Py_None)
1961 if (!_PyEval_SliceIndex(obj_end, &end))
1962 return -2;
1963
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001964 if (PyString_Check(subobj)) {
1965 sub = PyString_AS_STRING(subobj);
1966 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001967 }
1968#ifdef Py_USING_UNICODE
1969 else if (PyUnicode_Check(subobj))
1970 return PyUnicode_Find(
1971 (PyObject *)self, subobj, start, end, dir);
1972#endif
1973 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1974 /* XXX - the "expected a character buffer object" is pretty
1975 confusing for a non-expert. remap to something else ? */
1976 return -2;
1977
1978 if (dir > 0)
1979 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001980 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001981 sub, sub_len, start, end);
1982 else
1983 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001985 sub, sub_len, start, end);
1986}
1987
1988
1989PyDoc_STRVAR(find__doc__,
1990"S.find(sub [,start [,end]]) -> int\n\
1991\n\
1992Return the lowest index in S where substring sub is found,\n\
1993such that sub is contained within s[start:end]. Optional\n\
1994arguments start and end are interpreted as in slice notation.\n\
1995\n\
1996Return -1 on failure.");
1997
1998static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001999string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002000{
2001 Py_ssize_t result = string_find_internal(self, args, +1);
2002 if (result == -2)
2003 return NULL;
2004 return PyInt_FromSsize_t(result);
2005}
2006
2007
2008PyDoc_STRVAR(index__doc__,
2009"S.index(sub [,start [,end]]) -> int\n\
2010\n\
2011Like S.find() but raise ValueError when the substring is not found.");
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
2016 Py_ssize_t result = string_find_internal(self, args, +1);
2017 if (result == -2)
2018 return NULL;
2019 if (result == -1) {
2020 PyErr_SetString(PyExc_ValueError,
2021 "substring not found");
2022 return NULL;
2023 }
2024 return PyInt_FromSsize_t(result);
2025}
2026
2027
2028PyDoc_STRVAR(rfind__doc__,
2029"S.rfind(sub [,start [,end]]) -> int\n\
2030\n\
2031Return the highest index in S where substring sub is found,\n\
2032such that sub is contained within s[start:end]. Optional\n\
2033arguments start and end are interpreted as in slice notation.\n\
2034\n\
2035Return -1 on failure.");
2036
2037static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002038string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002039{
2040 Py_ssize_t result = string_find_internal(self, args, -1);
2041 if (result == -2)
2042 return NULL;
2043 return PyInt_FromSsize_t(result);
2044}
2045
2046
2047PyDoc_STRVAR(rindex__doc__,
2048"S.rindex(sub [,start [,end]]) -> int\n\
2049\n\
2050Like S.rfind() but raise ValueError when the substring is not found.");
2051
2052static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002053string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002054{
2055 Py_ssize_t result = string_find_internal(self, args, -1);
2056 if (result == -2)
2057 return NULL;
2058 if (result == -1) {
2059 PyErr_SetString(PyExc_ValueError,
2060 "substring not found");
2061 return NULL;
2062 }
2063 return PyInt_FromSsize_t(result);
2064}
2065
2066
2067Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002068do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002069{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002070 char *s = PyString_AS_STRING(self);
2071 Py_ssize_t len = PyString_GET_SIZE(self);
2072 char *sep = PyString_AS_STRING(sepobj);
2073 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002074 Py_ssize_t i, j;
2075
2076 i = 0;
2077 if (striptype != RIGHTSTRIP) {
2078 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2079 i++;
2080 }
2081 }
2082
2083 j = len;
2084 if (striptype != LEFTSTRIP) {
2085 do {
2086 j--;
2087 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2088 j++;
2089 }
2090
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002091 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002092 Py_INCREF(self);
2093 return (PyObject*)self;
2094 }
2095 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002096 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002097}
2098
2099
2100Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002101do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002102{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103 char *s = PyString_AS_STRING(self);
2104 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002105
2106 i = 0;
2107 if (striptype != RIGHTSTRIP) {
2108 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2109 i++;
2110 }
2111 }
2112
2113 j = len;
2114 if (striptype != LEFTSTRIP) {
2115 do {
2116 j--;
2117 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2118 j++;
2119 }
2120
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002121 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002122 Py_INCREF(self);
2123 return (PyObject*)self;
2124 }
2125 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002126 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002127}
2128
2129
2130Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002131do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002132{
2133 PyObject *sep = NULL;
2134
2135 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2136 return NULL;
2137
2138 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002139 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002140 return do_xstrip(self, striptype, sep);
2141#ifdef Py_USING_UNICODE
2142 else if (PyUnicode_Check(sep)) {
2143 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2144 PyObject *res;
2145 if (uniself==NULL)
2146 return NULL;
2147 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2148 striptype, sep);
2149 Py_DECREF(uniself);
2150 return res;
2151 }
2152#endif
2153 PyErr_Format(PyExc_TypeError,
2154#ifdef Py_USING_UNICODE
2155 "%s arg must be None, str or unicode",
2156#else
2157 "%s arg must be None or str",
2158#endif
2159 STRIPNAME(striptype));
2160 return NULL;
2161 }
2162
2163 return do_strip(self, striptype);
2164}
2165
2166
2167PyDoc_STRVAR(strip__doc__,
2168"S.strip([chars]) -> string or unicode\n\
2169\n\
2170Return a copy of the string S with leading and trailing\n\
2171whitespace removed.\n\
2172If chars is given and not None, remove characters in chars instead.\n\
2173If chars is unicode, S will be converted to unicode before stripping");
2174
2175static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002176string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002177{
2178 if (PyTuple_GET_SIZE(args) == 0)
2179 return do_strip(self, BOTHSTRIP); /* Common case */
2180 else
2181 return do_argstrip(self, BOTHSTRIP, args);
2182}
2183
2184
2185PyDoc_STRVAR(lstrip__doc__,
2186"S.lstrip([chars]) -> string or unicode\n\
2187\n\
2188Return a copy of the string S with leading whitespace removed.\n\
2189If chars is given and not None, remove characters in chars instead.\n\
2190If chars is unicode, S will be converted to unicode before stripping");
2191
2192static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002193string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002194{
2195 if (PyTuple_GET_SIZE(args) == 0)
2196 return do_strip(self, LEFTSTRIP); /* Common case */
2197 else
2198 return do_argstrip(self, LEFTSTRIP, args);
2199}
2200
2201
2202PyDoc_STRVAR(rstrip__doc__,
2203"S.rstrip([chars]) -> string or unicode\n\
2204\n\
2205Return a copy of the string S with trailing whitespace removed.\n\
2206If chars is given and not None, remove characters in chars instead.\n\
2207If chars is unicode, S will be converted to unicode before stripping");
2208
2209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002210string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002211{
2212 if (PyTuple_GET_SIZE(args) == 0)
2213 return do_strip(self, RIGHTSTRIP); /* Common case */
2214 else
2215 return do_argstrip(self, RIGHTSTRIP, args);
2216}
2217
2218
2219PyDoc_STRVAR(lower__doc__,
2220"S.lower() -> string\n\
2221\n\
2222Return a copy of the string S converted to lowercase.");
2223
2224/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2225#ifndef _tolower
2226#define _tolower tolower
2227#endif
2228
2229static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002230string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002231{
2232 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002233 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002234 PyObject *newobj;
2235
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002236 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002237 if (!newobj)
2238 return NULL;
2239
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002240 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002241
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002242 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002243
2244 for (i = 0; i < n; i++) {
2245 int c = Py_CHARMASK(s[i]);
2246 if (isupper(c))
2247 s[i] = _tolower(c);
2248 }
2249
2250 return newobj;
2251}
2252
2253PyDoc_STRVAR(upper__doc__,
2254"S.upper() -> string\n\
2255\n\
2256Return a copy of the string S converted to uppercase.");
2257
2258#ifndef _toupper
2259#define _toupper toupper
2260#endif
2261
2262static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002263string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002264{
2265 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002267 PyObject *newobj;
2268
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002269 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002270 if (!newobj)
2271 return NULL;
2272
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002273 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002274
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002275 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002276
2277 for (i = 0; i < n; i++) {
2278 int c = Py_CHARMASK(s[i]);
2279 if (islower(c))
2280 s[i] = _toupper(c);
2281 }
2282
2283 return newobj;
2284}
2285
2286PyDoc_STRVAR(title__doc__,
2287"S.title() -> string\n\
2288\n\
2289Return a titlecased version of S, i.e. words start with uppercase\n\
2290characters, all remaining cased characters have lowercase.");
2291
2292static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002293string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002294{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002295 char *s = PyString_AS_STRING(self), *s_new;
2296 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002297 int previous_is_cased = 0;
2298 PyObject *newobj;
2299
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002300 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002301 if (newobj == NULL)
2302 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002303 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002304 for (i = 0; i < n; i++) {
2305 int c = Py_CHARMASK(*s++);
2306 if (islower(c)) {
2307 if (!previous_is_cased)
2308 c = toupper(c);
2309 previous_is_cased = 1;
2310 } else if (isupper(c)) {
2311 if (previous_is_cased)
2312 c = tolower(c);
2313 previous_is_cased = 1;
2314 } else
2315 previous_is_cased = 0;
2316 *s_new++ = c;
2317 }
2318 return newobj;
2319}
2320
2321PyDoc_STRVAR(capitalize__doc__,
2322"S.capitalize() -> string\n\
2323\n\
2324Return a copy of the string S with only its first character\n\
2325capitalized.");
2326
2327static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002328string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002329{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002330 char *s = PyString_AS_STRING(self), *s_new;
2331 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002332 PyObject *newobj;
2333
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002334 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002335 if (newobj == NULL)
2336 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002337 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002338 if (0 < n) {
2339 int c = Py_CHARMASK(*s++);
2340 if (islower(c))
2341 *s_new = toupper(c);
2342 else
2343 *s_new = c;
2344 s_new++;
2345 }
2346 for (i = 1; i < n; i++) {
2347 int c = Py_CHARMASK(*s++);
2348 if (isupper(c))
2349 *s_new = tolower(c);
2350 else
2351 *s_new = c;
2352 s_new++;
2353 }
2354 return newobj;
2355}
2356
2357
2358PyDoc_STRVAR(count__doc__,
2359"S.count(sub[, start[, end]]) -> int\n\
2360\n\
2361Return the number of non-overlapping occurrences of substring sub in\n\
2362string S[start:end]. Optional arguments start and end are interpreted\n\
2363as in slice notation.");
2364
2365static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002366string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002367{
2368 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002369 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002370 Py_ssize_t sub_len;
2371 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2372
2373 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2374 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2375 return NULL;
2376
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002377 if (PyString_Check(sub_obj)) {
2378 sub = PyString_AS_STRING(sub_obj);
2379 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002380 }
2381#ifdef Py_USING_UNICODE
2382 else if (PyUnicode_Check(sub_obj)) {
2383 Py_ssize_t count;
2384 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2385 if (count == -1)
2386 return NULL;
2387 else
2388 return PyInt_FromSsize_t(count);
2389 }
2390#endif
2391 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2392 return NULL;
2393
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002394 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002395
2396 return PyInt_FromSsize_t(
2397 stringlib_count(str + start, end - start, sub, sub_len)
2398 );
2399}
2400
2401PyDoc_STRVAR(swapcase__doc__,
2402"S.swapcase() -> string\n\
2403\n\
2404Return a copy of the string S with uppercase characters\n\
2405converted to lowercase and vice versa.");
2406
2407static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002408string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002409{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410 char *s = PyString_AS_STRING(self), *s_new;
2411 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002412 PyObject *newobj;
2413
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002414 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002415 if (newobj == NULL)
2416 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002417 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002418 for (i = 0; i < n; i++) {
2419 int c = Py_CHARMASK(*s++);
2420 if (islower(c)) {
2421 *s_new = toupper(c);
2422 }
2423 else if (isupper(c)) {
2424 *s_new = tolower(c);
2425 }
2426 else
2427 *s_new = c;
2428 s_new++;
2429 }
2430 return newobj;
2431}
2432
2433
2434PyDoc_STRVAR(translate__doc__,
2435"S.translate(table [,deletechars]) -> string\n\
2436\n\
2437Return a copy of the string S, where all characters occurring\n\
2438in the optional argument deletechars are removed, and the\n\
2439remaining characters have been mapped through the given\n\
2440translation table, which must be a string of length 256.");
2441
2442static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002443string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002444{
2445 register char *input, *output;
2446 const char *table;
2447 register Py_ssize_t i, c, changed = 0;
2448 PyObject *input_obj = (PyObject*)self;
2449 const char *output_start, *del_table=NULL;
2450 Py_ssize_t inlen, tablen, dellen = 0;
2451 PyObject *result;
2452 int trans_table[256];
2453 PyObject *tableobj, *delobj = NULL;
2454
2455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2456 &tableobj, &delobj))
2457 return NULL;
2458
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002459 if (PyString_Check(tableobj)) {
2460 table = PyString_AS_STRING(tableobj);
2461 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002462 }
2463 else if (tableobj == Py_None) {
2464 table = NULL;
2465 tablen = 256;
2466 }
2467#ifdef Py_USING_UNICODE
2468 else if (PyUnicode_Check(tableobj)) {
2469 /* Unicode .translate() does not support the deletechars
2470 parameter; instead a mapping to None will cause characters
2471 to be deleted. */
2472 if (delobj != NULL) {
2473 PyErr_SetString(PyExc_TypeError,
2474 "deletions are implemented differently for unicode");
2475 return NULL;
2476 }
2477 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2478 }
2479#endif
2480 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2481 return NULL;
2482
2483 if (tablen != 256) {
2484 PyErr_SetString(PyExc_ValueError,
2485 "translation table must be 256 characters long");
2486 return NULL;
2487 }
2488
2489 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002490 if (PyString_Check(delobj)) {
2491 del_table = PyString_AS_STRING(delobj);
2492 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002493 }
2494#ifdef Py_USING_UNICODE
2495 else if (PyUnicode_Check(delobj)) {
2496 PyErr_SetString(PyExc_TypeError,
2497 "deletions are implemented differently for unicode");
2498 return NULL;
2499 }
2500#endif
2501 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2502 return NULL;
2503 }
2504 else {
2505 del_table = NULL;
2506 dellen = 0;
2507 }
2508
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002509 inlen = PyString_GET_SIZE(input_obj);
2510 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002511 if (result == NULL)
2512 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002513 output_start = output = PyString_AsString(result);
2514 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002515
2516 if (dellen == 0 && table != NULL) {
2517 /* If no deletions are required, use faster code */
2518 for (i = inlen; --i >= 0; ) {
2519 c = Py_CHARMASK(*input++);
2520 if (Py_CHARMASK((*output++ = table[c])) != c)
2521 changed = 1;
2522 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002523 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002524 return result;
2525 Py_DECREF(result);
2526 Py_INCREF(input_obj);
2527 return input_obj;
2528 }
2529
2530 if (table == NULL) {
2531 for (i = 0; i < 256; i++)
2532 trans_table[i] = Py_CHARMASK(i);
2533 } else {
2534 for (i = 0; i < 256; i++)
2535 trans_table[i] = Py_CHARMASK(table[i]);
2536 }
2537
2538 for (i = 0; i < dellen; i++)
2539 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2540
2541 for (i = inlen; --i >= 0; ) {
2542 c = Py_CHARMASK(*input++);
2543 if (trans_table[c] != -1)
2544 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2545 continue;
2546 changed = 1;
2547 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002548 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002549 Py_DECREF(result);
2550 Py_INCREF(input_obj);
2551 return input_obj;
2552 }
2553 /* Fix the size of the resulting string */
2554 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002555 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002556 return result;
2557}
2558
2559
/* Values for the `direction` argument of findstring() and countstring():
   those functions scan backwards when direction < 0 and forwards otherwise. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to c
   within the first target_len bytes of target, or NULL when there is none.
   This is memchr() with the result cast to char *. */
#define findchar(target, target_len, c) \
  ((char *)memchr((const void *)(target), c, target_len))
2567
2568/* String ops must return a string. */
2569/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002570Py_LOCAL(PyStringObject *)
2571return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002572{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002573 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002574 Py_INCREF(self);
2575 return self;
2576 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002577 return (PyStringObject *)PyString_FromStringAndSize(
2578 PyString_AS_STRING(self),
2579 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002580}
2581
2582Py_LOCAL_INLINE(Py_ssize_t)
2583countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2584{
2585 Py_ssize_t count=0;
2586 const char *start=target;
2587 const char *end=target+target_len;
2588
2589 while ( (start=findchar(start, end-start, c)) != NULL ) {
2590 count++;
2591 if (count >= maxcount)
2592 break;
2593 start += 1;
2594 }
2595 return count;
2596}
2597
2598Py_LOCAL(Py_ssize_t)
2599findstring(const char *target, Py_ssize_t target_len,
2600 const char *pattern, Py_ssize_t pattern_len,
2601 Py_ssize_t start,
2602 Py_ssize_t end,
2603 int direction)
2604{
2605 if (start < 0) {
2606 start += target_len;
2607 if (start < 0)
2608 start = 0;
2609 }
2610 if (end > target_len) {
2611 end = target_len;
2612 } else if (end < 0) {
2613 end += target_len;
2614 if (end < 0)
2615 end = 0;
2616 }
2617
2618 /* zero-length substrings always match at the first attempt */
2619 if (pattern_len == 0)
2620 return (direction > 0) ? start : end;
2621
2622 end -= pattern_len;
2623
2624 if (direction < 0) {
2625 for (; end >= start; end--)
2626 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2627 return end;
2628 } else {
2629 for (; start <= end; start++)
2630 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2631 return start;
2632 }
2633 return -1;
2634}
2635
2636Py_LOCAL_INLINE(Py_ssize_t)
2637countstring(const char *target, Py_ssize_t target_len,
2638 const char *pattern, Py_ssize_t pattern_len,
2639 Py_ssize_t start,
2640 Py_ssize_t end,
2641 int direction, Py_ssize_t maxcount)
2642{
2643 Py_ssize_t count=0;
2644
2645 if (start < 0) {
2646 start += target_len;
2647 if (start < 0)
2648 start = 0;
2649 }
2650 if (end > target_len) {
2651 end = target_len;
2652 } else if (end < 0) {
2653 end += target_len;
2654 if (end < 0)
2655 end = 0;
2656 }
2657
2658 /* zero-length substrings match everywhere */
2659 if (pattern_len == 0 || maxcount == 0) {
2660 if (target_len+1 < maxcount)
2661 return target_len+1;
2662 return maxcount;
2663 }
2664
2665 end -= pattern_len;
2666 if (direction < 0) {
2667 for (; (end >= start); end--)
2668 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2669 count++;
2670 if (--maxcount <= 0) break;
2671 end -= pattern_len-1;
2672 }
2673 } else {
2674 for (; (start <= end); start++)
2675 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2676 count++;
2677 if (--maxcount <= 0)
2678 break;
2679 start += pattern_len-1;
2680 }
2681 }
2682 return count;
2683}
2684
2685
2686/* Algorithms for different cases of string replacement */
2687
2688/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002689Py_LOCAL(PyStringObject *)
2690replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount)
2693{
2694 char *self_s, *result_s;
2695 Py_ssize_t self_len, result_len;
2696 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002697 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002698
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002699 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002700
2701 /* 1 at the end plus 1 after every character */
2702 count = self_len+1;
2703 if (maxcount < count)
2704 count = maxcount;
2705
2706 /* Check for overflow */
2707 /* result_len = count * to_len + self_len; */
2708 product = count * to_len;
2709 if (product / to_len != count) {
2710 PyErr_SetString(PyExc_OverflowError,
2711 "replace string is too long");
2712 return NULL;
2713 }
2714 result_len = product + self_len;
2715 if (result_len < 0) {
2716 PyErr_SetString(PyExc_OverflowError,
2717 "replace string is too long");
2718 return NULL;
2719 }
2720
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002721 if (! (result = (PyStringObject *)
2722 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002723 return NULL;
2724
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725 self_s = PyString_AS_STRING(self);
2726 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002727
2728 /* TODO: special case single character, which doesn't need memcpy */
2729
2730 /* Lay the first one down (guaranteed this will occur) */
2731 Py_MEMCPY(result_s, to_s, to_len);
2732 result_s += to_len;
2733 count -= 1;
2734
2735 for (i=0; i<count; i++) {
2736 *result_s++ = *self_s++;
2737 Py_MEMCPY(result_s, to_s, to_len);
2738 result_s += to_len;
2739 }
2740
2741 /* Copy the rest of the original string */
2742 Py_MEMCPY(result_s, self_s, self_len-i);
2743
2744 return result;
2745}
2746
2747/* Special case for deleting a single character */
2748/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002749Py_LOCAL(PyStringObject *)
2750replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002751 char from_c, Py_ssize_t maxcount)
2752{
2753 char *self_s, *result_s;
2754 char *start, *next, *end;
2755 Py_ssize_t self_len, result_len;
2756 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002757 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002758
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002759 self_len = PyString_GET_SIZE(self);
2760 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002761
2762 count = countchar(self_s, self_len, from_c, maxcount);
2763 if (count == 0) {
2764 return return_self(self);
2765 }
2766
2767 result_len = self_len - count; /* from_len == 1 */
2768 assert(result_len>=0);
2769
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002770 if ( (result = (PyStringObject *)
2771 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002772 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002773 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002774
2775 start = self_s;
2776 end = self_s + self_len;
2777 while (count-- > 0) {
2778 next = findchar(start, end-start, from_c);
2779 if (next == NULL)
2780 break;
2781 Py_MEMCPY(result_s, start, next-start);
2782 result_s += (next-start);
2783 start = next+1;
2784 }
2785 Py_MEMCPY(result_s, start, end-start);
2786
2787 return result;
2788}
2789
2790/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2791
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002792Py_LOCAL(PyStringObject *)
2793replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002794 const char *from_s, Py_ssize_t from_len,
2795 Py_ssize_t maxcount) {
2796 char *self_s, *result_s;
2797 char *start, *next, *end;
2798 Py_ssize_t self_len, result_len;
2799 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002800 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002801
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002802 self_len = PyString_GET_SIZE(self);
2803 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002804
2805 count = countstring(self_s, self_len,
2806 from_s, from_len,
2807 0, self_len, 1,
2808 maxcount);
2809
2810 if (count == 0) {
2811 /* no matches */
2812 return return_self(self);
2813 }
2814
2815 result_len = self_len - (count * from_len);
2816 assert (result_len>=0);
2817
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002818 if ( (result = (PyStringObject *)
2819 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002820 return NULL;
2821
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002822 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002823
2824 start = self_s;
2825 end = self_s + self_len;
2826 while (count-- > 0) {
2827 offset = findstring(start, end-start,
2828 from_s, from_len,
2829 0, end-start, FORWARD);
2830 if (offset == -1)
2831 break;
2832 next = start + offset;
2833
2834 Py_MEMCPY(result_s, start, next-start);
2835
2836 result_s += (next-start);
2837 start = next+from_len;
2838 }
2839 Py_MEMCPY(result_s, start, end-start);
2840 return result;
2841}
2842
2843/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844Py_LOCAL(PyStringObject *)
2845replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002846 char from_c, char to_c,
2847 Py_ssize_t maxcount)
2848{
2849 char *self_s, *result_s, *start, *end, *next;
2850 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002851 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002852
2853 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002854 self_s = PyString_AS_STRING(self);
2855 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002856
2857 next = findchar(self_s, self_len, from_c);
2858
2859 if (next == NULL) {
2860 /* No matches; return the original string */
2861 return return_self(self);
2862 }
2863
2864 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002866 if (result == NULL)
2867 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002868 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002869 Py_MEMCPY(result_s, self_s, self_len);
2870
2871 /* change everything in-place, starting with this one */
2872 start = result_s + (next-self_s);
2873 *start = to_c;
2874 start++;
2875 end = result_s + self_len;
2876
2877 while (--maxcount > 0) {
2878 next = findchar(start, end-start, from_c);
2879 if (next == NULL)
2880 break;
2881 *next = to_c;
2882 start = next+1;
2883 }
2884
2885 return result;
2886}
2887
2888/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002889Py_LOCAL(PyStringObject *)
2890replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002891 const char *from_s, Py_ssize_t from_len,
2892 const char *to_s, Py_ssize_t to_len,
2893 Py_ssize_t maxcount)
2894{
2895 char *result_s, *start, *end;
2896 char *self_s;
2897 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002898 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002899
2900 /* The result string will be the same size */
2901
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002902 self_s = PyString_AS_STRING(self);
2903 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002904
2905 offset = findstring(self_s, self_len,
2906 from_s, from_len,
2907 0, self_len, FORWARD);
2908 if (offset == -1) {
2909 /* No matches; return the original string */
2910 return return_self(self);
2911 }
2912
2913 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002914 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002915 if (result == NULL)
2916 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002917 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002918 Py_MEMCPY(result_s, self_s, self_len);
2919
2920 /* change everything in-place, starting with this one */
2921 start = result_s + offset;
2922 Py_MEMCPY(start, to_s, from_len);
2923 start += from_len;
2924 end = result_s + self_len;
2925
2926 while ( --maxcount > 0) {
2927 offset = findstring(start, end-start,
2928 from_s, from_len,
2929 0, end-start, FORWARD);
2930 if (offset==-1)
2931 break;
2932 Py_MEMCPY(start+offset, to_s, from_len);
2933 start += offset+from_len;
2934 }
2935
2936 return result;
2937}
2938
2939/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002940Py_LOCAL(PyStringObject *)
2941replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002942 char from_c,
2943 const char *to_s, Py_ssize_t to_len,
2944 Py_ssize_t maxcount)
2945{
2946 char *self_s, *result_s;
2947 char *start, *next, *end;
2948 Py_ssize_t self_len, result_len;
2949 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002950 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002951
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002952 self_s = PyString_AS_STRING(self);
2953 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002954
2955 count = countchar(self_s, self_len, from_c, maxcount);
2956 if (count == 0) {
2957 /* no matches, return unchanged */
2958 return return_self(self);
2959 }
2960
2961 /* use the difference between current and new, hence the "-1" */
2962 /* result_len = self_len + count * (to_len-1) */
2963 product = count * (to_len-1);
2964 if (product / (to_len-1) != count) {
2965 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2966 return NULL;
2967 }
2968 result_len = self_len + product;
2969 if (result_len < 0) {
2970 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2971 return NULL;
2972 }
2973
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002974 if ( (result = (PyStringObject *)
2975 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002976 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002977 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002978
2979 start = self_s;
2980 end = self_s + self_len;
2981 while (count-- > 0) {
2982 next = findchar(start, end-start, from_c);
2983 if (next == NULL)
2984 break;
2985
2986 if (next == start) {
2987 /* replace with the 'to' */
2988 Py_MEMCPY(result_s, to_s, to_len);
2989 result_s += to_len;
2990 start += 1;
2991 } else {
2992 /* copy the unchanged old then the 'to' */
2993 Py_MEMCPY(result_s, start, next-start);
2994 result_s += (next-start);
2995 Py_MEMCPY(result_s, to_s, to_len);
2996 result_s += to_len;
2997 start = next+1;
2998 }
2999 }
3000 /* Copy the remainder of the remaining string */
3001 Py_MEMCPY(result_s, start, end-start);
3002
3003 return result;
3004}
3005
3006/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003007Py_LOCAL(PyStringObject *)
3008replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003009 const char *from_s, Py_ssize_t from_len,
3010 const char *to_s, Py_ssize_t to_len,
3011 Py_ssize_t maxcount) {
3012 char *self_s, *result_s;
3013 char *start, *next, *end;
3014 Py_ssize_t self_len, result_len;
3015 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003016 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003017
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003018 self_s = PyString_AS_STRING(self);
3019 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003020
3021 count = countstring(self_s, self_len,
3022 from_s, from_len,
3023 0, self_len, FORWARD, maxcount);
3024 if (count == 0) {
3025 /* no matches, return unchanged */
3026 return return_self(self);
3027 }
3028
3029 /* Check for overflow */
3030 /* result_len = self_len + count * (to_len-from_len) */
3031 product = count * (to_len-from_len);
3032 if (product / (to_len-from_len) != count) {
3033 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3034 return NULL;
3035 }
3036 result_len = self_len + product;
3037 if (result_len < 0) {
3038 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3039 return NULL;
3040 }
3041
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003042 if ( (result = (PyStringObject *)
3043 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003046
3047 start = self_s;
3048 end = self_s + self_len;
3049 while (count-- > 0) {
3050 offset = findstring(start, end-start,
3051 from_s, from_len,
3052 0, end-start, FORWARD);
3053 if (offset == -1)
3054 break;
3055 next = start+offset;
3056 if (next == start) {
3057 /* replace with the 'to' */
3058 Py_MEMCPY(result_s, to_s, to_len);
3059 result_s += to_len;
3060 start += from_len;
3061 } else {
3062 /* copy the unchanged old then the 'to' */
3063 Py_MEMCPY(result_s, start, next-start);
3064 result_s += (next-start);
3065 Py_MEMCPY(result_s, to_s, to_len);
3066 result_s += to_len;
3067 start = next+from_len;
3068 }
3069 }
3070 /* Copy the remainder of the remaining string */
3071 Py_MEMCPY(result_s, start, end-start);
3072
3073 return result;
3074}
3075
3076
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003077Py_LOCAL(PyStringObject *)
3078replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003079 const char *from_s, Py_ssize_t from_len,
3080 const char *to_s, Py_ssize_t to_len,
3081 Py_ssize_t maxcount)
3082{
3083 if (maxcount < 0) {
3084 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003085 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003086 /* nothing to do; return the original string */
3087 return return_self(self);
3088 }
3089
3090 if (maxcount == 0 ||
3091 (from_len == 0 && to_len == 0)) {
3092 /* nothing to do; return the original string */
3093 return return_self(self);
3094 }
3095
3096 /* Handle zero-length special cases */
3097
3098 if (from_len == 0) {
3099 /* insert the 'to' string everywhere. */
3100 /* >>> "Python".replace("", ".") */
3101 /* '.P.y.t.h.o.n.' */
3102 return replace_interleave(self, to_s, to_len, maxcount);
3103 }
3104
3105 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3106 /* point for an empty self string to generate a non-empty string */
3107 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003108 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003109 return return_self(self);
3110 }
3111
3112 if (to_len == 0) {
3113 /* delete all occurances of 'from' string */
3114 if (from_len == 1) {
3115 return replace_delete_single_character(
3116 self, from_s[0], maxcount);
3117 } else {
3118 return replace_delete_substring(self, from_s, from_len, maxcount);
3119 }
3120 }
3121
3122 /* Handle special case where both strings have the same length */
3123
3124 if (from_len == to_len) {
3125 if (from_len == 1) {
3126 return replace_single_character_in_place(
3127 self,
3128 from_s[0],
3129 to_s[0],
3130 maxcount);
3131 } else {
3132 return replace_substring_in_place(
3133 self, from_s, from_len, to_s, to_len, maxcount);
3134 }
3135 }
3136
3137 /* Otherwise use the more generic algorithms */
3138 if (from_len == 1) {
3139 return replace_single_character(self, from_s[0],
3140 to_s, to_len, maxcount);
3141 } else {
3142 /* len('from')>=2, len('to')>=1 */
3143 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3144 }
3145}
3146
3147PyDoc_STRVAR(replace__doc__,
3148"S.replace (old, new[, count]) -> string\n\
3149\n\
3150Return a copy of string S with all occurrences of substring\n\
3151old replaced by new. If the optional argument count is\n\
3152given, only the first count occurrences are replaced.");
3153
3154static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003155string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003156{
3157 Py_ssize_t count = -1;
3158 PyObject *from, *to;
3159 const char *from_s, *to_s;
3160 Py_ssize_t from_len, to_len;
3161
3162 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3163 return NULL;
3164
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003165 if (PyString_Check(from)) {
3166 from_s = PyString_AS_STRING(from);
3167 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003168 }
3169#ifdef Py_USING_UNICODE
3170 if (PyUnicode_Check(from))
3171 return PyUnicode_Replace((PyObject *)self,
3172 from, to, count);
3173#endif
3174 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3175 return NULL;
3176
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177 if (PyString_Check(to)) {
3178 to_s = PyString_AS_STRING(to);
3179 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003180 }
3181#ifdef Py_USING_UNICODE
3182 else if (PyUnicode_Check(to))
3183 return PyUnicode_Replace((PyObject *)self,
3184 from, to, count);
3185#endif
3186 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3187 return NULL;
3188
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003189 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003190 from_s, from_len,
3191 to_s, to_len, count);
3192}
3193
3194/** End DALKE **/
3195
3196/* Matches the end (direction >= 0) or start (direction < 0) of self
3197 * against substr, using the start and end arguments. Returns
3198 * -1 on error, 0 if not found and 1 if found.
3199 */
3200Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003201_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003202 Py_ssize_t end, int direction)
3203{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003204 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003205 Py_ssize_t slen;
3206 const char* sub;
3207 const char* str;
3208
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003209 if (PyString_Check(substr)) {
3210 sub = PyString_AS_STRING(substr);
3211 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003212 }
3213#ifdef Py_USING_UNICODE
3214 else if (PyUnicode_Check(substr))
3215 return PyUnicode_Tailmatch((PyObject *)self,
3216 substr, start, end, direction);
3217#endif
3218 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3219 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003220 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003221
3222 string_adjust_indices(&start, &end, len);
3223
3224 if (direction < 0) {
3225 /* startswith */
3226 if (start+slen > len)
3227 return 0;
3228 } else {
3229 /* endswith */
3230 if (end-start < slen || start > len)
3231 return 0;
3232
3233 if (end-slen > start)
3234 start = end - slen;
3235 }
3236 if (end-start >= slen)
3237 return ! memcmp(str+start, sub, slen);
3238 return 0;
3239}
3240
3241
3242PyDoc_STRVAR(startswith__doc__,
3243"S.startswith(prefix[, start[, end]]) -> bool\n\
3244\n\
3245Return True if S starts with the specified prefix, False otherwise.\n\
3246With optional start, test S beginning at that position.\n\
3247With optional end, stop comparing S at that position.\n\
3248prefix can also be a tuple of strings to try.");
3249
3250static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003252{
3253 Py_ssize_t start = 0;
3254 Py_ssize_t end = PY_SSIZE_T_MAX;
3255 PyObject *subobj;
3256 int result;
3257
3258 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3259 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3260 return NULL;
3261 if (PyTuple_Check(subobj)) {
3262 Py_ssize_t i;
3263 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3264 result = _string_tailmatch(self,
3265 PyTuple_GET_ITEM(subobj, i),
3266 start, end, -1);
3267 if (result == -1)
3268 return NULL;
3269 else if (result) {
3270 Py_RETURN_TRUE;
3271 }
3272 }
3273 Py_RETURN_FALSE;
3274 }
3275 result = _string_tailmatch(self, subobj, start, end, -1);
3276 if (result == -1)
3277 return NULL;
3278 else
3279 return PyBool_FromLong(result);
3280}
3281
3282
3283PyDoc_STRVAR(endswith__doc__,
3284"S.endswith(suffix[, start[, end]]) -> bool\n\
3285\n\
3286Return True if S ends with the specified suffix, False otherwise.\n\
3287With optional start, test S beginning at that position.\n\
3288With optional end, stop comparing S at that position.\n\
3289suffix can also be a tuple of strings to try.");
3290
3291static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003292string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003293{
3294 Py_ssize_t start = 0;
3295 Py_ssize_t end = PY_SSIZE_T_MAX;
3296 PyObject *subobj;
3297 int result;
3298
3299 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3300 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3301 return NULL;
3302 if (PyTuple_Check(subobj)) {
3303 Py_ssize_t i;
3304 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3305 result = _string_tailmatch(self,
3306 PyTuple_GET_ITEM(subobj, i),
3307 start, end, +1);
3308 if (result == -1)
3309 return NULL;
3310 else if (result) {
3311 Py_RETURN_TRUE;
3312 }
3313 }
3314 Py_RETURN_FALSE;
3315 }
3316 result = _string_tailmatch(self, subobj, start, end, +1);
3317 if (result == -1)
3318 return NULL;
3319 else
3320 return PyBool_FromLong(result);
3321}
3322
3323
3324PyDoc_STRVAR(encode__doc__,
3325"S.encode([encoding[,errors]]) -> object\n\
3326\n\
3327Encodes S using the codec registered for encoding. encoding defaults\n\
3328to the default encoding. errors may be given to set a different error\n\
3329handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3330a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3331'xmlcharrefreplace' as well as any other name registered with\n\
3332codecs.register_error that is able to handle UnicodeEncodeErrors.");
3333
3334static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003335string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003336{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003337 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003338 char *encoding = NULL;
3339 char *errors = NULL;
3340 PyObject *v;
3341
Benjamin Peterson332d7212009-09-18 21:14:55 +00003342 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3343 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003344 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003346 if (v == NULL)
3347 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003349 PyErr_Format(PyExc_TypeError,
3350 "encoder did not return a string/unicode object "
3351 "(type=%.400s)",
3352 Py_TYPE(v)->tp_name);
3353 Py_DECREF(v);
3354 return NULL;
3355 }
3356 return v;
3357
3358 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003359 return NULL;
3360}
3361
Christian Heimes44720832008-05-26 13:01:01 +00003362
3363PyDoc_STRVAR(decode__doc__,
3364"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365\n\
Christian Heimes44720832008-05-26 13:01:01 +00003366Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003367to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003368handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3369a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003370as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003371able to handle UnicodeDecodeErrors.");
3372
3373static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003374string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003375{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003376 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003377 char *encoding = NULL;
3378 char *errors = NULL;
3379 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003380
Benjamin Peterson332d7212009-09-18 21:14:55 +00003381 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3382 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003383 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003385 if (v == NULL)
3386 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003387 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003388 PyErr_Format(PyExc_TypeError,
3389 "decoder did not return a string/unicode object "
3390 "(type=%.400s)",
3391 Py_TYPE(v)->tp_name);
3392 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003393 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003394 }
3395 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003396
Christian Heimes44720832008-05-26 13:01:01 +00003397 onError:
3398 return NULL;
3399}
3400
3401
3402PyDoc_STRVAR(expandtabs__doc__,
3403"S.expandtabs([tabsize]) -> string\n\
3404\n\
3405Return a copy of S where all tab characters are expanded using spaces.\n\
3406If tabsize is not given, a tab size of 8 characters is assumed.");
3407
3408static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003409string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003410{
3411 const char *e, *p, *qe;
3412 char *q;
3413 Py_ssize_t i, j, incr;
3414 PyObject *u;
3415 int tabsize = 8;
3416
3417 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3418 return NULL;
3419
3420 /* First pass: determine size of output string */
3421 i = 0; /* chars up to and including most recent \n or \r */
3422 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003423 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3424 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003425 if (*p == '\t') {
3426 if (tabsize > 0) {
3427 incr = tabsize - (j % tabsize);
3428 if (j > PY_SSIZE_T_MAX - incr)
3429 goto overflow1;
3430 j += incr;
3431 }
3432 }
3433 else {
3434 if (j > PY_SSIZE_T_MAX - 1)
3435 goto overflow1;
3436 j++;
3437 if (*p == '\n' || *p == '\r') {
3438 if (i > PY_SSIZE_T_MAX - j)
3439 goto overflow1;
3440 i += j;
3441 j = 0;
3442 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003443 }
Christian Heimes44720832008-05-26 13:01:01 +00003444
3445 if (i > PY_SSIZE_T_MAX - j)
3446 goto overflow1;
3447
3448 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003450 if (!u)
3451 return NULL;
3452
3453 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003454 q = PyString_AS_STRING(u); /* next output char */
3455 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003456
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003457 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003458 if (*p == '\t') {
3459 if (tabsize > 0) {
3460 i = tabsize - (j % tabsize);
3461 j += i;
3462 while (i--) {
3463 if (q >= qe)
3464 goto overflow2;
3465 *q++ = ' ';
3466 }
3467 }
3468 }
3469 else {
3470 if (q >= qe)
3471 goto overflow2;
3472 *q++ = *p;
3473 j++;
3474 if (*p == '\n' || *p == '\r')
3475 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003476 }
Christian Heimes44720832008-05-26 13:01:01 +00003477
3478 return u;
3479
3480 overflow2:
3481 Py_DECREF(u);
3482 overflow1:
3483 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3484 return NULL;
3485}
3486
3487Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003488pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003489{
3490 PyObject *u;
3491
3492 if (left < 0)
3493 left = 0;
3494 if (right < 0)
3495 right = 0;
3496
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003497 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003498 Py_INCREF(self);
3499 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003500 }
3501
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003502 u = PyString_FromStringAndSize(NULL,
3503 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003504 if (u) {
3505 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003506 memset(PyString_AS_STRING(u), fill, left);
3507 Py_MEMCPY(PyString_AS_STRING(u) + left,
3508 PyString_AS_STRING(self),
3509 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003510 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003511 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003512 fill, right);
3513 }
3514
3515 return u;
3516}
3517
3518PyDoc_STRVAR(ljust__doc__,
3519"S.ljust(width[, fillchar]) -> string\n"
3520"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003521"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003522"done using the specified fill character (default is a space).");
3523
3524static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003525string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003526{
3527 Py_ssize_t width;
3528 char fillchar = ' ';
3529
3530 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3531 return NULL;
3532
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003533 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003534 Py_INCREF(self);
3535 return (PyObject*) self;
3536 }
3537
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003538 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003539}
3540
3541
3542PyDoc_STRVAR(rjust__doc__,
3543"S.rjust(width[, fillchar]) -> string\n"
3544"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003545"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003546"done using the specified fill character (default is a space)");
3547
3548static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003550{
3551 Py_ssize_t width;
3552 char fillchar = ' ';
3553
3554 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3555 return NULL;
3556
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003557 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003558 Py_INCREF(self);
3559 return (PyObject*) self;
3560 }
3561
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003562 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003563}
3564
3565
3566PyDoc_STRVAR(center__doc__,
3567"S.center(width[, fillchar]) -> string\n"
3568"\n"
3569"Return S centered in a string of length width. Padding is\n"
3570"done using the specified fill character (default is a space)");
3571
3572static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003573string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003574{
3575 Py_ssize_t marg, left;
3576 Py_ssize_t width;
3577 char fillchar = ' ';
3578
3579 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3580 return NULL;
3581
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003582 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003583 Py_INCREF(self);
3584 return (PyObject*) self;
3585 }
3586
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003587 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003588 left = marg / 2 + (marg & width & 1);
3589
3590 return pad(self, left, marg - left, fillchar);
3591}
3592
3593PyDoc_STRVAR(zfill__doc__,
3594"S.zfill(width) -> string\n"
3595"\n"
3596"Pad a numeric string S with zeros on the left, to fill a field\n"
3597"of the specified width. The string S is never truncated.");
3598
3599static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003600string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003601{
3602 Py_ssize_t fill;
3603 PyObject *s;
3604 char *p;
3605 Py_ssize_t width;
3606
3607 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3608 return NULL;
3609
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003610 if (PyString_GET_SIZE(self) >= width) {
3611 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003612 Py_INCREF(self);
3613 return (PyObject*) self;
3614 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003615 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003616 return PyString_FromStringAndSize(
3617 PyString_AS_STRING(self),
3618 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003619 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003620 }
3621
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003622 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003623
Christian Heimes44720832008-05-26 13:01:01 +00003624 s = pad(self, fill, 0, '0');
3625
3626 if (s == NULL)
3627 return NULL;
3628
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003629 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003630 if (p[fill] == '+' || p[fill] == '-') {
3631 /* move sign to beginning of string */
3632 p[0] = p[fill];
3633 p[fill] = '0';
3634 }
3635
3636 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003637}
3638
Christian Heimes44720832008-05-26 13:01:01 +00003639PyDoc_STRVAR(isspace__doc__,
3640"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003641\n\
Christian Heimes44720832008-05-26 13:01:01 +00003642Return True if all characters in S are whitespace\n\
3643and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003644
Christian Heimes44720832008-05-26 13:01:01 +00003645static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003646string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003647{
Christian Heimes44720832008-05-26 13:01:01 +00003648 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003650 register const unsigned char *e;
3651
3652 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003653 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003654 isspace(*p))
3655 return PyBool_FromLong(1);
3656
3657 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003658 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003659 return PyBool_FromLong(0);
3660
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003661 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003662 for (; p < e; p++) {
3663 if (!isspace(*p))
3664 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003665 }
Christian Heimes44720832008-05-26 13:01:01 +00003666 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003667}
3668
Christian Heimes44720832008-05-26 13:01:01 +00003669
3670PyDoc_STRVAR(isalpha__doc__,
3671"S.isalpha() -> bool\n\
3672\n\
3673Return True if all characters in S are alphabetic\n\
3674and there is at least one character in S, False otherwise.");
3675
3676static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003677string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003678{
Christian Heimes44720832008-05-26 13:01:01 +00003679 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003681 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003682
Christian Heimes44720832008-05-26 13:01:01 +00003683 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003684 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003685 isalpha(*p))
3686 return PyBool_FromLong(1);
3687
3688 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003689 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003690 return PyBool_FromLong(0);
3691
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003692 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003693 for (; p < e; p++) {
3694 if (!isalpha(*p))
3695 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003696 }
Christian Heimes44720832008-05-26 13:01:01 +00003697 return PyBool_FromLong(1);
3698}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003699
Christian Heimes44720832008-05-26 13:01:01 +00003700
3701PyDoc_STRVAR(isalnum__doc__,
3702"S.isalnum() -> bool\n\
3703\n\
3704Return True if all characters in S are alphanumeric\n\
3705and there is at least one character in S, False otherwise.");
3706
3707static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003708string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003709{
3710 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003712 register const unsigned char *e;
3713
3714 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003715 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003716 isalnum(*p))
3717 return PyBool_FromLong(1);
3718
3719 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003720 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003721 return PyBool_FromLong(0);
3722
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003723 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003724 for (; p < e; p++) {
3725 if (!isalnum(*p))
3726 return PyBool_FromLong(0);
3727 }
3728 return PyBool_FromLong(1);
3729}
3730
3731
3732PyDoc_STRVAR(isdigit__doc__,
3733"S.isdigit() -> bool\n\
3734\n\
3735Return True if all characters in S are digits\n\
3736and there is at least one character in S, False otherwise.");
3737
3738static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003739string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003740{
3741 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003743 register const unsigned char *e;
3744
3745 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003746 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003747 isdigit(*p))
3748 return PyBool_FromLong(1);
3749
3750 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003751 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003752 return PyBool_FromLong(0);
3753
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003754 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003755 for (; p < e; p++) {
3756 if (!isdigit(*p))
3757 return PyBool_FromLong(0);
3758 }
3759 return PyBool_FromLong(1);
3760}
3761
3762
3763PyDoc_STRVAR(islower__doc__,
3764"S.islower() -> bool\n\
3765\n\
3766Return True if all cased characters in S are lowercase and there is\n\
3767at least one cased character in S, False otherwise.");
3768
3769static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003770string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003771{
3772 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003773 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003774 register const unsigned char *e;
3775 int cased;
3776
3777 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003778 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003779 return PyBool_FromLong(islower(*p) != 0);
3780
3781 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003782 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003783 return PyBool_FromLong(0);
3784
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003785 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003786 cased = 0;
3787 for (; p < e; p++) {
3788 if (isupper(*p))
3789 return PyBool_FromLong(0);
3790 else if (!cased && islower(*p))
3791 cased = 1;
3792 }
3793 return PyBool_FromLong(cased);
3794}
3795
3796
3797PyDoc_STRVAR(isupper__doc__,
3798"S.isupper() -> bool\n\
3799\n\
3800Return True if all cased characters in S are uppercase and there is\n\
3801at least one cased character in S, False otherwise.");
3802
3803static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003805{
3806 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003807 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003808 register const unsigned char *e;
3809 int cased;
3810
3811 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003812 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003813 return PyBool_FromLong(isupper(*p) != 0);
3814
3815 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003816 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003817 return PyBool_FromLong(0);
3818
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003819 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003820 cased = 0;
3821 for (; p < e; p++) {
3822 if (islower(*p))
3823 return PyBool_FromLong(0);
3824 else if (!cased && isupper(*p))
3825 cased = 1;
3826 }
3827 return PyBool_FromLong(cased);
3828}
3829
3830
3831PyDoc_STRVAR(istitle__doc__,
3832"S.istitle() -> bool\n\
3833\n\
3834Return True if S is a titlecased string and there is at least one\n\
3835character in S, i.e. uppercase characters may only follow uncased\n\
3836characters and lowercase characters only cased ones. Return False\n\
3837otherwise.");
3838
3839static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003840string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003841{
3842 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003843 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003844 register const unsigned char *e;
3845 int cased, previous_is_cased;
3846
3847 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003848 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003849 return PyBool_FromLong(isupper(*p) != 0);
3850
3851 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003852 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003853 return PyBool_FromLong(0);
3854
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003856 cased = 0;
3857 previous_is_cased = 0;
3858 for (; p < e; p++) {
3859 register const unsigned char ch = *p;
3860
3861 if (isupper(ch)) {
3862 if (previous_is_cased)
3863 return PyBool_FromLong(0);
3864 previous_is_cased = 1;
3865 cased = 1;
3866 }
3867 else if (islower(ch)) {
3868 if (!previous_is_cased)
3869 return PyBool_FromLong(0);
3870 previous_is_cased = 1;
3871 cased = 1;
3872 }
3873 else
3874 previous_is_cased = 0;
3875 }
3876 return PyBool_FromLong(cased);
3877}
3878
3879
3880PyDoc_STRVAR(splitlines__doc__,
3881"S.splitlines([keepends]) -> list of strings\n\
3882\n\
3883Return a list of the lines in S, breaking at line boundaries.\n\
3884Line breaks are not included in the resulting list unless keepends\n\
3885is given and true.");
3886
3887static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003888string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003889{
3890 register Py_ssize_t i;
3891 register Py_ssize_t j;
3892 Py_ssize_t len;
3893 int keepends = 0;
3894 PyObject *list;
3895 PyObject *str;
3896 char *data;
3897
3898 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3899 return NULL;
3900
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003901 data = PyString_AS_STRING(self);
3902 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003903
3904 /* This does not use the preallocated list because splitlines is
3905 usually run with hundreds of newlines. The overhead of
3906 switching between PyList_SET_ITEM and append causes about a
3907 2-3% slowdown for that common case. A smarter implementation
3908 could move the if check out, so the SET_ITEMs are done first
3909 and the appends only done when the prealloc buffer is full.
3910 That's too much work for little gain.*/
3911
3912 list = PyList_New(0);
3913 if (!list)
3914 goto onError;
3915
3916 for (i = j = 0; i < len; ) {
3917 Py_ssize_t eol;
3918
3919 /* Find a line and append it */
3920 while (i < len && data[i] != '\n' && data[i] != '\r')
3921 i++;
3922
3923 /* Skip the line break reading CRLF as one line break */
3924 eol = i;
3925 if (i < len) {
3926 if (data[i] == '\r' && i + 1 < len &&
3927 data[i+1] == '\n')
3928 i += 2;
3929 else
3930 i++;
3931 if (keepends)
3932 eol = i;
3933 }
3934 SPLIT_APPEND(data, j, eol);
3935 j = i;
3936 }
3937 if (j < len) {
3938 SPLIT_APPEND(data, j, len);
3939 }
3940
3941 return list;
3942
3943 onError:
3944 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003945 return NULL;
3946}
3947
Robert Schuppenies51df0642008-06-01 16:16:17 +00003948PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003949"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003950
3951static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003952string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003953{
3954 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00003955 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003956 return PyInt_FromSsize_t(res);
3957}
3958
/* Retire the helper macros of the split family so they cannot leak into the
   code below (SPLIT_APPEND was last used by string_splitlines above).
   NOTE(review): the other three are defined and used outside this part of
   the file -- presumably by the split/rsplit implementations; confirm. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003963
3964static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003965string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003966{
Christian Heimes44720832008-05-26 13:01:01 +00003967 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003968}
3969
Christian Heimes1a6387e2008-03-26 12:49:49 +00003970
Christian Heimes44720832008-05-26 13:01:01 +00003971#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003972
Christian Heimes44720832008-05-26 13:01:01 +00003973PyDoc_STRVAR(format__doc__,
3974"S.format(*args, **kwargs) -> unicode\n\
3975\n\
3976");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003977
Eric Smithdc13b792008-05-30 18:10:04 +00003978static PyObject *
3979string__format__(PyObject* self, PyObject* args)
3980{
3981 PyObject *format_spec;
3982 PyObject *result = NULL;
3983 PyObject *tmp = NULL;
3984
3985 /* If 2.x, convert format_spec to the same type as value */
3986 /* This is to allow things like u''.format('') */
3987 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3988 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003989 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00003990 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3991 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3992 goto done;
3993 }
3994 tmp = PyObject_Str(format_spec);
3995 if (tmp == NULL)
3996 goto done;
3997 format_spec = tmp;
3998
3999 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004000 PyString_AS_STRING(format_spec),
4001 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004002done:
4003 Py_XDECREF(tmp);
4004 return result;
4005}
4006
Christian Heimes44720832008-05-26 13:01:01 +00004007PyDoc_STRVAR(p_format__doc__,
4008"S.__format__(format_spec) -> unicode\n\
4009\n\
4010");
4011
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004012
/* Method table for the str type.  Each entry gives the Python-level method
   name, the C implementation, the calling convention flags and (where one
   exists) the docstring.  The table ends with a {NULL, NULL} sentinel. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* Advanced string formatting; the two underscore-prefixed helpers are
       registered without a docstring. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    /* Codec entry points; both accept keyword arguments. */
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
4071
4072static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004073str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004074
Christian Heimes44720832008-05-26 13:01:01 +00004075static PyObject *
4076string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4077{
4078 PyObject *x = NULL;
4079 static char *kwlist[] = {"object", 0};
4080
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004081 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004082 return str_subtype_new(type, args, kwds);
4083 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4084 return NULL;
4085 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004086 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004087 return PyObject_Str(x);
4088}
4089
4090static PyObject *
4091str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4092{
4093 PyObject *tmp, *pnew;
4094 Py_ssize_t n;
4095
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004096 assert(PyType_IsSubtype(type, &PyString_Type));
4097 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004098 if (tmp == NULL)
4099 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004100 assert(PyString_CheckExact(tmp));
4101 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004102 pnew = type->tp_alloc(type, n);
4103 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004104 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4105 ((PyStringObject *)pnew)->ob_shash =
4106 ((PyStringObject *)tmp)->ob_shash;
4107 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004108 }
4109 Py_DECREF(tmp);
4110 return pnew;
4111}
4112
4113static PyObject *
4114basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4115{
4116 PyErr_SetString(PyExc_TypeError,
4117 "The basestring type cannot be instantiated");
4118 return NULL;
4119}
4120
4121static PyObject *
4122string_mod(PyObject *v, PyObject *w)
4123{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004124 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004125 Py_INCREF(Py_NotImplemented);
4126 return Py_NotImplemented;
4127 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004128 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004129}
4130
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4133
/* Number protocol table for str.  Only the remainder slot is filled in
   (string_mod); the slots after nb_remainder are left out of the
   initializer and are therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
4141
4142
/* Type object for basestring.  It derives from object, may itself be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new (basestring_new) always
   raises, so it cannot be instantiated directly.  Apart from the docstring
   and tp_new, every slot is left empty. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4184
4185PyDoc_STRVAR(string_doc,
4186"str(object) -> string\n\
4187\n\
4188Return a nice string representation of the object.\n\
4189If the argument is a string, the return value is the same object.");
4190
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004191PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004192 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4193 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004194 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00004195 sizeof(char),
4196 string_dealloc, /* tp_dealloc */
4197 (printfunc)string_print, /* tp_print */
4198 0, /* tp_getattr */
4199 0, /* tp_setattr */
4200 0, /* tp_compare */
4201 string_repr, /* tp_repr */
4202 &string_as_number, /* tp_as_number */
4203 &string_as_sequence, /* tp_as_sequence */
4204 &string_as_mapping, /* tp_as_mapping */
4205 (hashfunc)string_hash, /* tp_hash */
4206 0, /* tp_call */
4207 string_str, /* tp_str */
4208 PyObject_GenericGetAttr, /* tp_getattro */
4209 0, /* tp_setattro */
4210 &string_as_buffer, /* tp_as_buffer */
4211 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4212 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4213 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4214 string_doc, /* tp_doc */
4215 0, /* tp_traverse */
4216 0, /* tp_clear */
4217 (richcmpfunc)string_richcompare, /* tp_richcompare */
4218 0, /* tp_weaklistoffset */
4219 0, /* tp_iter */
4220 0, /* tp_iternext */
4221 string_methods, /* tp_methods */
4222 0, /* tp_members */
4223 0, /* tp_getset */
4224 &PyBaseString_Type, /* tp_base */
4225 0, /* tp_dict */
4226 0, /* tp_descr_get */
4227 0, /* tp_descr_set */
4228 0, /* tp_dictoffset */
4229 0, /* tp_init */
4230 0, /* tp_alloc */
4231 string_new, /* tp_new */
4232 PyObject_Del, /* tp_free */
4233};
4234
4235void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004236PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004237{
4238 register PyObject *v;
4239 if (*pv == NULL)
4240 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004241 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004242 Py_DECREF(*pv);
4243 *pv = NULL;
4244 return;
4245 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004246 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004247 Py_DECREF(*pv);
4248 *pv = v;
4249}
4250
4251void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004252PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004253{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004254 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004255 Py_XDECREF(w);
4256}
4257
4258
4259/* The following function breaks the notion that strings are immutable:
4260 it changes the size of a string. We get away with this only if there
4261 is only one module referencing the object. You can also think of it
4262 as creating a new string object and destroying the old one, only
4263 more efficiently. In any case, don't use this if the string may
4264 already be known to some other part of the code...
4265 Note that if there's not enough memory to resize the string, the original
4266 string object at *pv is deallocated, *pv is set to NULL, an "out of
4267 memory" exception is set, and -1 is returned. Else (on success) 0 is
4268 returned, and the value in *pv may or may not be the same as on input.
4269 As always, an extra byte is allocated for a trailing \0 byte (newsize
4270 does *not* include that), and a trailing \0 byte is stored.
4271*/
4272
4273int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004274_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004275{
4276 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004277 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004278 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004279 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4280 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004281 *pv = 0;
4282 Py_DECREF(v);
4283 PyErr_BadInternalCall();
4284 return -1;
4285 }
4286 /* XXX UNREF/NEWREF interface should be more symmetrical */
4287 _Py_DEC_REFTOTAL;
4288 _Py_ForgetReference(v);
4289 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004290 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004291 if (*pv == NULL) {
4292 PyObject_Del(v);
4293 PyErr_NoMemory();
4294 return -1;
4295 }
4296 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004297 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004298 Py_SIZE(sv) = newsize;
4299 sv->ob_sval[newsize] = '\0';
4300 sv->ob_shash = -1; /* invalidate cached hash value */
4301 return 0;
4302}
4303
4304/* Helpers for formatstring */
4305
4306Py_LOCAL_INLINE(PyObject *)
4307getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4308{
4309 Py_ssize_t argidx = *p_argidx;
4310 if (argidx < arglen) {
4311 (*p_argidx)++;
4312 if (arglen < 0)
4313 return args;
4314 else
4315 return PyTuple_GetItem(args, argidx);
4316 }
4317 PyErr_SetString(PyExc_TypeError,
4318 "not enough arguments for format string");
4319 return NULL;
4320}
4321
/* Format flag bits, one per flag character of a conversion specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4334
4335Py_LOCAL_INLINE(int)
4336formatfloat(char *buf, size_t buflen, int flags,
4337 int prec, int type, PyObject *v)
4338{
Christian Heimes44720832008-05-26 13:01:01 +00004339 double x;
4340 x = PyFloat_AsDouble(v);
4341 if (x == -1.0 && PyErr_Occurred()) {
4342 PyErr_Format(PyExc_TypeError, "float argument required, "
4343 "not %.200s", Py_TYPE(v)->tp_name);
4344 return -1;
4345 }
4346 if (prec < 0)
4347 prec = 6;
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004348#if SIZEOF_INT > 4
Mark Dickinson174e9092009-03-29 16:17:16 +00004349 /* make sure that the decimal representation of precision really does
4350 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004351 if (prec > 0x7fffffff) {
Mark Dickinson174e9092009-03-29 16:17:16 +00004352 PyErr_SetString(PyExc_OverflowError,
4353 "outrageously large precision "
4354 "for formatted float");
4355 return -1;
4356 }
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004357#endif
Mark Dickinson174e9092009-03-29 16:17:16 +00004358
Mark Dickinson2e648ec2009-03-29 14:37:51 +00004359 if (type == 'f' && fabs(x) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +00004360 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004361 /* Worst case length calc to ensure no buffer overrun:
4362
4363 'g' formats:
4364 fmt = %#.<prec>g
4365 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4366 for any double rep.)
4367 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4368
4369 'f' formats:
4370 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4371 len = 1 + 50 + 1 + prec = 52 + prec
4372
4373 If prec=0 the effective precision is 1 (the leading digit is
4374 always given), therefore increase the length by one.
4375
4376 */
4377 if (((type == 'g' || type == 'G') &&
4378 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004379 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004380 PyErr_SetString(PyExc_OverflowError,
4381 "formatted float is too long (precision too large?)");
4382 return -1;
4383 }
Eric Smith068f0652009-04-25 21:40:15 +00004384 _PyOS_double_to_string(buf, buflen, x, type, prec,
4385 (flags&F_ALT)?Py_DTSF_ALT:0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00004386 return (int)strlen(buf);
4387}
4388
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004389/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004390 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4391 * Python's regular ints.
4392 * Return value: a new PyString*, or NULL if error.
4393 * . *pbuf is set to point into it,
4394 * *plen set to the # of chars following that.
4395 * Caller must decref it when done using pbuf.
4396 * The string starting at *pbuf is of the form
4397 * "-"? ("0x" | "0X")? digit+
4398 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4399 * set in flags. The case of hex digits will be correct,
4400 * There will be at least prec digits, zero-filled on the left if
4401 * necessary to get that many.
4402 * val object to be converted
4403 * flags bitmask of format flags; only F_ALT is looked at
4404 * prec minimum number of digits; 0-fill on left if needed
4405 * type a character in [duoxX]; u acts the same as d
4406 *
4407 * CAUTION: o, x and X conversions on regular ints can never
4408 * produce a '-' sign, but can for Python's unbounded ints.
4409 */
4410PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004411_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004412 char **pbuf, int *plen)
4413{
4414 PyObject *result = NULL;
4415 char *buf;
4416 Py_ssize_t i;
4417 int sign; /* 1 if '-', else 0 */
4418 int len; /* number of characters */
4419 Py_ssize_t llen;
4420 int numdigits; /* len == numnondigits + numdigits */
4421 int numnondigits = 0;
4422
4423 switch (type) {
4424 case 'd':
4425 case 'u':
4426 result = Py_TYPE(val)->tp_str(val);
4427 break;
4428 case 'o':
4429 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4430 break;
4431 case 'x':
4432 case 'X':
4433 numnondigits = 2;
4434 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4435 break;
4436 default:
4437 assert(!"'type' not in [duoxX]");
4438 }
4439 if (!result)
4440 return NULL;
4441
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004442 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004443 if (!buf) {
4444 Py_DECREF(result);
4445 return NULL;
4446 }
4447
4448 /* To modify the string in-place, there can only be one reference. */
4449 if (Py_REFCNT(result) != 1) {
4450 PyErr_BadInternalCall();
4451 return NULL;
4452 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004453 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004454 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004455 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004456 return NULL;
4457 }
4458 len = (int)llen;
4459 if (buf[len-1] == 'L') {
4460 --len;
4461 buf[len] = '\0';
4462 }
4463 sign = buf[0] == '-';
4464 numnondigits += sign;
4465 numdigits = len - numnondigits;
4466 assert(numdigits > 0);
4467
4468 /* Get rid of base marker unless F_ALT */
4469 if ((flags & F_ALT) == 0) {
4470 /* Need to skip 0x, 0X or 0. */
4471 int skipped = 0;
4472 switch (type) {
4473 case 'o':
4474 assert(buf[sign] == '0');
4475 /* If 0 is only digit, leave it alone. */
4476 if (numdigits > 1) {
4477 skipped = 1;
4478 --numdigits;
4479 }
4480 break;
4481 case 'x':
4482 case 'X':
4483 assert(buf[sign] == '0');
4484 assert(buf[sign + 1] == 'x');
4485 skipped = 2;
4486 numnondigits -= 2;
4487 break;
4488 }
4489 if (skipped) {
4490 buf += skipped;
4491 len -= skipped;
4492 if (sign)
4493 buf[0] = '-';
4494 }
4495 assert(len == numnondigits + numdigits);
4496 assert(numdigits > 0);
4497 }
4498
4499 /* Fill with leading zeroes to meet minimum width. */
4500 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004501 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004502 numnondigits + prec);
4503 char *b1;
4504 if (!r1) {
4505 Py_DECREF(result);
4506 return NULL;
4507 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004508 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004509 for (i = 0; i < numnondigits; ++i)
4510 *b1++ = *buf++;
4511 for (i = 0; i < prec - numdigits; i++)
4512 *b1++ = '0';
4513 for (i = 0; i < numdigits; i++)
4514 *b1++ = *buf++;
4515 *b1 = '\0';
4516 Py_DECREF(result);
4517 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004518 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004519 len = numnondigits + prec;
4520 }
4521
4522 /* Fix up case for hex conversions. */
4523 if (type == 'X') {
4524 /* Need to convert all lower case letters to upper case.
4525 and need to convert 0x to 0X (and -0x to -0X). */
4526 for (i = 0; i < len; i++)
4527 if (buf[i] >= 'a' && buf[i] <= 'x')
4528 buf[i] -= 'a'-'A';
4529 }
4530 *pbuf = buf;
4531 *plen = len;
4532 return result;
4533}
4534
4535Py_LOCAL_INLINE(int)
4536formatint(char *buf, size_t buflen, int flags,
4537 int prec, int type, PyObject *v)
4538{
4539 /* fmt = '%#.' + `prec` + 'l' + `type`
4540 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4541 + 1 + 1 = 24 */
4542 char fmt[64]; /* plenty big enough! */
4543 char *sign;
4544 long x;
4545
4546 x = PyInt_AsLong(v);
4547 if (x == -1 && PyErr_Occurred()) {
4548 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4549 Py_TYPE(v)->tp_name);
4550 return -1;
4551 }
4552 if (x < 0 && type == 'u') {
4553 type = 'd';
4554 }
4555 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4556 sign = "-";
4557 else
4558 sign = "";
4559 if (prec < 0)
4560 prec = 1;
4561
4562 if ((flags & F_ALT) &&
4563 (type == 'x' || type == 'X')) {
4564 /* When converting under %#x or %#X, there are a number
4565 * of issues that cause pain:
4566 * - when 0 is being converted, the C standard leaves off
4567 * the '0x' or '0X', which is inconsistent with other
4568 * %#x/%#X conversions and inconsistent with Python's
4569 * hex() function
4570 * - there are platforms that violate the standard and
4571 * convert 0 with the '0x' or '0X'
4572 * (Metrowerks, Compaq Tru64)
4573 * - there are platforms that give '0x' when converting
4574 * under %#X, but convert 0 in accordance with the
4575 * standard (OS/2 EMX)
4576 *
4577 * We can achieve the desired consistency by inserting our
4578 * own '0x' or '0X' prefix, and substituting %x/%X in place
4579 * of %#x/%#X.
4580 *
4581 * Note that this is the same approach as used in
4582 * formatint() in unicodeobject.c
4583 */
4584 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4585 sign, type, prec, type);
4586 }
4587 else {
4588 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4589 sign, (flags&F_ALT) ? "#" : "",
4590 prec, type);
4591 }
4592
4593 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4594 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4595 */
4596 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4597 PyErr_SetString(PyExc_OverflowError,
4598 "formatted integer is too long (precision too large?)");
4599 return -1;
4600 }
4601 if (sign[0])
4602 PyOS_snprintf(buf, buflen, fmt, -x);
4603 else
4604 PyOS_snprintf(buf, buflen, fmt, x);
4605 return (int)strlen(buf);
4606}
4607
4608Py_LOCAL_INLINE(int)
4609formatchar(char *buf, size_t buflen, PyObject *v)
4610{
4611 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004612 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004613 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4614 return -1;
4615 }
4616 else {
4617 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4618 return -1;
4619 }
4620 buf[1] = '\0';
4621 return 1;
4622}
4623
4624/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4625
4626 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4627 chars are formatted. XXX This is a magic number. Each formatting
4628 routine does bounds checking to ensure no overflow, but a better
4629 solution may be to malloc a buffer of appropriate size for each
4630 format. For now, the current solution is sufficient.
4631*/
4632#define FORMATBUFLEN (size_t)120
4633
4634PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004635PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004636{
4637 char *fmt, *res;
4638 Py_ssize_t arglen, argidx;
4639 Py_ssize_t reslen, rescnt, fmtcnt;
4640 int args_owned = 0;
4641 PyObject *result, *orig_args;
4642#ifdef Py_USING_UNICODE
4643 PyObject *v, *w;
4644#endif
4645 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004646 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004647 PyErr_BadInternalCall();
4648 return NULL;
4649 }
4650 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004651 fmt = PyString_AS_STRING(format);
4652 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004653 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004654 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004655 if (result == NULL)
4656 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004657 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004658 if (PyTuple_Check(args)) {
4659 arglen = PyTuple_GET_SIZE(args);
4660 argidx = 0;
4661 }
4662 else {
4663 arglen = -1;
4664 argidx = -2;
4665 }
4666 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4667 !PyObject_TypeCheck(args, &PyBaseString_Type))
4668 dict = args;
4669 while (--fmtcnt >= 0) {
4670 if (*fmt != '%') {
4671 if (--rescnt < 0) {
4672 rescnt = fmtcnt + 100;
4673 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004674 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004675 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004676 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004677 + reslen - rescnt;
4678 --rescnt;
4679 }
4680 *res++ = *fmt++;
4681 }
4682 else {
4683 /* Got a format specifier */
4684 int flags = 0;
4685 Py_ssize_t width = -1;
4686 int prec = -1;
4687 int c = '\0';
4688 int fill;
4689 int isnumok;
4690 PyObject *v = NULL;
4691 PyObject *temp = NULL;
4692 char *pbuf;
4693 int sign;
4694 Py_ssize_t len;
4695 char formatbuf[FORMATBUFLEN];
4696 /* For format{float,int,char}() */
4697#ifdef Py_USING_UNICODE
4698 char *fmt_start = fmt;
4699 Py_ssize_t argidx_start = argidx;
4700#endif
4701
4702 fmt++;
4703 if (*fmt == '(') {
4704 char *keystart;
4705 Py_ssize_t keylen;
4706 PyObject *key;
4707 int pcount = 1;
4708
4709 if (dict == NULL) {
4710 PyErr_SetString(PyExc_TypeError,
4711 "format requires a mapping");
4712 goto error;
4713 }
4714 ++fmt;
4715 --fmtcnt;
4716 keystart = fmt;
4717 /* Skip over balanced parentheses */
4718 while (pcount > 0 && --fmtcnt >= 0) {
4719 if (*fmt == ')')
4720 --pcount;
4721 else if (*fmt == '(')
4722 ++pcount;
4723 fmt++;
4724 }
4725 keylen = fmt - keystart - 1;
4726 if (fmtcnt < 0 || pcount > 0) {
4727 PyErr_SetString(PyExc_ValueError,
4728 "incomplete format key");
4729 goto error;
4730 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004731 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004732 keylen);
4733 if (key == NULL)
4734 goto error;
4735 if (args_owned) {
4736 Py_DECREF(args);
4737 args_owned = 0;
4738 }
4739 args = PyObject_GetItem(dict, key);
4740 Py_DECREF(key);
4741 if (args == NULL) {
4742 goto error;
4743 }
4744 args_owned = 1;
4745 arglen = -1;
4746 argidx = -2;
4747 }
4748 while (--fmtcnt >= 0) {
4749 switch (c = *fmt++) {
4750 case '-': flags |= F_LJUST; continue;
4751 case '+': flags |= F_SIGN; continue;
4752 case ' ': flags |= F_BLANK; continue;
4753 case '#': flags |= F_ALT; continue;
4754 case '0': flags |= F_ZERO; continue;
4755 }
4756 break;
4757 }
4758 if (c == '*') {
4759 v = getnextarg(args, arglen, &argidx);
4760 if (v == NULL)
4761 goto error;
4762 if (!PyInt_Check(v)) {
4763 PyErr_SetString(PyExc_TypeError,
4764 "* wants int");
4765 goto error;
4766 }
4767 width = PyInt_AsLong(v);
4768 if (width < 0) {
4769 flags |= F_LJUST;
4770 width = -width;
4771 }
4772 if (--fmtcnt >= 0)
4773 c = *fmt++;
4774 }
4775 else if (c >= 0 && isdigit(c)) {
4776 width = c - '0';
4777 while (--fmtcnt >= 0) {
4778 c = Py_CHARMASK(*fmt++);
4779 if (!isdigit(c))
4780 break;
4781 if ((width*10) / 10 != width) {
4782 PyErr_SetString(
4783 PyExc_ValueError,
4784 "width too big");
4785 goto error;
4786 }
4787 width = width*10 + (c - '0');
4788 }
4789 }
4790 if (c == '.') {
4791 prec = 0;
4792 if (--fmtcnt >= 0)
4793 c = *fmt++;
4794 if (c == '*') {
4795 v = getnextarg(args, arglen, &argidx);
4796 if (v == NULL)
4797 goto error;
4798 if (!PyInt_Check(v)) {
4799 PyErr_SetString(
4800 PyExc_TypeError,
4801 "* wants int");
4802 goto error;
4803 }
4804 prec = PyInt_AsLong(v);
4805 if (prec < 0)
4806 prec = 0;
4807 if (--fmtcnt >= 0)
4808 c = *fmt++;
4809 }
4810 else if (c >= 0 && isdigit(c)) {
4811 prec = c - '0';
4812 while (--fmtcnt >= 0) {
4813 c = Py_CHARMASK(*fmt++);
4814 if (!isdigit(c))
4815 break;
4816 if ((prec*10) / 10 != prec) {
4817 PyErr_SetString(
4818 PyExc_ValueError,
4819 "prec too big");
4820 goto error;
4821 }
4822 prec = prec*10 + (c - '0');
4823 }
4824 }
4825 } /* prec */
4826 if (fmtcnt >= 0) {
4827 if (c == 'h' || c == 'l' || c == 'L') {
4828 if (--fmtcnt >= 0)
4829 c = *fmt++;
4830 }
4831 }
4832 if (fmtcnt < 0) {
4833 PyErr_SetString(PyExc_ValueError,
4834 "incomplete format");
4835 goto error;
4836 }
4837 if (c != '%') {
4838 v = getnextarg(args, arglen, &argidx);
4839 if (v == NULL)
4840 goto error;
4841 }
4842 sign = 0;
4843 fill = ' ';
4844 switch (c) {
4845 case '%':
4846 pbuf = "%";
4847 len = 1;
4848 break;
4849 case 's':
4850#ifdef Py_USING_UNICODE
4851 if (PyUnicode_Check(v)) {
4852 fmt = fmt_start;
4853 argidx = argidx_start;
4854 goto unicode;
4855 }
4856#endif
4857 temp = _PyObject_Str(v);
4858#ifdef Py_USING_UNICODE
4859 if (temp != NULL && PyUnicode_Check(temp)) {
4860 Py_DECREF(temp);
4861 fmt = fmt_start;
4862 argidx = argidx_start;
4863 goto unicode;
4864 }
4865#endif
4866 /* Fall through */
4867 case 'r':
4868 if (c == 'r')
4869 temp = PyObject_Repr(v);
4870 if (temp == NULL)
4871 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004872 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004873 PyErr_SetString(PyExc_TypeError,
4874 "%s argument has non-string str()");
4875 Py_DECREF(temp);
4876 goto error;
4877 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004878 pbuf = PyString_AS_STRING(temp);
4879 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004880 if (prec >= 0 && len > prec)
4881 len = prec;
4882 break;
4883 case 'i':
4884 case 'd':
4885 case 'u':
4886 case 'o':
4887 case 'x':
4888 case 'X':
4889 if (c == 'i')
4890 c = 'd';
4891 isnumok = 0;
4892 if (PyNumber_Check(v)) {
4893 PyObject *iobj=NULL;
4894
4895 if (PyInt_Check(v) || (PyLong_Check(v))) {
4896 iobj = v;
4897 Py_INCREF(iobj);
4898 }
4899 else {
4900 iobj = PyNumber_Int(v);
4901 if (iobj==NULL) iobj = PyNumber_Long(v);
4902 }
4903 if (iobj!=NULL) {
4904 if (PyInt_Check(iobj)) {
4905 isnumok = 1;
4906 pbuf = formatbuf;
4907 len = formatint(pbuf,
4908 sizeof(formatbuf),
4909 flags, prec, c, iobj);
4910 Py_DECREF(iobj);
4911 if (len < 0)
4912 goto error;
4913 sign = 1;
4914 }
4915 else if (PyLong_Check(iobj)) {
4916 int ilen;
4917
4918 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004919 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004920 prec, c, &pbuf, &ilen);
4921 Py_DECREF(iobj);
4922 len = ilen;
4923 if (!temp)
4924 goto error;
4925 sign = 1;
4926 }
4927 else {
4928 Py_DECREF(iobj);
4929 }
4930 }
4931 }
4932 if (!isnumok) {
4933 PyErr_Format(PyExc_TypeError,
4934 "%%%c format: a number is required, "
4935 "not %.200s", c, Py_TYPE(v)->tp_name);
4936 goto error;
4937 }
4938 if (flags & F_ZERO)
4939 fill = '0';
4940 break;
4941 case 'e':
4942 case 'E':
4943 case 'f':
4944 case 'F':
4945 case 'g':
4946 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00004947 if (c == 'F')
4948 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00004949 pbuf = formatbuf;
4950 len = formatfloat(pbuf, sizeof(formatbuf),
4951 flags, prec, c, v);
4952 if (len < 0)
4953 goto error;
4954 sign = 1;
4955 if (flags & F_ZERO)
4956 fill = '0';
4957 break;
4958 case 'c':
4959#ifdef Py_USING_UNICODE
4960 if (PyUnicode_Check(v)) {
4961 fmt = fmt_start;
4962 argidx = argidx_start;
4963 goto unicode;
4964 }
4965#endif
4966 pbuf = formatbuf;
4967 len = formatchar(pbuf, sizeof(formatbuf), v);
4968 if (len < 0)
4969 goto error;
4970 break;
4971 default:
4972 PyErr_Format(PyExc_ValueError,
4973 "unsupported format character '%c' (0x%x) "
4974 "at index %zd",
4975 c, c,
4976 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004977 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004978 goto error;
4979 }
4980 if (sign) {
4981 if (*pbuf == '-' || *pbuf == '+') {
4982 sign = *pbuf++;
4983 len--;
4984 }
4985 else if (flags & F_SIGN)
4986 sign = '+';
4987 else if (flags & F_BLANK)
4988 sign = ' ';
4989 else
4990 sign = 0;
4991 }
4992 if (width < len)
4993 width = len;
4994 if (rescnt - (sign != 0) < width) {
4995 reslen -= rescnt;
4996 rescnt = width + fmtcnt + 100;
4997 reslen += rescnt;
4998 if (reslen < 0) {
4999 Py_DECREF(result);
5000 Py_XDECREF(temp);
5001 return PyErr_NoMemory();
5002 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005003 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00005004 Py_XDECREF(temp);
5005 return NULL;
5006 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005007 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00005008 + reslen - rescnt;
5009 }
5010 if (sign) {
5011 if (fill != ' ')
5012 *res++ = sign;
5013 rescnt--;
5014 if (width > len)
5015 width--;
5016 }
5017 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5018 assert(pbuf[0] == '0');
5019 assert(pbuf[1] == c);
5020 if (fill != ' ') {
5021 *res++ = *pbuf++;
5022 *res++ = *pbuf++;
5023 }
5024 rescnt -= 2;
5025 width -= 2;
5026 if (width < 0)
5027 width = 0;
5028 len -= 2;
5029 }
5030 if (width > len && !(flags & F_LJUST)) {
5031 do {
5032 --rescnt;
5033 *res++ = fill;
5034 } while (--width > len);
5035 }
5036 if (fill == ' ') {
5037 if (sign)
5038 *res++ = sign;
5039 if ((flags & F_ALT) &&
5040 (c == 'x' || c == 'X')) {
5041 assert(pbuf[0] == '0');
5042 assert(pbuf[1] == c);
5043 *res++ = *pbuf++;
5044 *res++ = *pbuf++;
5045 }
5046 }
5047 Py_MEMCPY(res, pbuf, len);
5048 res += len;
5049 rescnt -= len;
5050 while (--width >= len) {
5051 --rescnt;
5052 *res++ = ' ';
5053 }
5054 if (dict && (argidx < arglen) && c != '%') {
5055 PyErr_SetString(PyExc_TypeError,
5056 "not all arguments converted during string formatting");
5057 Py_XDECREF(temp);
5058 goto error;
5059 }
5060 Py_XDECREF(temp);
5061 } /* '%' */
5062 } /* until end */
5063 if (argidx < arglen && !dict) {
5064 PyErr_SetString(PyExc_TypeError,
5065 "not all arguments converted during string formatting");
5066 goto error;
5067 }
5068 if (args_owned) {
5069 Py_DECREF(args);
5070 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005071 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005072 return result;
5073
5074#ifdef Py_USING_UNICODE
5075 unicode:
5076 if (args_owned) {
5077 Py_DECREF(args);
5078 args_owned = 0;
5079 }
5080 /* Fiddle args right (remove the first argidx arguments) */
5081 if (PyTuple_Check(orig_args) && argidx > 0) {
5082 PyObject *v;
5083 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5084 v = PyTuple_New(n);
5085 if (v == NULL)
5086 goto error;
5087 while (--n >= 0) {
5088 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5089 Py_INCREF(w);
5090 PyTuple_SET_ITEM(v, n, w);
5091 }
5092 args = v;
5093 } else {
5094 Py_INCREF(orig_args);
5095 args = orig_args;
5096 }
5097 args_owned = 1;
5098 /* Take what we have of the result and let the Unicode formatting
5099 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005100 rescnt = res - PyString_AS_STRING(result);
5101 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005102 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005103 fmtcnt = PyString_GET_SIZE(format) - \
5104 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005105 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5106 if (format == NULL)
5107 goto error;
5108 v = PyUnicode_Format(format, args);
5109 Py_DECREF(format);
5110 if (v == NULL)
5111 goto error;
5112 /* Paste what we have (result) to what the Unicode formatting
5113 function returned (v) and return the result (or error) */
5114 w = PyUnicode_Concat(result, v);
5115 Py_DECREF(result);
5116 Py_DECREF(v);
5117 Py_DECREF(args);
5118 return w;
5119#endif /* Py_USING_UNICODE */
5120
5121 error:
5122 Py_DECREF(result);
5123 if (args_owned) {
5124 Py_DECREF(args);
5125 }
5126 return NULL;
5127}
5128
5129void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005130PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005131{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005132 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005133 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005134 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005135 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005136 /* If it's a string subclass, we don't really know what putting
5137 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005138 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005139 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005140 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005141 return;
5142 if (interned == NULL) {
5143 interned = PyDict_New();
5144 if (interned == NULL) {
5145 PyErr_Clear(); /* Don't leave an exception */
5146 return;
5147 }
5148 }
5149 t = PyDict_GetItem(interned, (PyObject *)s);
5150 if (t) {
5151 Py_INCREF(t);
5152 Py_DECREF(*p);
5153 *p = t;
5154 return;
5155 }
5156
5157 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5158 PyErr_Clear();
5159 return;
5160 }
5161 /* The two references in interned are not counted by refcnt.
5162 The string deallocator will take care of this */
5163 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005164 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005165}
5166
5167void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005168PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005169{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005170 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005171 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5172 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005173 Py_INCREF(*p);
5174 }
5175}
5176
5177
5178PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005179PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005180{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005181 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005182 if (s == NULL)
5183 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005184 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005185 return s;
5186}
5187
5188void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005189PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005190{
5191 int i;
5192 for (i = 0; i < UCHAR_MAX + 1; i++) {
5193 Py_XDECREF(characters[i]);
5194 characters[i] = NULL;
5195 }
5196 Py_XDECREF(nullstring);
5197 nullstring = NULL;
5198}
5199
5200void _Py_ReleaseInternedStrings(void)
5201{
5202 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005203 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005204 Py_ssize_t i, n;
5205 Py_ssize_t immortal_size = 0, mortal_size = 0;
5206
5207 if (interned == NULL || !PyDict_Check(interned))
5208 return;
5209 keys = PyDict_Keys(interned);
5210 if (keys == NULL || !PyList_Check(keys)) {
5211 PyErr_Clear();
5212 return;
5213 }
5214
5215 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5216 detector, interned strings are not forcibly deallocated; rather, we
5217 give them their stolen references back, and then clear and DECREF
5218 the interned dict. */
5219
5220 n = PyList_GET_SIZE(keys);
5221 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5222 n);
5223 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005224 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005225 switch (s->ob_sstate) {
5226 case SSTATE_NOT_INTERNED:
5227 /* XXX Shouldn't happen */
5228 break;
5229 case SSTATE_INTERNED_IMMORTAL:
5230 Py_REFCNT(s) += 1;
5231 immortal_size += Py_SIZE(s);
5232 break;
5233 case SSTATE_INTERNED_MORTAL:
5234 Py_REFCNT(s) += 2;
5235 mortal_size += Py_SIZE(s);
5236 break;
5237 default:
5238 Py_FatalError("Inconsistent interned string state.");
5239 }
5240 s->ob_sstate = SSTATE_NOT_INTERNED;
5241 }
5242 fprintf(stderr, "total size of all interned strings: "
5243 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5244 "mortal/immortal\n", mortal_size, immortal_size);
5245 Py_DECREF(keys);
5246 PyDict_Clear(interned);
5247 Py_DECREF(interned);
5248 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005249}