blob: 347f3183381aa868d02088d958563c033b1214d2 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063 register PyStringObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyString_FromStringAndSize");
67 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000070#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000071 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000072#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000073 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Christian Heimes44720832008-05-26 13:01:01 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000080 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000081#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
Christian Heimes44720832008-05-26 13:01:01 +000085
Antoine Pitrouc83ea132010-05-09 14:46:46 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
87 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000090
Antoine Pitrouc83ea132010-05-09 14:46:46 +000091 /* Inline PyObject_NewVar */
92 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
93 if (op == NULL)
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op, &PyString_Type, size);
96 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
104 PyString_InternInPlace(&t);
105 op = (PyStringObject *)t;
106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
110 PyString_InternInPlace(&t);
111 op = (PyStringObject *)t;
112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000121 register size_t size;
122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000124 assert(str != NULL);
125 size = strlen(str);
126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000132#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000134#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000140 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000141#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
Christian Heimes44720832008-05-26 13:01:01 +0000145
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000146 /* Inline PyObject_NewVar */
147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
148 if (op == NULL)
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op, &PyString_Type, size);
151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
158 op = (PyStringObject *)t;
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
163 PyString_InternInPlace(&t);
164 op = (PyStringObject *)t;
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000182#else
183#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000184 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000185#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000186 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000187#endif
188#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000193 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000194#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
Christian Heimes44720832008-05-26 13:01:01 +0000198
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000206#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000212#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
215 ++f;
216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000235#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000241
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
Christian Heimes44720832008-05-26 13:01:01 +0000269 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string = PyString_FromStringAndSize(NULL, n);
274 if (!string)
275 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000276
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000286#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000307#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000313#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000314 }
315 /* handle the size_t flag. */
316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
317 size_tflag = 1;
318 ++f;
319 }
Christian Heimes44720832008-05-26 13:01:01 +0000320
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000332#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000348#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
Christian Heimes44720832008-05-26 13:01:01 +0000396
397 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000398 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
399 return NULL;
400 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000401}
402
403PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000404PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000405{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 PyObject* ret;
407 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000408
409#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000411#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000412 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000413#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 ret = PyString_FromFormatV(format, vargs);
415 va_end(vargs);
416 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000417}
418
419
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000420PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000421 Py_ssize_t size,
422 const char *encoding,
423 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000424{
425 PyObject *v, *str;
426
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000427 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000428 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000429 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000430 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000431 Py_DECREF(str);
432 return v;
433}
434
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000435PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000436 const char *encoding,
437 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000438{
439 PyObject *v;
440
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000441 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000442 PyErr_BadArgument();
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
445
Christian Heimes44720832008-05-26 13:01:01 +0000446 if (encoding == NULL) {
447#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000448 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000449#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000450 PyErr_SetString(PyExc_ValueError, "no encoding specified");
451 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000452#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000453 }
Christian Heimes44720832008-05-26 13:01:01 +0000454
455 /* Decode via the codec registry */
456 v = PyCodec_Decode(str, encoding, errors);
457 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000458 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000459
460 return v;
461
462 onError:
463 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000464}
465
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000466PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467 const char *encoding,
468 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000469{
Christian Heimes44720832008-05-26 13:01:01 +0000470 PyObject *v;
471
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000472 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000473 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000474 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000475
476#ifdef Py_USING_UNICODE
477 /* Convert Unicode to a string using the default encoding */
478 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000479 PyObject *temp = v;
480 v = PyUnicode_AsEncodedString(v, NULL, NULL);
481 Py_DECREF(temp);
482 if (v == NULL)
483 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000484 }
Christian Heimes44720832008-05-26 13:01:01 +0000485#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000486 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000487 PyErr_Format(PyExc_TypeError,
488 "decoder did not return a string object (type=%.400s)",
489 Py_TYPE(v)->tp_name);
490 Py_DECREF(v);
491 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000492 }
Christian Heimes44720832008-05-26 13:01:01 +0000493
494 return v;
495
496 onError:
497 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000498}
499
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000500PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000501 Py_ssize_t size,
502 const char *encoding,
503 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000504{
Christian Heimes44720832008-05-26 13:01:01 +0000505 PyObject *v, *str;
506
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000507 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000508 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000509 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000510 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000511 Py_DECREF(str);
512 return v;
513}
514
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000515PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000516 const char *encoding,
517 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000518{
519 PyObject *v;
520
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000521 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000522 PyErr_BadArgument();
523 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000524 }
525
526 if (encoding == NULL) {
527#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000528 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000529#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000530 PyErr_SetString(PyExc_ValueError, "no encoding specified");
531 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000532#endif
533 }
534
535 /* Encode via the codec registry */
536 v = PyCodec_Encode(str, encoding, errors);
537 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000538 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000539
540 return v;
541
542 onError:
543 return NULL;
544}
545
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000546PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000547 const char *encoding,
548 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000549{
550 PyObject *v;
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000553 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000554 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000555
556#ifdef Py_USING_UNICODE
557 /* Convert Unicode to a string using the default encoding */
558 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000559 PyObject *temp = v;
560 v = PyUnicode_AsEncodedString(v, NULL, NULL);
561 Py_DECREF(temp);
562 if (v == NULL)
563 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000564 }
565#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000566 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000567 PyErr_Format(PyExc_TypeError,
568 "encoder did not return a string object (type=%.400s)",
569 Py_TYPE(v)->tp_name);
570 Py_DECREF(v);
571 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000572 }
573
574 return v;
575
576 onError:
577 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578}
579
580static void
Christian Heimes44720832008-05-26 13:01:01 +0000581string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 switch (PyString_CHECK_INTERNED(op)) {
584 case SSTATE_NOT_INTERNED:
585 break;
Christian Heimes44720832008-05-26 13:01:01 +0000586
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000587 case SSTATE_INTERNED_MORTAL:
588 /* revive dead object temporarily for DelItem */
589 Py_REFCNT(op) = 3;
590 if (PyDict_DelItem(interned, op) != 0)
591 Py_FatalError(
592 "deletion of interned string failed");
593 break;
Christian Heimes44720832008-05-26 13:01:01 +0000594
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000595 case SSTATE_INTERNED_IMMORTAL:
596 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000597
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000598 default:
599 Py_FatalError("Inconsistent interned string state.");
600 }
601 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000602}
603
Christian Heimes44720832008-05-26 13:01:01 +0000604/* Unescape a backslash-escaped string. If unicode is non-zero,
605 the string is a u-literal. If recode_encoding is non-zero,
606 the string is UTF-8 encoded and should be re-encoded in the
607 specified encoding. */
608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000609PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000610 Py_ssize_t len,
611 const char *errors,
612 Py_ssize_t unicode,
613 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000614{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000615 int c;
616 char *p, *buf;
617 const char *end;
618 PyObject *v;
619 Py_ssize_t newlen = recode_encoding ? 4*len:len;
620 v = PyString_FromStringAndSize((char *)NULL, newlen);
621 if (v == NULL)
622 return NULL;
623 p = buf = PyString_AsString(v);
624 end = s + len;
625 while (s < end) {
626 if (*s != '\\') {
627 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000628#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000629 if (recode_encoding && (*s & 0x80)) {
630 PyObject *u, *w;
631 char *r;
632 const char* t;
633 Py_ssize_t rn;
634 t = s;
635 /* Decode non-ASCII bytes as UTF-8. */
636 while (t < end && (*t & 0x80)) t++;
637 u = PyUnicode_DecodeUTF8(s, t - s, errors);
638 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000639
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000640 /* Recode them in target encoding. */
641 w = PyUnicode_AsEncodedString(
642 u, recode_encoding, errors);
643 Py_DECREF(u);
644 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000645
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000646 /* Append bytes to output buffer. */
647 assert(PyString_Check(w));
648 r = PyString_AS_STRING(w);
649 rn = PyString_GET_SIZE(w);
650 Py_MEMCPY(p, r, rn);
651 p += rn;
652 Py_DECREF(w);
653 s = t;
654 } else {
655 *p++ = *s++;
656 }
Christian Heimes44720832008-05-26 13:01:01 +0000657#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000658 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000659#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000660 continue;
661 }
662 s++;
663 if (s==end) {
664 PyErr_SetString(PyExc_ValueError,
665 "Trailing \\ in string");
666 goto failed;
667 }
668 switch (*s++) {
669 /* XXX This assumes ASCII! */
670 case '\n': break;
671 case '\\': *p++ = '\\'; break;
672 case '\'': *p++ = '\''; break;
673 case '\"': *p++ = '\"'; break;
674 case 'b': *p++ = '\b'; break;
675 case 'f': *p++ = '\014'; break; /* FF */
676 case 't': *p++ = '\t'; break;
677 case 'n': *p++ = '\n'; break;
678 case 'r': *p++ = '\r'; break;
679 case 'v': *p++ = '\013'; break; /* VT */
680 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
681 case '0': case '1': case '2': case '3':
682 case '4': case '5': case '6': case '7':
683 c = s[-1] - '0';
684 if (s < end && '0' <= *s && *s <= '7') {
685 c = (c<<3) + *s++ - '0';
686 if (s < end && '0' <= *s && *s <= '7')
687 c = (c<<3) + *s++ - '0';
688 }
689 *p++ = c;
690 break;
691 case 'x':
692 if (s+1 < end &&
693 isxdigit(Py_CHARMASK(s[0])) &&
694 isxdigit(Py_CHARMASK(s[1])))
695 {
696 unsigned int x = 0;
697 c = Py_CHARMASK(*s);
698 s++;
699 if (isdigit(c))
700 x = c - '0';
701 else if (islower(c))
702 x = 10 + c - 'a';
703 else
704 x = 10 + c - 'A';
705 x = x << 4;
706 c = Py_CHARMASK(*s);
707 s++;
708 if (isdigit(c))
709 x += c - '0';
710 else if (islower(c))
711 x += 10 + c - 'a';
712 else
713 x += 10 + c - 'A';
714 *p++ = x;
715 break;
716 }
717 if (!errors || strcmp(errors, "strict") == 0) {
718 PyErr_SetString(PyExc_ValueError,
719 "invalid \\x escape");
720 goto failed;
721 }
722 if (strcmp(errors, "replace") == 0) {
723 *p++ = '?';
724 } else if (strcmp(errors, "ignore") == 0)
725 /* do nothing */;
726 else {
727 PyErr_Format(PyExc_ValueError,
728 "decoding error; "
729 "unknown error handling code: %.400s",
730 errors);
731 goto failed;
732 }
Christian Heimes44720832008-05-26 13:01:01 +0000733#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
Christian Heimes44720832008-05-26 13:01:01 +0000743#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 default:
745 *p++ = '\\';
746 s--;
747 goto non_esc; /* an arbitry number of unescaped
748 UTF-8 bytes may follow. */
749 }
750 }
751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
753 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000754 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 Py_DECREF(v);
756 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
Christian Heimes44720832008-05-26 13:01:01 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
Christian Heimes44720832008-05-26 13:01:01 +0000884#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000886#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000887 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000888#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 Py_END_ALLOW_THREADS
890 return 0;
891 }
Christian Heimes44720832008-05-26 13:01:01 +0000892
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
936 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000945
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000952
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000953 p = PyString_AS_STRING(v);
954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
974 }
975 else
976 *p++ = c;
977 }
978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979 *p++ = quote;
980 *p = '\0';
981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
983 return v;
984 }
Christian Heimes44720832008-05-26 13:01:01 +0000985}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000986
987static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000988string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000990 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991}
992
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000994string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000995{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
998 Py_INCREF(s);
999 return s;
1000 }
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
Christian Heimes44720832008-05-26 13:01:01 +00001008static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001009string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001010{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001012}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013
Christian Heimes44720832008-05-26 13:01:01 +00001014static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001015string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001016{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 register Py_ssize_t size;
1018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001023#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1030 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001031#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1038 }
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1041 }
1042 size = Py_SIZE(a) + Py_SIZE(b);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001054 /* Inline PyObject_NewVar */
1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1059 }
1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061 if (op == NULL)
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op, &PyString_Type, size);
1064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001070#undef b
1071}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001072
Christian Heimes44720832008-05-26 13:01:01 +00001073static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001075{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
1079 register PyStringObject *op;
1080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1085 */
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093 Py_INCREF(a);
1094 return (PyObject *)a;
1095 }
1096 nbytes = (size_t)size;
1097 if (nbytes + PyStringObject_SIZE <= nbytes) {
1098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1101 }
1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103 if (op == NULL)
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op, &PyString_Type, size);
1106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1112 }
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1117 }
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1122 }
1123 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001124}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001125
Christian Heimes44720832008-05-26 13:01:01 +00001126/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001129string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001131 /* j -- may be negative! */
1132{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1143 }
1144 if (j < i)
1145 j = i;
1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001147}
1148
1149static int
1150string_contains(PyObject *str_obj, PyObject *sub_obj)
1151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001152 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001153#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001156#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001157 if (!PyString_Check(sub_obj)) {
1158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1162 }
1163 }
Christian Heimes44720832008-05-26 13:01:01 +00001164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001166}
1167
1168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001169string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001170{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1176 }
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
1180 v = PyString_FromStringAndSize(&pchar, 1);
1181 else {
Christian Heimes44720832008-05-26 13:01:01 +00001182#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001183 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001185 Py_INCREF(v);
1186 }
1187 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001188}
1189
1190static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001197
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a) && PyString_Check(b))) {
1200 result = Py_NotImplemented;
1201 goto out;
1202 }
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1211 }
1212 }
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1222 }
1223 goto out;
1224 }
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1245 }
1246 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001247 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001248 Py_INCREF(result);
1249 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001250}
1251
1252int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001254{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
1257 return Py_SIZE(a) == Py_SIZE(b)
1258 && *a->ob_sval == *b->ob_sval
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001260}
1261
1262static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001263string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001264{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001268
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001269 if (a->ob_shash != -1)
1270 return a->ob_shash;
1271 len = Py_SIZE(a);
1272 p = (unsigned char *) a->ob_sval;
1273 x = *p << 7;
1274 while (--len >= 0)
1275 x = (1000003*x) ^ *p++;
1276 x ^= Py_SIZE(a);
1277 if (x == -1)
1278 x = -2;
1279 a->ob_shash = x;
1280 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001281}
1282
1283static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001284string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001285{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 if (PyIndex_Check(item)) {
1287 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1288 if (i == -1 && PyErr_Occurred())
1289 return NULL;
1290 if (i < 0)
1291 i += PyString_GET_SIZE(self);
1292 return string_item(self, i);
1293 }
1294 else if (PySlice_Check(item)) {
1295 Py_ssize_t start, stop, step, slicelength, cur, i;
1296 char* source_buf;
1297 char* result_buf;
1298 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001299
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001300 if (PySlice_GetIndicesEx((PySliceObject*)item,
1301 PyString_GET_SIZE(self),
1302 &start, &stop, &step, &slicelength) < 0) {
1303 return NULL;
1304 }
Christian Heimes44720832008-05-26 13:01:01 +00001305
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001306 if (slicelength <= 0) {
1307 return PyString_FromStringAndSize("", 0);
1308 }
1309 else if (start == 0 && step == 1 &&
1310 slicelength == PyString_GET_SIZE(self) &&
1311 PyString_CheckExact(self)) {
1312 Py_INCREF(self);
1313 return (PyObject *)self;
1314 }
1315 else if (step == 1) {
1316 return PyString_FromStringAndSize(
1317 PyString_AS_STRING(self) + start,
1318 slicelength);
1319 }
1320 else {
1321 source_buf = PyString_AsString((PyObject*)self);
1322 result_buf = (char *)PyMem_Malloc(slicelength);
1323 if (result_buf == NULL)
1324 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001325
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001326 for (cur = start, i = 0; i < slicelength;
1327 cur += step, i++) {
1328 result_buf[i] = source_buf[cur];
1329 }
Christian Heimes44720832008-05-26 13:01:01 +00001330
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001331 result = PyString_FromStringAndSize(result_buf,
1332 slicelength);
1333 PyMem_Free(result_buf);
1334 return result;
1335 }
1336 }
1337 else {
1338 PyErr_Format(PyExc_TypeError,
1339 "string indices must be integers, not %.200s",
1340 Py_TYPE(item)->tp_name);
1341 return NULL;
1342 }
Christian Heimes44720832008-05-26 13:01:01 +00001343}
1344
1345static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001346string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001347{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001348 if ( index != 0 ) {
1349 PyErr_SetString(PyExc_SystemError,
1350 "accessing non-existent string segment");
1351 return -1;
1352 }
1353 *ptr = (void *)self->ob_sval;
1354 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001355}
1356
1357static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001358string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001359{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001360 PyErr_SetString(PyExc_TypeError,
1361 "Cannot use string as modifiable buffer");
1362 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001363}
1364
1365static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001366string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001367{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001368 if ( lenp )
1369 *lenp = Py_SIZE(self);
1370 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001371}
1372
1373static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001374string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001375{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001376 if ( index != 0 ) {
1377 PyErr_SetString(PyExc_SystemError,
1378 "accessing non-existent string segment");
1379 return -1;
1380 }
1381 *ptr = self->ob_sval;
1382 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001383}
1384
1385static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001386string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001387{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001388 return PyBuffer_FillInfo(view, (PyObject*)self,
1389 (void *)self->ob_sval, Py_SIZE(self),
1390 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001391}
1392
1393static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001394 (lenfunc)string_length, /*sq_length*/
1395 (binaryfunc)string_concat, /*sq_concat*/
1396 (ssizeargfunc)string_repeat, /*sq_repeat*/
1397 (ssizeargfunc)string_item, /*sq_item*/
1398 (ssizessizeargfunc)string_slice, /*sq_slice*/
1399 0, /*sq_ass_item*/
1400 0, /*sq_ass_slice*/
1401 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001402};
1403
1404static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001405 (lenfunc)string_length,
1406 (binaryfunc)string_subscript,
1407 0,
Christian Heimes44720832008-05-26 13:01:01 +00001408};
1409
1410static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001411 (readbufferproc)string_buffer_getreadbuf,
1412 (writebufferproc)string_buffer_getwritebuf,
1413 (segcountproc)string_buffer_getsegcount,
1414 (charbufferproc)string_buffer_getcharbuf,
1415 (getbufferproc)string_buffer_getbuffer,
1416 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001417};
1418
1419
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001420
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string with its leading
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1429
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001431"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001432\n\
Christian Heimes44720832008-05-26 13:01:01 +00001433Return a list of the words in the string S, using sep as the\n\
1434delimiter string. If maxsplit is given, at most maxsplit\n\
1435splits are done. If sep is not specified or is None, any\n\
1436whitespace string is a separator and empty strings are removed\n\
1437from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001442 Py_ssize_t len = PyString_GET_SIZE(self), n;
1443 Py_ssize_t maxsplit = -1;
1444 const char *s = PyString_AS_STRING(self), *sub;
1445 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001447 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1448 return NULL;
1449 if (maxsplit < 0)
1450 maxsplit = PY_SSIZE_T_MAX;
1451 if (subobj == Py_None)
1452 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1453 if (PyString_Check(subobj)) {
1454 sub = PyString_AS_STRING(subobj);
1455 n = PyString_GET_SIZE(subobj);
1456 }
Christian Heimes44720832008-05-26 13:01:01 +00001457#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001458 else if (PyUnicode_Check(subobj))
1459 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001460#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001461 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001463
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001464 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465}
1466
1467PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001468"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001470Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001471the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001472found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001473
1474static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001475string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477 const char *sep;
1478 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001480 if (PyString_Check(sep_obj)) {
1481 sep = PyString_AS_STRING(sep_obj);
1482 sep_len = PyString_GET_SIZE(sep_obj);
1483 }
Christian Heimes44720832008-05-26 13:01:01 +00001484#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001485 else if (PyUnicode_Check(sep_obj))
1486 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001487#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001488 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1489 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001491 return stringlib_partition(
1492 (PyObject*) self,
1493 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1494 sep_obj, sep, sep_len
1495 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496}
1497
1498PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001499"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001501Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001502the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001503separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001506string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001508 const char *sep;
1509 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001511 if (PyString_Check(sep_obj)) {
1512 sep = PyString_AS_STRING(sep_obj);
1513 sep_len = PyString_GET_SIZE(sep_obj);
1514 }
Christian Heimes44720832008-05-26 13:01:01 +00001515#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001516 else if (PyUnicode_Check(sep_obj))
1517 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001518#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001519 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001522 return stringlib_rpartition(
1523 (PyObject*) self,
1524 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1525 sep_obj, sep, sep_len
1526 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527}
1528
Christian Heimes1a6387e2008-03-26 12:49:49 +00001529PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001530"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531\n\
Christian Heimes44720832008-05-26 13:01:01 +00001532Return a list of the words in the string S, using sep as the\n\
1533delimiter string, starting at the end of the string and working\n\
1534to the front. If maxsplit is given, at most maxsplit splits are\n\
1535done. If sep is not specified or is None, any whitespace string\n\
1536is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001537
1538static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001539string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001541 Py_ssize_t len = PyString_GET_SIZE(self), n;
1542 Py_ssize_t maxsplit = -1;
1543 const char *s = PyString_AS_STRING(self), *sub;
1544 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001545
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001546 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1547 return NULL;
1548 if (maxsplit < 0)
1549 maxsplit = PY_SSIZE_T_MAX;
1550 if (subobj == Py_None)
1551 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 n = PyString_GET_SIZE(subobj);
1555 }
Christian Heimes44720832008-05-26 13:01:01 +00001556#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001557 else if (PyUnicode_Check(subobj))
1558 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001559#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001560 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1561 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001562
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001563 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001564}
1565
1566
1567PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001568"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001569\n\
1570Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001571iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001572
1573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001574string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001575{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 char *sep = PyString_AS_STRING(self);
1577 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1578 PyObject *res = NULL;
1579 char *p;
1580 Py_ssize_t seqlen = 0;
1581 size_t sz = 0;
1582 Py_ssize_t i;
1583 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001584
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001585 seq = PySequence_Fast(orig, "");
1586 if (seq == NULL) {
1587 return NULL;
1588 }
Christian Heimes44720832008-05-26 13:01:01 +00001589
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001590 seqlen = PySequence_Size(seq);
1591 if (seqlen == 0) {
1592 Py_DECREF(seq);
1593 return PyString_FromString("");
1594 }
1595 if (seqlen == 1) {
1596 item = PySequence_Fast_GET_ITEM(seq, 0);
1597 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1598 Py_INCREF(item);
1599 Py_DECREF(seq);
1600 return item;
1601 }
1602 }
Christian Heimes44720832008-05-26 13:01:01 +00001603
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001604 /* There are at least two things to join, or else we have a subclass
1605 * of the builtin types in the sequence.
1606 * Do a pre-pass to figure out the total amount of space we'll
1607 * need (sz), see whether any argument is absurd, and defer to
1608 * the Unicode join if appropriate.
1609 */
1610 for (i = 0; i < seqlen; i++) {
1611 const size_t old_sz = sz;
1612 item = PySequence_Fast_GET_ITEM(seq, i);
1613 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001614#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001615 if (PyUnicode_Check(item)) {
1616 /* Defer to Unicode join.
1617 * CAUTION: There's no gurantee that the
1618 * original sequence can be iterated over
1619 * again, so we must pass seq here.
1620 */
1621 PyObject *result;
1622 result = PyUnicode_Join((PyObject *)self, seq);
1623 Py_DECREF(seq);
1624 return result;
1625 }
Christian Heimes44720832008-05-26 13:01:01 +00001626#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001627 PyErr_Format(PyExc_TypeError,
1628 "sequence item %zd: expected string,"
1629 " %.80s found",
1630 i, Py_TYPE(item)->tp_name);
1631 Py_DECREF(seq);
1632 return NULL;
1633 }
1634 sz += PyString_GET_SIZE(item);
1635 if (i != 0)
1636 sz += seplen;
1637 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1638 PyErr_SetString(PyExc_OverflowError,
1639 "join() result is too long for a Python string");
1640 Py_DECREF(seq);
1641 return NULL;
1642 }
1643 }
Christian Heimes44720832008-05-26 13:01:01 +00001644
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001645 /* Allocate result space. */
1646 res = PyString_FromStringAndSize((char*)NULL, sz);
1647 if (res == NULL) {
1648 Py_DECREF(seq);
1649 return NULL;
1650 }
Christian Heimes44720832008-05-26 13:01:01 +00001651
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001652 /* Catenate everything. */
1653 p = PyString_AS_STRING(res);
1654 for (i = 0; i < seqlen; ++i) {
1655 size_t n;
1656 item = PySequence_Fast_GET_ITEM(seq, i);
1657 n = PyString_GET_SIZE(item);
1658 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1659 p += n;
1660 if (i < seqlen - 1) {
1661 Py_MEMCPY(p, sep, seplen);
1662 p += seplen;
1663 }
1664 }
Christian Heimes44720832008-05-26 13:01:01 +00001665
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001666 Py_DECREF(seq);
1667 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001668}
1669
1670PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001671_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001672{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001673 assert(sep != NULL && PyString_Check(sep));
1674 assert(x != NULL);
1675 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001676}
1677
/* Helper macro to fix up start/end slice values against a length.
 *
 *   - end greater than len is clamped to len;
 *   - a negative end has len added to it and is then clamped to 0;
 *   - a negative start has len added to it and is then clamped to 0.
 *
 * `start` and `end` must be modifiable lvalues; `len` may be evaluated
 * more than once, so it must not have side effects.  The body is wrapped
 * in do { } while (0) so the macro behaves as a single statement (safe as
 * the unbraced body of an if/else); callers must terminate it with ';'.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001692
1693Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001694string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001695{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001696 PyObject *subobj;
1697 const char *sub;
1698 Py_ssize_t sub_len;
1699 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001701
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001702 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703 &obj_start, &obj_end))
1704 return -2;
1705 /* To support None in "start" and "end" arguments, meaning
1706 the same as if they were not passed.
1707 */
1708 if (obj_start != Py_None)
1709 if (!_PyEval_SliceIndex(obj_start, &start))
1710 return -2;
1711 if (obj_end != Py_None)
1712 if (!_PyEval_SliceIndex(obj_end, &end))
1713 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 if (PyString_Check(subobj)) {
1716 sub = PyString_AS_STRING(subobj);
1717 sub_len = PyString_GET_SIZE(subobj);
1718 }
Christian Heimes44720832008-05-26 13:01:01 +00001719#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001720 else if (PyUnicode_Check(subobj))
1721 return PyUnicode_Find(
1722 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001723#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001724 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725 /* XXX - the "expected a character buffer object" is pretty
1726 confusing for a non-expert. remap to something else ? */
1727 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001728
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001729 if (dir > 0)
1730 return stringlib_find_slice(
1731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732 sub, sub_len, start, end);
1733 else
1734 return stringlib_rfind_slice(
1735 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1736 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001737}
1738
1739
1740PyDoc_STRVAR(find__doc__,
1741"S.find(sub [,start [,end]]) -> int\n\
1742\n\
1743Return the lowest index in S where substring sub is found,\n\
1744such that sub is contained within s[start:end]. Optional\n\
1745arguments start and end are interpreted as in slice notation.\n\
1746\n\
1747Return -1 on failure.");
1748
1749static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001750string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001751{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001752 Py_ssize_t result = string_find_internal(self, args, +1);
1753 if (result == -2)
1754 return NULL;
1755 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001756}
1757
1758
1759PyDoc_STRVAR(index__doc__,
1760"S.index(sub [,start [,end]]) -> int\n\
1761\n\
1762Like S.find() but raise ValueError when the substring is not found.");
1763
1764static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001765string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001766{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001767 Py_ssize_t result = string_find_internal(self, args, +1);
1768 if (result == -2)
1769 return NULL;
1770 if (result == -1) {
1771 PyErr_SetString(PyExc_ValueError,
1772 "substring not found");
1773 return NULL;
1774 }
1775 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001776}
1777
1778
1779PyDoc_STRVAR(rfind__doc__,
1780"S.rfind(sub [,start [,end]]) -> int\n\
1781\n\
1782Return the highest index in S where substring sub is found,\n\
1783such that sub is contained within s[start:end]. Optional\n\
1784arguments start and end are interpreted as in slice notation.\n\
1785\n\
1786Return -1 on failure.");
1787
1788static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001789string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001790{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001791 Py_ssize_t result = string_find_internal(self, args, -1);
1792 if (result == -2)
1793 return NULL;
1794 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001795}
1796
1797
1798PyDoc_STRVAR(rindex__doc__,
1799"S.rindex(sub [,start [,end]]) -> int\n\
1800\n\
1801Like S.rfind() but raise ValueError when the substring is not found.");
1802
1803static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001804string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001805{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001806 Py_ssize_t result = string_find_internal(self, args, -1);
1807 if (result == -2)
1808 return NULL;
1809 if (result == -1) {
1810 PyErr_SetString(PyExc_ValueError,
1811 "substring not found");
1812 return NULL;
1813 }
1814 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001815}
1816
1817
1818Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001819do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001820{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001821 char *s = PyString_AS_STRING(self);
1822 Py_ssize_t len = PyString_GET_SIZE(self);
1823 char *sep = PyString_AS_STRING(sepobj);
1824 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1825 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001826
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001827 i = 0;
1828 if (striptype != RIGHTSTRIP) {
1829 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830 i++;
1831 }
1832 }
Christian Heimes44720832008-05-26 13:01:01 +00001833
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001834 j = len;
1835 if (striptype != LEFTSTRIP) {
1836 do {
1837 j--;
1838 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1839 j++;
1840 }
Christian Heimes44720832008-05-26 13:01:01 +00001841
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001842 if (i == 0 && j == len && PyString_CheckExact(self)) {
1843 Py_INCREF(self);
1844 return (PyObject*)self;
1845 }
1846 else
1847 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001848}
1849
1850
1851Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001852do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001853{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001854 char *s = PyString_AS_STRING(self);
1855 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001856
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001857 i = 0;
1858 if (striptype != RIGHTSTRIP) {
1859 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860 i++;
1861 }
1862 }
Christian Heimes44720832008-05-26 13:01:01 +00001863
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001864 j = len;
1865 if (striptype != LEFTSTRIP) {
1866 do {
1867 j--;
1868 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869 j++;
1870 }
Christian Heimes44720832008-05-26 13:01:01 +00001871
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001872 if (i == 0 && j == len && PyString_CheckExact(self)) {
1873 Py_INCREF(self);
1874 return (PyObject*)self;
1875 }
1876 else
1877 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001878}
1879
1880
1881Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001882do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001883{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001884 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001885
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001886 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1887 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 if (sep != NULL && sep != Py_None) {
1890 if (PyString_Check(sep))
1891 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001892#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001893 else if (PyUnicode_Check(sep)) {
1894 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895 PyObject *res;
1896 if (uniself==NULL)
1897 return NULL;
1898 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899 striptype, sep);
1900 Py_DECREF(uniself);
1901 return res;
1902 }
Christian Heimes44720832008-05-26 13:01:01 +00001903#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001904 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001905#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001906 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001907#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001908 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001909#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 STRIPNAME(striptype));
1911 return NULL;
1912 }
Christian Heimes44720832008-05-26 13:01:01 +00001913
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001914 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001915}
1916
1917
1918PyDoc_STRVAR(strip__doc__,
1919"S.strip([chars]) -> string or unicode\n\
1920\n\
1921Return a copy of the string S with leading and trailing\n\
1922whitespace removed.\n\
1923If chars is given and not None, remove characters in chars instead.\n\
1924If chars is unicode, S will be converted to unicode before stripping");
1925
1926static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001927string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001928{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001929 if (PyTuple_GET_SIZE(args) == 0)
1930 return do_strip(self, BOTHSTRIP); /* Common case */
1931 else
1932 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001933}
1934
1935
1936PyDoc_STRVAR(lstrip__doc__,
1937"S.lstrip([chars]) -> string or unicode\n\
1938\n\
1939Return a copy of the string S with leading whitespace removed.\n\
1940If chars is given and not None, remove characters in chars instead.\n\
1941If chars is unicode, S will be converted to unicode before stripping");
1942
1943static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001944string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001945{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001946 if (PyTuple_GET_SIZE(args) == 0)
1947 return do_strip(self, LEFTSTRIP); /* Common case */
1948 else
1949 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001950}
1951
1952
1953PyDoc_STRVAR(rstrip__doc__,
1954"S.rstrip([chars]) -> string or unicode\n\
1955\n\
1956Return a copy of the string S with trailing whitespace removed.\n\
1957If chars is given and not None, remove characters in chars instead.\n\
1958If chars is unicode, S will be converted to unicode before stripping");
1959
1960static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001961string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001962{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001963 if (PyTuple_GET_SIZE(args) == 0)
1964 return do_strip(self, RIGHTSTRIP); /* Common case */
1965 else
1966 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001967}
1968
1969
1970PyDoc_STRVAR(lower__doc__,
1971"S.lower() -> string\n\
1972\n\
1973Return a copy of the string S converted to lowercase.");
1974
1975/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976#ifndef _tolower
1977#define _tolower tolower
1978#endif
1979
1980static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001981string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001982{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001983 char *s;
1984 Py_ssize_t i, n = PyString_GET_SIZE(self);
1985 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 newobj = PyString_FromStringAndSize(NULL, n);
1988 if (!newobj)
1989 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001990
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001991 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001992
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001993 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001994
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001995 for (i = 0; i < n; i++) {
1996 int c = Py_CHARMASK(s[i]);
1997 if (isupper(c))
1998 s[i] = _tolower(c);
1999 }
Christian Heimes44720832008-05-26 13:01:01 +00002000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002001 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002002}
2003
2004PyDoc_STRVAR(upper__doc__,
2005"S.upper() -> string\n\
2006\n\
2007Return a copy of the string S converted to uppercase.");
2008
2009#ifndef _toupper
2010#define _toupper toupper
2011#endif
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 char *s;
2017 Py_ssize_t i, n = PyString_GET_SIZE(self);
2018 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 newobj = PyString_FromStringAndSize(NULL, n);
2021 if (!newobj)
2022 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002023
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002024 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002026 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002027
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002028 for (i = 0; i < n; i++) {
2029 int c = Py_CHARMASK(s[i]);
2030 if (islower(c))
2031 s[i] = _toupper(c);
2032 }
Christian Heimes44720832008-05-26 13:01:01 +00002033
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002034 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002035}
2036
2037PyDoc_STRVAR(title__doc__,
2038"S.title() -> string\n\
2039\n\
2040Return a titlecased version of S, i.e. words start with uppercase\n\
2041characters, all remaining cased characters have lowercase.");
2042
2043static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002046 char *s = PyString_AS_STRING(self), *s_new;
2047 Py_ssize_t i, n = PyString_GET_SIZE(self);
2048 int previous_is_cased = 0;
2049 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002050
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002051 newobj = PyString_FromStringAndSize(NULL, n);
2052 if (newobj == NULL)
2053 return NULL;
2054 s_new = PyString_AsString(newobj);
2055 for (i = 0; i < n; i++) {
2056 int c = Py_CHARMASK(*s++);
2057 if (islower(c)) {
2058 if (!previous_is_cased)
2059 c = toupper(c);
2060 previous_is_cased = 1;
2061 } else if (isupper(c)) {
2062 if (previous_is_cased)
2063 c = tolower(c);
2064 previous_is_cased = 1;
2065 } else
2066 previous_is_cased = 0;
2067 *s_new++ = c;
2068 }
2069 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002070}
2071
2072PyDoc_STRVAR(capitalize__doc__,
2073"S.capitalize() -> string\n\
2074\n\
2075Return a copy of the string S with only its first character\n\
2076capitalized.");
2077
2078static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002080{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002081 char *s = PyString_AS_STRING(self), *s_new;
2082 Py_ssize_t i, n = PyString_GET_SIZE(self);
2083 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002084
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002085 newobj = PyString_FromStringAndSize(NULL, n);
2086 if (newobj == NULL)
2087 return NULL;
2088 s_new = PyString_AsString(newobj);
2089 if (0 < n) {
2090 int c = Py_CHARMASK(*s++);
2091 if (islower(c))
2092 *s_new = toupper(c);
2093 else
2094 *s_new = c;
2095 s_new++;
2096 }
2097 for (i = 1; i < n; i++) {
2098 int c = Py_CHARMASK(*s++);
2099 if (isupper(c))
2100 *s_new = tolower(c);
2101 else
2102 *s_new = c;
2103 s_new++;
2104 }
2105 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002106}
2107
2108
2109PyDoc_STRVAR(count__doc__,
2110"S.count(sub[, start[, end]]) -> int\n\
2111\n\
2112Return the number of non-overlapping occurrences of substring sub in\n\
2113string S[start:end]. Optional arguments start and end are interpreted\n\
2114as in slice notation.");
2115
2116static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002118{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002119 PyObject *sub_obj;
2120 const char *str = PyString_AS_STRING(self), *sub;
2121 Py_ssize_t sub_len;
2122 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2126 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002127
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 if (PyString_Check(sub_obj)) {
2129 sub = PyString_AS_STRING(sub_obj);
2130 sub_len = PyString_GET_SIZE(sub_obj);
2131 }
Christian Heimes44720832008-05-26 13:01:01 +00002132#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002133 else if (PyUnicode_Check(sub_obj)) {
2134 Py_ssize_t count;
2135 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2136 if (count == -1)
2137 return NULL;
2138 else
2139 return PyInt_FromSsize_t(count);
2140 }
Christian Heimes44720832008-05-26 13:01:01 +00002141#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002142 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002144
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002145 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002147 return PyInt_FromSsize_t(
2148 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2149 );
Christian Heimes44720832008-05-26 13:01:01 +00002150}
2151
2152PyDoc_STRVAR(swapcase__doc__,
2153"S.swapcase() -> string\n\
2154\n\
2155Return a copy of the string S with uppercase characters\n\
2156converted to lowercase and vice versa.");
2157
2158static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002159string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002160{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002161 char *s = PyString_AS_STRING(self), *s_new;
2162 Py_ssize_t i, n = PyString_GET_SIZE(self);
2163 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002165 newobj = PyString_FromStringAndSize(NULL, n);
2166 if (newobj == NULL)
2167 return NULL;
2168 s_new = PyString_AsString(newobj);
2169 for (i = 0; i < n; i++) {
2170 int c = Py_CHARMASK(*s++);
2171 if (islower(c)) {
2172 *s_new = toupper(c);
2173 }
2174 else if (isupper(c)) {
2175 *s_new = tolower(c);
2176 }
2177 else
2178 *s_new = c;
2179 s_new++;
2180 }
2181 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002182}
2183
2184
2185PyDoc_STRVAR(translate__doc__,
2186"S.translate(table [,deletechars]) -> string\n\
2187\n\
2188Return a copy of the string S, where all characters occurring\n\
2189in the optional argument deletechars are removed, and the\n\
2190remaining characters have been mapped through the given\n\
2191translation table, which must be a string of length 256.");
2192
2193static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002194string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002195{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002196 register char *input, *output;
2197 const char *table;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2202 PyObject *result;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002205
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2208 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
2213 }
2214 else if (tableobj == Py_None) {
2215 table = NULL;
2216 tablen = 256;
2217 }
Christian Heimes44720832008-05-26 13:01:01 +00002218#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2222 to be deleted. */
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2226 return NULL;
2227 }
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229 }
Christian Heimes44720832008-05-26 13:01:01 +00002230#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002233
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2237 return NULL;
2238 }
Christian Heimes44720832008-05-26 13:01:01 +00002239
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002240 if (delobj != NULL) {
2241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
2244 }
Christian Heimes44720832008-05-26 13:01:01 +00002245#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2249 return NULL;
2250 }
Christian Heimes44720832008-05-26 13:01:01 +00002251#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253 return NULL;
2254 }
2255 else {
2256 del_table = NULL;
2257 dellen = 0;
2258 }
Christian Heimes44720832008-05-26 13:01:01 +00002259
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
2262 if (result == NULL)
2263 return NULL;
2264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002266
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
2272 changed = 1;
2273 }
2274 if (changed || !PyString_CheckExact(input_obj))
2275 return result;
2276 Py_DECREF(result);
2277 Py_INCREF(input_obj);
2278 return input_obj;
2279 }
Christian Heimes44720832008-05-26 13:01:01 +00002280
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2287 }
Christian Heimes44720832008-05-26 13:01:01 +00002288
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002291
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296 continue;
2297 changed = 1;
2298 }
2299 if (!changed && PyString_CheckExact(input_obj)) {
2300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2303 }
2304 /* Fix the size of the resulting string */
2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306 return NULL;
2307 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002308}
2309
2310
Christian Heimes44720832008-05-26 13:01:01 +00002311/* find and count characters and substrings */
2312
/* findchar(target, target_len, c): thin wrapper around memchr() that
 * yields a char* to the first occurrence of byte `c` in the first
 * `target_len` bytes of `target`, or NULL when there is none. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315
2316/* String ops must return a string. */
2317/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002318Py_LOCAL(PyStringObject *)
2319return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002321 if (PyString_CheckExact(self)) {
2322 Py_INCREF(self);
2323 return self;
2324 }
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002328}
2329
2330Py_LOCAL_INLINE(Py_ssize_t)
2331countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002336
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2342 }
2343 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002344}
2345
Christian Heimes44720832008-05-26 13:01:01 +00002346
2347/* Algorithms for different cases of string replacement */
2348
2349/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002350Py_LOCAL(PyStringObject *)
2351replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002354{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002359
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002360 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002361
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002366
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2374 }
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2380 }
Christian Heimes44720832008-05-26 13:01:01 +00002381
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2384 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002390
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002395
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 }
2401
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002406}
2407
2408/* Special case for deleting a single character */
2409/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410Py_LOCAL(PyStringObject *)
2411replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002412 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002413{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
2418 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002419
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2426 }
Christian Heimes44720832008-05-26 13:01:01 +00002427
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002430
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433 return NULL;
2434 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002435
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2445 }
2446 Py_MEMCPY(result_s, start, end-start);
2447
2448 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002449}
2450
2451/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002453Py_LOCAL(PyStringObject *)
2454replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002469
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2473 }
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002481
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002482 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002483
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2489 0);
2490 if (offset == -1)
2491 break;
2492 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002493
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002494 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002495
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002496 result_s += (next-start);
2497 start = next+from_len;
2498 }
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002501}
2502
2503/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002504Py_LOCAL(PyStringObject *)
2505replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002508{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002512
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002517 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002518
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2522 }
Christian Heimes44720832008-05-26 13:01:01 +00002523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526 if (result == NULL)
2527 return NULL;
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002530
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002536
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2543 }
Christian Heimes44720832008-05-26 13:01:01 +00002544
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002545 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002546}
2547
2548/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549Py_LOCAL(PyStringObject *)
2550replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002554{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002559
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002561
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2567 0);
2568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2571 }
Christian Heimes44720832008-05-26 13:01:01 +00002572
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575 if (result == NULL)
2576 return NULL;
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002579
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2589 0);
2590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2594 }
Christian Heimes44720832008-05-26 13:01:01 +00002595
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002596 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002597}
2598
2599/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002600Py_LOCAL(PyStringObject *)
2601replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002605{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2619 }
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2632 }
Christian Heimes44720832008-05-26 13:01:01 +00002633
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636 return NULL;
2637 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002638
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
Christian Heimes44720832008-05-26 13:01:01 +00002645
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2658 }
2659 }
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002662
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002663 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002664}
2665
2666/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002667Py_LOCAL(PyStringObject *)
2668replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002677
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002680
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002684
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2688 }
Christian Heimes44720832008-05-26 13:01:01 +00002689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2696 }
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
Christian Heimes44720832008-05-26 13:01:01 +00002702
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705 return NULL;
2706 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002707
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
2714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2729 }
2730 }
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002733
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002734 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002735}
2736
2737
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002738Py_LOCAL(PyStringObject *)
2739replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002743{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2749 }
Christian Heimes44720832008-05-26 13:01:01 +00002750
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2755 }
Christian Heimes44720832008-05-26 13:01:01 +00002756
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002757 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002758
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2764 }
Christian Heimes44720832008-05-26 13:01:01 +00002765
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2771 }
Christian Heimes44720832008-05-26 13:01:01 +00002772
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2780 }
2781 }
Christian Heimes44720832008-05-26 13:01:01 +00002782
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002783 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002784
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2795 }
2796 }
Christian Heimes44720832008-05-26 13:01:01 +00002797
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805 }
Christian Heimes44720832008-05-26 13:01:01 +00002806}
2807
2808PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002809"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002810\n\
2811Return a copy of string S with all occurrences of substring\n\
2812old replaced by new. If the optional argument count is\n\
2813given, only the first count occurrences are replaced.");
2814
2815static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002816string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002817{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2829 }
Christian Heimes44720832008-05-26 13:01:01 +00002830#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002834#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002837
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2841 }
Christian Heimes44720832008-05-26 13:01:01 +00002842#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002846#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002849
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 return (PyObject *)replace((PyStringObject *) self,
2851 from_s, from_len,
2852 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002853}
2854
2855/** End DALKE **/
2856
2857/* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2860 */
2861Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002862_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002863 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002864{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002865 Py_ssize_t len = PyString_GET_SIZE(self);
2866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002869
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2873 }
Christian Heimes44720832008-05-26 13:01:01 +00002874#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002878#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
2881 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002882
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002883 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002893
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002894 if (end-slen > start)
2895 start = end - slen;
2896 }
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002900}
2901
2902
2903PyDoc_STRVAR(startswith__doc__,
2904"S.startswith(prefix[, start[, end]]) -> bool\n\
2905\n\
2906Return True if S starts with the specified prefix, False otherwise.\n\
2907With optional start, test S beginning at that position.\n\
2908With optional end, stop comparing S at that position.\n\
2909prefix can also be a tuple of strings to try.");
2910
2911static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002912string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002913{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002918
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002919 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2932 }
2933 }
2934 Py_RETURN_FALSE;
2935 }
2936 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (result == -1)
2938 return NULL;
2939 else
2940 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002941}
2942
2943
2944PyDoc_STRVAR(endswith__doc__,
2945"S.endswith(suffix[, start[, end]]) -> bool\n\
2946\n\
2947Return True if S ends with the specified suffix, False otherwise.\n\
2948With optional start, test S beginning at that position.\n\
2949With optional end, stop comparing S at that position.\n\
2950suffix can also be a tuple of strings to try.");
2951
2952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002954{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002955 Py_ssize_t start = 0;
2956 Py_ssize_t end = PY_SSIZE_T_MAX;
2957 PyObject *subobj;
2958 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002960 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2962 return NULL;
2963 if (PyTuple_Check(subobj)) {
2964 Py_ssize_t i;
2965 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966 result = _string_tailmatch(self,
2967 PyTuple_GET_ITEM(subobj, i),
2968 start, end, +1);
2969 if (result == -1)
2970 return NULL;
2971 else if (result) {
2972 Py_RETURN_TRUE;
2973 }
2974 }
2975 Py_RETURN_FALSE;
2976 }
2977 result = _string_tailmatch(self, subobj, start, end, +1);
2978 if (result == -1)
2979 return NULL;
2980 else
2981 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002982}
2983
2984
2985PyDoc_STRVAR(encode__doc__,
2986"S.encode([encoding[,errors]]) -> object\n\
2987\n\
2988Encodes S using the codec registered for encoding. encoding defaults\n\
2989to the default encoding. errors may be given to set a different error\n\
2990handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992'xmlcharrefreplace' as well as any other name registered with\n\
2993codecs.register_error that is able to handle UnicodeEncodeErrors.");
2994
2995static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002996string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002997{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002998 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002999 char *encoding = NULL;
3000 char *errors = NULL;
3001 PyObject *v;
3002
Benjamin Peterson332d7212009-09-18 21:14:55 +00003003 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003004 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003005 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003006 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003007 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003008 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003010 PyErr_Format(PyExc_TypeError,
3011 "encoder did not return a string/unicode object "
3012 "(type=%.400s)",
3013 Py_TYPE(v)->tp_name);
3014 Py_DECREF(v);
3015 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003016 }
3017 return v;
3018
3019 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003020 return NULL;
3021}
3022
Christian Heimes44720832008-05-26 13:01:01 +00003023
3024PyDoc_STRVAR(decode__doc__,
3025"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003026\n\
Christian Heimes44720832008-05-26 13:01:01 +00003027Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003028to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003029handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003031as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032able to handle UnicodeDecodeErrors.");
3033
3034static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003035string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003036{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003037 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003038 char *encoding = NULL;
3039 char *errors = NULL;
3040 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003041
Benjamin Peterson332d7212009-09-18 21:14:55 +00003042 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003043 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003046 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003047 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003048 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003049 PyErr_Format(PyExc_TypeError,
3050 "decoder did not return a string/unicode object "
3051 "(type=%.400s)",
3052 Py_TYPE(v)->tp_name);
3053 Py_DECREF(v);
3054 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003055 }
3056 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003057
Christian Heimes44720832008-05-26 13:01:01 +00003058 onError:
3059 return NULL;
3060}
3061
3062
3063PyDoc_STRVAR(expandtabs__doc__,
3064"S.expandtabs([tabsize]) -> string\n\
3065\n\
3066Return a copy of S where all tab characters are expanded using spaces.\n\
3067If tabsize is not given, a tab size of 8 characters is assumed.");
3068
3069static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003070string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003071{
3072 const char *e, *p, *qe;
3073 char *q;
3074 Py_ssize_t i, j, incr;
3075 PyObject *u;
3076 int tabsize = 8;
3077
3078 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003079 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003080
3081 /* First pass: determine size of output string */
3082 i = 0; /* chars up to and including most recent \n or \r */
3083 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003084 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3085 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003086 if (*p == '\t') {
3087 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003088 incr = tabsize - (j % tabsize);
3089 if (j > PY_SSIZE_T_MAX - incr)
3090 goto overflow1;
3091 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003092 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003093 }
3094 else {
3095 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003096 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003097 j++;
3098 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003099 if (i > PY_SSIZE_T_MAX - j)
3100 goto overflow1;
3101 i += j;
3102 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003103 }
3104 }
Christian Heimes44720832008-05-26 13:01:01 +00003105
3106 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003107 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003108
3109 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003110 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003111 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003112 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003113
3114 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003115 q = PyString_AS_STRING(u); /* next output char */
3116 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003117
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003118 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003119 if (*p == '\t') {
3120 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003121 i = tabsize - (j % tabsize);
3122 j += i;
3123 while (i--) {
3124 if (q >= qe)
3125 goto overflow2;
3126 *q++ = ' ';
3127 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003128 }
3129 }
3130 else {
3131 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003132 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003133 *q++ = *p;
3134 j++;
3135 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003136 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003137 }
Christian Heimes44720832008-05-26 13:01:01 +00003138
3139 return u;
3140
3141 overflow2:
3142 Py_DECREF(u);
3143 overflow1:
3144 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3145 return NULL;
3146}
3147
3148Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003149pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003150{
3151 PyObject *u;
3152
3153 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003154 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003155 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003156 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003157
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003159 Py_INCREF(self);
3160 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003161 }
3162
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003163 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003164 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003165 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003166 if (left)
3167 memset(PyString_AS_STRING(u), fill, left);
3168 Py_MEMCPY(PyString_AS_STRING(u) + left,
3169 PyString_AS_STRING(self),
3170 PyString_GET_SIZE(self));
3171 if (right)
3172 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3173 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003174 }
3175
3176 return u;
3177}
3178
3179PyDoc_STRVAR(ljust__doc__,
3180"S.ljust(width[, fillchar]) -> string\n"
3181"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003182"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003183"done using the specified fill character (default is a space).");
3184
3185static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003186string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003187{
3188 Py_ssize_t width;
3189 char fillchar = ' ';
3190
3191 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003192 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003193
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003194 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003195 Py_INCREF(self);
3196 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003197 }
3198
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003199 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003200}
3201
3202
3203PyDoc_STRVAR(rjust__doc__,
3204"S.rjust(width[, fillchar]) -> string\n"
3205"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003206"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003207"done using the specified fill character (default is a space)");
3208
3209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003210string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003211{
3212 Py_ssize_t width;
3213 char fillchar = ' ';
3214
3215 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003216 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003217
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003218 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003219 Py_INCREF(self);
3220 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003221 }
3222
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003223 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003224}
3225
3226
3227PyDoc_STRVAR(center__doc__,
3228"S.center(width[, fillchar]) -> string\n"
3229"\n"
3230"Return S centered in a string of length width. Padding is\n"
3231"done using the specified fill character (default is a space)");
3232
3233static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003234string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003235{
3236 Py_ssize_t marg, left;
3237 Py_ssize_t width;
3238 char fillchar = ' ';
3239
3240 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003241 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003242
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003244 Py_INCREF(self);
3245 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003246 }
3247
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003248 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003249 left = marg / 2 + (marg & width & 1);
3250
3251 return pad(self, left, marg - left, fillchar);
3252}
3253
3254PyDoc_STRVAR(zfill__doc__,
3255"S.zfill(width) -> string\n"
3256"\n"
3257"Pad a numeric string S with zeros on the left, to fill a field\n"
3258"of the specified width. The string S is never truncated.");
3259
3260static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003261string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003262{
3263 Py_ssize_t fill;
3264 PyObject *s;
3265 char *p;
3266 Py_ssize_t width;
3267
3268 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003269 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003270
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003271 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003272 if (PyString_CheckExact(self)) {
3273 Py_INCREF(self);
3274 return (PyObject*) self;
3275 }
3276 else
3277 return PyString_FromStringAndSize(
3278 PyString_AS_STRING(self),
3279 PyString_GET_SIZE(self)
3280 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003281 }
3282
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003283 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003284
Christian Heimes44720832008-05-26 13:01:01 +00003285 s = pad(self, fill, 0, '0');
3286
3287 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003288 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003289
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003290 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003291 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003292 /* move sign to beginning of string */
3293 p[0] = p[fill];
3294 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003295 }
3296
3297 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003298}
3299
Christian Heimes44720832008-05-26 13:01:01 +00003300PyDoc_STRVAR(isspace__doc__,
3301"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003302\n\
Christian Heimes44720832008-05-26 13:01:01 +00003303Return True if all characters in S are whitespace\n\
3304and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003305
Christian Heimes44720832008-05-26 13:01:01 +00003306static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003307string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308{
Christian Heimes44720832008-05-26 13:01:01 +00003309 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003310 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003311 register const unsigned char *e;
3312
3313 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003314 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003315 isspace(*p))
3316 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003317
3318 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003319 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003320 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003321
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003323 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 if (!isspace(*p))
3325 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003326 }
Christian Heimes44720832008-05-26 13:01:01 +00003327 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003328}
3329
Christian Heimes44720832008-05-26 13:01:01 +00003330
3331PyDoc_STRVAR(isalpha__doc__,
3332"S.isalpha() -> bool\n\
3333\n\
3334Return True if all characters in S are alphabetic\n\
3335and there is at least one character in S, False otherwise.");
3336
3337static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339{
Christian Heimes44720832008-05-26 13:01:01 +00003340 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003341 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003342 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003343
Christian Heimes44720832008-05-26 13:01:01 +00003344 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003346 isalpha(*p))
3347 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003348
3349 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003350 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003351 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003352
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003353 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003354 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 if (!isalpha(*p))
3356 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003357 }
Christian Heimes44720832008-05-26 13:01:01 +00003358 return PyBool_FromLong(1);
3359}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360
Christian Heimes44720832008-05-26 13:01:01 +00003361
3362PyDoc_STRVAR(isalnum__doc__,
3363"S.isalnum() -> bool\n\
3364\n\
3365Return True if all characters in S are alphanumeric\n\
3366and there is at least one character in S, False otherwise.");
3367
3368static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003369string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003370{
3371 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003372 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003373 register const unsigned char *e;
3374
3375 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003376 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003377 isalnum(*p))
3378 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003379
3380 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003381 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003382 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003385 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 if (!isalnum(*p))
3387 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003388 }
3389 return PyBool_FromLong(1);
3390}
3391
3392
3393PyDoc_STRVAR(isdigit__doc__,
3394"S.isdigit() -> bool\n\
3395\n\
3396Return True if all characters in S are digits\n\
3397and there is at least one character in S, False otherwise.");
3398
3399static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003400string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003401{
3402 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003403 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003404 register const unsigned char *e;
3405
3406 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003407 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003408 isdigit(*p))
3409 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003410
3411 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003412 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003413 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003415 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003416 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 if (!isdigit(*p))
3418 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003419 }
3420 return PyBool_FromLong(1);
3421}
3422
3423
3424PyDoc_STRVAR(islower__doc__,
3425"S.islower() -> bool\n\
3426\n\
3427Return True if all cased characters in S are lowercase and there is\n\
3428at least one cased character in S, False otherwise.");
3429
3430static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003431string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003432{
3433 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003434 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003435 register const unsigned char *e;
3436 int cased;
3437
3438 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003439 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003440 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003441
3442 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003443 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003444 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003445
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003446 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003447 cased = 0;
3448 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003449 if (isupper(*p))
3450 return PyBool_FromLong(0);
3451 else if (!cased && islower(*p))
3452 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003453 }
3454 return PyBool_FromLong(cased);
3455}
3456
3457
3458PyDoc_STRVAR(isupper__doc__,
3459"S.isupper() -> bool\n\
3460\n\
3461Return True if all cased characters in S are uppercase and there is\n\
3462at least one cased character in S, False otherwise.");
3463
3464static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003465string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003466{
3467 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003468 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003469 register const unsigned char *e;
3470 int cased;
3471
3472 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003473 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003474 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003475
3476 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003477 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003478 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003479
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003481 cased = 0;
3482 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003483 if (islower(*p))
3484 return PyBool_FromLong(0);
3485 else if (!cased && isupper(*p))
3486 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003487 }
3488 return PyBool_FromLong(cased);
3489}
3490
3491
3492PyDoc_STRVAR(istitle__doc__,
3493"S.istitle() -> bool\n\
3494\n\
3495Return True if S is a titlecased string and there is at least one\n\
3496character in S, i.e. uppercase characters may only follow uncased\n\
3497characters and lowercase characters only cased ones. Return False\n\
3498otherwise.");
3499
3500static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003501string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003502{
3503 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003504 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003505 register const unsigned char *e;
3506 int cased, previous_is_cased;
3507
3508 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003509 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003510 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003511
3512 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003513 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003514 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003515
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003516 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003517 cased = 0;
3518 previous_is_cased = 0;
3519 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003520 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003521
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003522 if (isupper(ch)) {
3523 if (previous_is_cased)
3524 return PyBool_FromLong(0);
3525 previous_is_cased = 1;
3526 cased = 1;
3527 }
3528 else if (islower(ch)) {
3529 if (!previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3533 }
3534 else
3535 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003536 }
3537 return PyBool_FromLong(cased);
3538}
3539
3540
3541PyDoc_STRVAR(splitlines__doc__,
3542"S.splitlines([keepends]) -> list of strings\n\
3543\n\
3544Return a list of the lines in S, breaking at line boundaries.\n\
3545Line breaks are not included in the resulting list unless keepends\n\
3546is given and true.");
3547
3548static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003550{
Christian Heimes44720832008-05-26 13:01:01 +00003551 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003552
3553 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003554 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003555
Antoine Pitrou64672132010-01-13 07:55:48 +00003556 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003557 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003559 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003560}
3561
Robert Schuppenies51df0642008-06-01 16:16:17 +00003562PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003563"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003564
3565static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003566string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003568 Py_ssize_t res;
3569 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3570 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003571}
3572
Christian Heimes1a6387e2008-03-26 12:49:49 +00003573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003574string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003575{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003576 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003577}
3578
Christian Heimes1a6387e2008-03-26 12:49:49 +00003579
Christian Heimes44720832008-05-26 13:01:01 +00003580#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003581
Christian Heimes44720832008-05-26 13:01:01 +00003582PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003583"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003584\n\
3585");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003586
Eric Smithdc13b792008-05-30 18:10:04 +00003587static PyObject *
3588string__format__(PyObject* self, PyObject* args)
3589{
3590 PyObject *format_spec;
3591 PyObject *result = NULL;
3592 PyObject *tmp = NULL;
3593
3594 /* If 2.x, convert format_spec to the same type as value */
3595 /* This is to allow things like u''.format('') */
3596 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003597 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003598 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003599 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3600 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3601 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003602 }
3603 tmp = PyObject_Str(format_spec);
3604 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003605 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003606 format_spec = tmp;
3607
3608 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003609 PyString_AS_STRING(format_spec),
3610 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003611done:
3612 Py_XDECREF(tmp);
3613 return result;
3614}
3615
Christian Heimes44720832008-05-26 13:01:01 +00003616PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003617"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003618\n\
3619");
3620
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003621
Christian Heimes1a6387e2008-03-26 12:49:49 +00003622static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003623string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003624 /* Counterparts of the obsolete stropmodule functions; except
3625 string.maketrans(). */
3626 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3627 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3628 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3629 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3630 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3631 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3632 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3633 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3634 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3635 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3636 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3637 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3638 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3639 capitalize__doc__},
3640 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3641 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3642 endswith__doc__},
3643 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3644 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3645 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3646 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3647 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3648 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3649 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3650 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3651 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3652 rpartition__doc__},
3653 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3654 startswith__doc__},
3655 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3656 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3657 swapcase__doc__},
3658 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3659 translate__doc__},
3660 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3661 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3662 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3663 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3664 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3665 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3666 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3667 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3668 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3669 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3670 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3671 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3672 expandtabs__doc__},
3673 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3674 splitlines__doc__},
3675 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3676 sizeof__doc__},
3677 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3678 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003679};
3680
3681static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003682str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003683
Christian Heimes44720832008-05-26 13:01:01 +00003684static PyObject *
3685string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3686{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003687 PyObject *x = NULL;
3688 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003690 if (type != &PyString_Type)
3691 return str_subtype_new(type, args, kwds);
3692 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3693 return NULL;
3694 if (x == NULL)
3695 return PyString_FromString("");
3696 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003697}
3698
3699static PyObject *
3700str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3701{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003702 PyObject *tmp, *pnew;
3703 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003704
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003705 assert(PyType_IsSubtype(type, &PyString_Type));
3706 tmp = string_new(&PyString_Type, args, kwds);
3707 if (tmp == NULL)
3708 return NULL;
3709 assert(PyString_CheckExact(tmp));
3710 n = PyString_GET_SIZE(tmp);
3711 pnew = type->tp_alloc(type, n);
3712 if (pnew != NULL) {
3713 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3714 ((PyStringObject *)pnew)->ob_shash =
3715 ((PyStringObject *)tmp)->ob_shash;
3716 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3717 }
3718 Py_DECREF(tmp);
3719 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003720}
3721
3722static PyObject *
3723basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3724{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003725 PyErr_SetString(PyExc_TypeError,
3726 "The basestring type cannot be instantiated");
3727 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003728}
3729
3730static PyObject *
3731string_mod(PyObject *v, PyObject *w)
3732{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003733 if (!PyString_Check(v)) {
3734 Py_INCREF(Py_NotImplemented);
3735 return Py_NotImplemented;
3736 }
3737 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003738}
3739
3740PyDoc_STRVAR(basestring_doc,
3741"Type basestring cannot be instantiated; it is the base for str and unicode.");
3742
3743static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003744 0, /*nb_add*/
3745 0, /*nb_subtract*/
3746 0, /*nb_multiply*/
3747 0, /*nb_divide*/
3748 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003749};
3750
3751
3752PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003753 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3754 "basestring",
3755 0,
3756 0,
3757 0, /* tp_dealloc */
3758 0, /* tp_print */
3759 0, /* tp_getattr */
3760 0, /* tp_setattr */
3761 0, /* tp_compare */
3762 0, /* tp_repr */
3763 0, /* tp_as_number */
3764 0, /* tp_as_sequence */
3765 0, /* tp_as_mapping */
3766 0, /* tp_hash */
3767 0, /* tp_call */
3768 0, /* tp_str */
3769 0, /* tp_getattro */
3770 0, /* tp_setattro */
3771 0, /* tp_as_buffer */
3772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3773 basestring_doc, /* tp_doc */
3774 0, /* tp_traverse */
3775 0, /* tp_clear */
3776 0, /* tp_richcompare */
3777 0, /* tp_weaklistoffset */
3778 0, /* tp_iter */
3779 0, /* tp_iternext */
3780 0, /* tp_methods */
3781 0, /* tp_members */
3782 0, /* tp_getset */
3783 &PyBaseObject_Type, /* tp_base */
3784 0, /* tp_dict */
3785 0, /* tp_descr_get */
3786 0, /* tp_descr_set */
3787 0, /* tp_dictoffset */
3788 0, /* tp_init */
3789 0, /* tp_alloc */
3790 basestring_new, /* tp_new */
3791 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003792};
3793
3794PyDoc_STRVAR(string_doc,
3795"str(object) -> string\n\
3796\n\
3797Return a nice string representation of the object.\n\
3798If the argument is a string, the return value is the same object.");
3799
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003800PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003801 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3802 "str",
3803 PyStringObject_SIZE,
3804 sizeof(char),
3805 string_dealloc, /* tp_dealloc */
3806 (printfunc)string_print, /* tp_print */
3807 0, /* tp_getattr */
3808 0, /* tp_setattr */
3809 0, /* tp_compare */
3810 string_repr, /* tp_repr */
3811 &string_as_number, /* tp_as_number */
3812 &string_as_sequence, /* tp_as_sequence */
3813 &string_as_mapping, /* tp_as_mapping */
3814 (hashfunc)string_hash, /* tp_hash */
3815 0, /* tp_call */
3816 string_str, /* tp_str */
3817 PyObject_GenericGetAttr, /* tp_getattro */
3818 0, /* tp_setattro */
3819 &string_as_buffer, /* tp_as_buffer */
3820 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3821 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3822 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3823 string_doc, /* tp_doc */
3824 0, /* tp_traverse */
3825 0, /* tp_clear */
3826 (richcmpfunc)string_richcompare, /* tp_richcompare */
3827 0, /* tp_weaklistoffset */
3828 0, /* tp_iter */
3829 0, /* tp_iternext */
3830 string_methods, /* tp_methods */
3831 0, /* tp_members */
3832 0, /* tp_getset */
3833 &PyBaseString_Type, /* tp_base */
3834 0, /* tp_dict */
3835 0, /* tp_descr_get */
3836 0, /* tp_descr_set */
3837 0, /* tp_dictoffset */
3838 0, /* tp_init */
3839 0, /* tp_alloc */
3840 string_new, /* tp_new */
3841 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003842};
3843
3844void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003845PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003846{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003847 register PyObject *v;
3848 if (*pv == NULL)
3849 return;
3850 if (w == NULL || !PyString_Check(*pv)) {
3851 Py_DECREF(*pv);
3852 *pv = NULL;
3853 return;
3854 }
3855 v = string_concat((PyStringObject *) *pv, w);
3856 Py_DECREF(*pv);
3857 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003858}
3859
3860void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003861PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003862{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003863 PyString_Concat(pv, w);
3864 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003865}
3866
3867
3868/* The following function breaks the notion that strings are immutable:
3869 it changes the size of a string. We get away with this only if there
3870 is only one module referencing the object. You can also think of it
3871 as creating a new string object and destroying the old one, only
3872 more efficiently. In any case, don't use this if the string may
3873 already be known to some other part of the code...
3874 Note that if there's not enough memory to resize the string, the original
3875 string object at *pv is deallocated, *pv is set to NULL, an "out of
3876 memory" exception is set, and -1 is returned. Else (on success) 0 is
3877 returned, and the value in *pv may or may not be the same as on input.
3878 As always, an extra byte is allocated for a trailing \0 byte (newsize
3879 does *not* include that), and a trailing \0 byte is stored.
3880*/
3881
3882int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003883_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003884{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003885 register PyObject *v;
3886 register PyStringObject *sv;
3887 v = *pv;
3888 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3889 PyString_CHECK_INTERNED(v)) {
3890 *pv = 0;
3891 Py_DECREF(v);
3892 PyErr_BadInternalCall();
3893 return -1;
3894 }
3895 /* XXX UNREF/NEWREF interface should be more symmetrical */
3896 _Py_DEC_REFTOTAL;
3897 _Py_ForgetReference(v);
3898 *pv = (PyObject *)
3899 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3900 if (*pv == NULL) {
3901 PyObject_Del(v);
3902 PyErr_NoMemory();
3903 return -1;
3904 }
3905 _Py_NewReference(*pv);
3906 sv = (PyStringObject *) *pv;
3907 Py_SIZE(sv) = newsize;
3908 sv->ob_sval[newsize] = '\0';
3909 sv->ob_shash = -1; /* invalidate cached hash value */
3910 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003911}
3912
3913/* Helpers for formatstring */
3914
3915Py_LOCAL_INLINE(PyObject *)
3916getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3917{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003918 Py_ssize_t argidx = *p_argidx;
3919 if (argidx < arglen) {
3920 (*p_argidx)++;
3921 if (arglen < 0)
3922 return args;
3923 else
3924 return PyTuple_GetItem(args, argidx);
3925 }
3926 PyErr_SetString(PyExc_TypeError,
3927 "not enough arguments for format string");
3928 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003929}
3930
3931/* Format codes
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003932 * F_LJUST '-'
3933 * F_SIGN '+'
3934 * F_BLANK ' '
3935 * F_ALT '#'
3936 * F_ZERO '0'
Christian Heimes44720832008-05-26 13:01:01 +00003937 */
3938#define F_LJUST (1<<0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003939#define F_SIGN (1<<1)
Christian Heimes44720832008-05-26 13:01:01 +00003940#define F_BLANK (1<<2)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003941#define F_ALT (1<<3)
3942#define F_ZERO (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003943
Mark Dickinson18cfada2009-11-23 18:46:41 +00003944/* Returns a new reference to a PyString object, or NULL on failure. */
3945
3946static PyObject *
3947formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003948{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003949 char *p;
3950 PyObject *result;
3951 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003952
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003953 x = PyFloat_AsDouble(v);
3954 if (x == -1.0 && PyErr_Occurred()) {
3955 PyErr_Format(PyExc_TypeError, "float argument required, "
3956 "not %.200s", Py_TYPE(v)->tp_name);
3957 return NULL;
3958 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003960 if (prec < 0)
3961 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003962
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003963 p = PyOS_double_to_string(x, type, prec,
3964 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003965
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003966 if (p == NULL)
3967 return NULL;
3968 result = PyString_FromStringAndSize(p, strlen(p));
3969 PyMem_Free(p);
3970 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003971}
3972
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003973/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003974 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3975 * Python's regular ints.
3976 * Return value: a new PyString*, or NULL if error.
3977 * . *pbuf is set to point into it,
3978 * *plen set to the # of chars following that.
3979 * Caller must decref it when done using pbuf.
3980 * The string starting at *pbuf is of the form
3981 * "-"? ("0x" | "0X")? digit+
3982 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3983 * set in flags. The case of hex digits will be correct,
3984 * There will be at least prec digits, zero-filled on the left if
3985 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003986 * val object to be converted
3987 * flags bitmask of format flags; only F_ALT is looked at
3988 * prec minimum number of digits; 0-fill on left if needed
3989 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003990 *
3991 * CAUTION: o, x and X conversions on regular ints can never
3992 * produce a '-' sign, but can for Python's unbounded ints.
3993 */
3994PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003995_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003996 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003997{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003998 PyObject *result = NULL;
3999 char *buf;
4000 Py_ssize_t i;
4001 int sign; /* 1 if '-', else 0 */
4002 int len; /* number of characters */
4003 Py_ssize_t llen;
4004 int numdigits; /* len == numnondigits + numdigits */
4005 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004006
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004007 switch (type) {
4008 case 'd':
4009 case 'u':
4010 result = Py_TYPE(val)->tp_str(val);
4011 break;
4012 case 'o':
4013 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4014 break;
4015 case 'x':
4016 case 'X':
4017 numnondigits = 2;
4018 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4019 break;
4020 default:
4021 assert(!"'type' not in [duoxX]");
4022 }
4023 if (!result)
4024 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004026 buf = PyString_AsString(result);
4027 if (!buf) {
4028 Py_DECREF(result);
4029 return NULL;
4030 }
Christian Heimes44720832008-05-26 13:01:01 +00004031
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004032 /* To modify the string in-place, there can only be one reference. */
4033 if (Py_REFCNT(result) != 1) {
4034 PyErr_BadInternalCall();
4035 return NULL;
4036 }
4037 llen = PyString_Size(result);
4038 if (llen > INT_MAX) {
4039 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4040 return NULL;
4041 }
4042 len = (int)llen;
4043 if (buf[len-1] == 'L') {
4044 --len;
4045 buf[len] = '\0';
4046 }
4047 sign = buf[0] == '-';
4048 numnondigits += sign;
4049 numdigits = len - numnondigits;
4050 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004051
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004052 /* Get rid of base marker unless F_ALT */
4053 if ((flags & F_ALT) == 0) {
4054 /* Need to skip 0x, 0X or 0. */
4055 int skipped = 0;
4056 switch (type) {
4057 case 'o':
4058 assert(buf[sign] == '0');
4059 /* If 0 is only digit, leave it alone. */
4060 if (numdigits > 1) {
4061 skipped = 1;
4062 --numdigits;
4063 }
4064 break;
4065 case 'x':
4066 case 'X':
4067 assert(buf[sign] == '0');
4068 assert(buf[sign + 1] == 'x');
4069 skipped = 2;
4070 numnondigits -= 2;
4071 break;
4072 }
4073 if (skipped) {
4074 buf += skipped;
4075 len -= skipped;
4076 if (sign)
4077 buf[0] = '-';
4078 }
4079 assert(len == numnondigits + numdigits);
4080 assert(numdigits > 0);
4081 }
Christian Heimes44720832008-05-26 13:01:01 +00004082
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004083 /* Fill with leading zeroes to meet minimum width. */
4084 if (prec > numdigits) {
4085 PyObject *r1 = PyString_FromStringAndSize(NULL,
4086 numnondigits + prec);
4087 char *b1;
4088 if (!r1) {
4089 Py_DECREF(result);
4090 return NULL;
4091 }
4092 b1 = PyString_AS_STRING(r1);
4093 for (i = 0; i < numnondigits; ++i)
4094 *b1++ = *buf++;
4095 for (i = 0; i < prec - numdigits; i++)
4096 *b1++ = '0';
4097 for (i = 0; i < numdigits; i++)
4098 *b1++ = *buf++;
4099 *b1 = '\0';
4100 Py_DECREF(result);
4101 result = r1;
4102 buf = PyString_AS_STRING(result);
4103 len = numnondigits + prec;
4104 }
Christian Heimes44720832008-05-26 13:01:01 +00004105
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004106 /* Fix up case for hex conversions. */
4107 if (type == 'X') {
4108 /* Need to convert all lower case letters to upper case.
4109 and need to convert 0x to 0X (and -0x to -0X). */
4110 for (i = 0; i < len; i++)
4111 if (buf[i] >= 'a' && buf[i] <= 'x')
4112 buf[i] -= 'a'-'A';
4113 }
4114 *pbuf = buf;
4115 *plen = len;
4116 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004117}
4118
4119Py_LOCAL_INLINE(int)
4120formatint(char *buf, size_t buflen, int flags,
4121 int prec, int type, PyObject *v)
4122{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004123 /* fmt = '%#.' + `prec` + 'l' + `type`
4124 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125 + 1 + 1 = 24 */
4126 char fmt[64]; /* plenty big enough! */
4127 char *sign;
4128 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004130 x = PyInt_AsLong(v);
4131 if (x == -1 && PyErr_Occurred()) {
4132 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4133 Py_TYPE(v)->tp_name);
4134 return -1;
4135 }
4136 if (x < 0 && type == 'u') {
4137 type = 'd';
4138 }
4139 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4140 sign = "-";
4141 else
4142 sign = "";
4143 if (prec < 0)
4144 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004145
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004146 if ((flags & F_ALT) &&
4147 (type == 'x' || type == 'X')) {
4148 /* When converting under %#x or %#X, there are a number
4149 * of issues that cause pain:
4150 * - when 0 is being converted, the C standard leaves off
4151 * the '0x' or '0X', which is inconsistent with other
4152 * %#x/%#X conversions and inconsistent with Python's
4153 * hex() function
4154 * - there are platforms that violate the standard and
4155 * convert 0 with the '0x' or '0X'
4156 * (Metrowerks, Compaq Tru64)
4157 * - there are platforms that give '0x' when converting
4158 * under %#X, but convert 0 in accordance with the
4159 * standard (OS/2 EMX)
4160 *
4161 * We can achieve the desired consistency by inserting our
4162 * own '0x' or '0X' prefix, and substituting %x/%X in place
4163 * of %#x/%#X.
4164 *
4165 * Note that this is the same approach as used in
4166 * formatint() in unicodeobject.c
4167 */
4168 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4169 sign, type, prec, type);
4170 }
4171 else {
4172 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4173 sign, (flags&F_ALT) ? "#" : "",
4174 prec, type);
4175 }
Christian Heimes44720832008-05-26 13:01:01 +00004176
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004177 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179 */
4180 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4181 PyErr_SetString(PyExc_OverflowError,
4182 "formatted integer is too long (precision too large?)");
4183 return -1;
4184 }
4185 if (sign[0])
4186 PyOS_snprintf(buf, buflen, fmt, -x);
4187 else
4188 PyOS_snprintf(buf, buflen, fmt, x);
4189 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004190}
4191
4192Py_LOCAL_INLINE(int)
4193formatchar(char *buf, size_t buflen, PyObject *v)
4194{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004195 /* presume that the buffer is at least 2 characters long */
4196 if (PyString_Check(v)) {
4197 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4198 return -1;
4199 }
4200 else {
4201 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4202 return -1;
4203 }
4204 buf[1] = '\0';
4205 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004206}
4207
4208/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4209
Mark Dickinson18cfada2009-11-23 18:46:41 +00004210 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004211 chars are formatted. XXX This is a magic number. Each formatting
4212 routine does bounds checking to ensure no overflow, but a better
4213 solution may be to malloc a buffer of appropriate size for each
4214 format. For now, the current solution is sufficient.
4215*/
4216#define FORMATBUFLEN (size_t)120
4217
4218PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004219PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004220{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004221 char *fmt, *res;
4222 Py_ssize_t arglen, argidx;
4223 Py_ssize_t reslen, rescnt, fmtcnt;
4224 int args_owned = 0;
4225 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004226#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004227 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004228#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004229 PyObject *dict = NULL;
4230 if (format == NULL || !PyString_Check(format) || args == NULL) {
4231 PyErr_BadInternalCall();
4232 return NULL;
4233 }
4234 orig_args = args;
4235 fmt = PyString_AS_STRING(format);
4236 fmtcnt = PyString_GET_SIZE(format);
4237 reslen = rescnt = fmtcnt + 100;
4238 result = PyString_FromStringAndSize((char *)NULL, reslen);
4239 if (result == NULL)
4240 return NULL;
4241 res = PyString_AsString(result);
4242 if (PyTuple_Check(args)) {
4243 arglen = PyTuple_GET_SIZE(args);
4244 argidx = 0;
4245 }
4246 else {
4247 arglen = -1;
4248 argidx = -2;
4249 }
4250 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4251 !PyObject_TypeCheck(args, &PyBaseString_Type))
4252 dict = args;
4253 while (--fmtcnt >= 0) {
4254 if (*fmt != '%') {
4255 if (--rescnt < 0) {
4256 rescnt = fmtcnt + 100;
4257 reslen += rescnt;
4258 if (_PyString_Resize(&result, reslen))
4259 return NULL;
4260 res = PyString_AS_STRING(result)
4261 + reslen - rescnt;
4262 --rescnt;
4263 }
4264 *res++ = *fmt++;
4265 }
4266 else {
4267 /* Got a format specifier */
4268 int flags = 0;
4269 Py_ssize_t width = -1;
4270 int prec = -1;
4271 int c = '\0';
4272 int fill;
4273 int isnumok;
4274 PyObject *v = NULL;
4275 PyObject *temp = NULL;
4276 char *pbuf;
4277 int sign;
4278 Py_ssize_t len;
4279 char formatbuf[FORMATBUFLEN];
4280 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004281#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004282 char *fmt_start = fmt;
4283 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004284#endif
4285
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004286 fmt++;
4287 if (*fmt == '(') {
4288 char *keystart;
4289 Py_ssize_t keylen;
4290 PyObject *key;
4291 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004292
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004293 if (dict == NULL) {
4294 PyErr_SetString(PyExc_TypeError,
4295 "format requires a mapping");
4296 goto error;
4297 }
4298 ++fmt;
4299 --fmtcnt;
4300 keystart = fmt;
4301 /* Skip over balanced parentheses */
4302 while (pcount > 0 && --fmtcnt >= 0) {
4303 if (*fmt == ')')
4304 --pcount;
4305 else if (*fmt == '(')
4306 ++pcount;
4307 fmt++;
4308 }
4309 keylen = fmt - keystart - 1;
4310 if (fmtcnt < 0 || pcount > 0) {
4311 PyErr_SetString(PyExc_ValueError,
4312 "incomplete format key");
4313 goto error;
4314 }
4315 key = PyString_FromStringAndSize(keystart,
4316 keylen);
4317 if (key == NULL)
4318 goto error;
4319 if (args_owned) {
4320 Py_DECREF(args);
4321 args_owned = 0;
4322 }
4323 args = PyObject_GetItem(dict, key);
4324 Py_DECREF(key);
4325 if (args == NULL) {
4326 goto error;
4327 }
4328 args_owned = 1;
4329 arglen = -1;
4330 argidx = -2;
4331 }
4332 while (--fmtcnt >= 0) {
4333 switch (c = *fmt++) {
4334 case '-': flags |= F_LJUST; continue;
4335 case '+': flags |= F_SIGN; continue;
4336 case ' ': flags |= F_BLANK; continue;
4337 case '#': flags |= F_ALT; continue;
4338 case '0': flags |= F_ZERO; continue;
4339 }
4340 break;
4341 }
4342 if (c == '*') {
4343 v = getnextarg(args, arglen, &argidx);
4344 if (v == NULL)
4345 goto error;
4346 if (!PyInt_Check(v)) {
4347 PyErr_SetString(PyExc_TypeError,
4348 "* wants int");
4349 goto error;
4350 }
4351 width = PyInt_AsLong(v);
4352 if (width < 0) {
4353 flags |= F_LJUST;
4354 width = -width;
4355 }
4356 if (--fmtcnt >= 0)
4357 c = *fmt++;
4358 }
4359 else if (c >= 0 && isdigit(c)) {
4360 width = c - '0';
4361 while (--fmtcnt >= 0) {
4362 c = Py_CHARMASK(*fmt++);
4363 if (!isdigit(c))
4364 break;
4365 if ((width*10) / 10 != width) {
4366 PyErr_SetString(
4367 PyExc_ValueError,
4368 "width too big");
4369 goto error;
4370 }
4371 width = width*10 + (c - '0');
4372 }
4373 }
4374 if (c == '.') {
4375 prec = 0;
4376 if (--fmtcnt >= 0)
4377 c = *fmt++;
4378 if (c == '*') {
4379 v = getnextarg(args, arglen, &argidx);
4380 if (v == NULL)
4381 goto error;
4382 if (!PyInt_Check(v)) {
4383 PyErr_SetString(
4384 PyExc_TypeError,
4385 "* wants int");
4386 goto error;
4387 }
4388 prec = PyInt_AsLong(v);
4389 if (prec < 0)
4390 prec = 0;
4391 if (--fmtcnt >= 0)
4392 c = *fmt++;
4393 }
4394 else if (c >= 0 && isdigit(c)) {
4395 prec = c - '0';
4396 while (--fmtcnt >= 0) {
4397 c = Py_CHARMASK(*fmt++);
4398 if (!isdigit(c))
4399 break;
4400 if ((prec*10) / 10 != prec) {
4401 PyErr_SetString(
4402 PyExc_ValueError,
4403 "prec too big");
4404 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004405 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004406 prec = prec*10 + (c - '0');
4407 }
4408 }
4409 } /* prec */
4410 if (fmtcnt >= 0) {
4411 if (c == 'h' || c == 'l' || c == 'L') {
4412 if (--fmtcnt >= 0)
4413 c = *fmt++;
4414 }
4415 }
4416 if (fmtcnt < 0) {
4417 PyErr_SetString(PyExc_ValueError,
4418 "incomplete format");
4419 goto error;
4420 }
4421 if (c != '%') {
4422 v = getnextarg(args, arglen, &argidx);
4423 if (v == NULL)
4424 goto error;
4425 }
4426 sign = 0;
4427 fill = ' ';
4428 switch (c) {
4429 case '%':
4430 pbuf = "%";
4431 len = 1;
4432 break;
4433 case 's':
4434#ifdef Py_USING_UNICODE
4435 if (PyUnicode_Check(v)) {
4436 fmt = fmt_start;
4437 argidx = argidx_start;
4438 goto unicode;
4439 }
4440#endif
4441 temp = _PyObject_Str(v);
4442#ifdef Py_USING_UNICODE
4443 if (temp != NULL && PyUnicode_Check(temp)) {
4444 Py_DECREF(temp);
4445 fmt = fmt_start;
4446 argidx = argidx_start;
4447 goto unicode;
4448 }
4449#endif
4450 /* Fall through */
4451 case 'r':
4452 if (c == 'r')
4453 temp = PyObject_Repr(v);
4454 if (temp == NULL)
4455 goto error;
4456 if (!PyString_Check(temp)) {
4457 PyErr_SetString(PyExc_TypeError,
4458 "%s argument has non-string str()");
4459 Py_DECREF(temp);
4460 goto error;
4461 }
4462 pbuf = PyString_AS_STRING(temp);
4463 len = PyString_GET_SIZE(temp);
4464 if (prec >= 0 && len > prec)
4465 len = prec;
4466 break;
4467 case 'i':
4468 case 'd':
4469 case 'u':
4470 case 'o':
4471 case 'x':
4472 case 'X':
4473 if (c == 'i')
4474 c = 'd';
4475 isnumok = 0;
4476 if (PyNumber_Check(v)) {
4477 PyObject *iobj=NULL;
4478
4479 if (PyInt_Check(v) || (PyLong_Check(v))) {
4480 iobj = v;
4481 Py_INCREF(iobj);
4482 }
4483 else {
4484 iobj = PyNumber_Int(v);
4485 if (iobj==NULL) iobj = PyNumber_Long(v);
4486 }
4487 if (iobj!=NULL) {
4488 if (PyInt_Check(iobj)) {
4489 isnumok = 1;
4490 pbuf = formatbuf;
4491 len = formatint(pbuf,
4492 sizeof(formatbuf),
4493 flags, prec, c, iobj);
4494 Py_DECREF(iobj);
4495 if (len < 0)
4496 goto error;
4497 sign = 1;
4498 }
4499 else if (PyLong_Check(iobj)) {
4500 int ilen;
4501
4502 isnumok = 1;
4503 temp = _PyString_FormatLong(iobj, flags,
4504 prec, c, &pbuf, &ilen);
4505 Py_DECREF(iobj);
4506 len = ilen;
4507 if (!temp)
4508 goto error;
4509 sign = 1;
4510 }
4511 else {
4512 Py_DECREF(iobj);
4513 }
4514 }
4515 }
4516 if (!isnumok) {
4517 PyErr_Format(PyExc_TypeError,
4518 "%%%c format: a number is required, "
4519 "not %.200s", c, Py_TYPE(v)->tp_name);
4520 goto error;
4521 }
4522 if (flags & F_ZERO)
4523 fill = '0';
4524 break;
4525 case 'e':
4526 case 'E':
4527 case 'f':
4528 case 'F':
4529 case 'g':
4530 case 'G':
4531 temp = formatfloat(v, flags, prec, c);
4532 if (temp == NULL)
4533 goto error;
4534 pbuf = PyString_AS_STRING(temp);
4535 len = PyString_GET_SIZE(temp);
4536 sign = 1;
4537 if (flags & F_ZERO)
4538 fill = '0';
4539 break;
4540 case 'c':
4541#ifdef Py_USING_UNICODE
4542 if (PyUnicode_Check(v)) {
4543 fmt = fmt_start;
4544 argidx = argidx_start;
4545 goto unicode;
4546 }
4547#endif
4548 pbuf = formatbuf;
4549 len = formatchar(pbuf, sizeof(formatbuf), v);
4550 if (len < 0)
4551 goto error;
4552 break;
4553 default:
4554 PyErr_Format(PyExc_ValueError,
4555 "unsupported format character '%c' (0x%x) "
4556 "at index %zd",
4557 c, c,
4558 (Py_ssize_t)(fmt - 1 -
4559 PyString_AsString(format)));
4560 goto error;
4561 }
4562 if (sign) {
4563 if (*pbuf == '-' || *pbuf == '+') {
4564 sign = *pbuf++;
4565 len--;
4566 }
4567 else if (flags & F_SIGN)
4568 sign = '+';
4569 else if (flags & F_BLANK)
4570 sign = ' ';
4571 else
4572 sign = 0;
4573 }
4574 if (width < len)
4575 width = len;
4576 if (rescnt - (sign != 0) < width) {
4577 reslen -= rescnt;
4578 rescnt = width + fmtcnt + 100;
4579 reslen += rescnt;
4580 if (reslen < 0) {
4581 Py_DECREF(result);
4582 Py_XDECREF(temp);
4583 return PyErr_NoMemory();
4584 }
4585 if (_PyString_Resize(&result, reslen)) {
4586 Py_XDECREF(temp);
4587 return NULL;
4588 }
4589 res = PyString_AS_STRING(result)
4590 + reslen - rescnt;
4591 }
4592 if (sign) {
4593 if (fill != ' ')
4594 *res++ = sign;
4595 rescnt--;
4596 if (width > len)
4597 width--;
4598 }
4599 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4600 assert(pbuf[0] == '0');
4601 assert(pbuf[1] == c);
4602 if (fill != ' ') {
4603 *res++ = *pbuf++;
4604 *res++ = *pbuf++;
4605 }
4606 rescnt -= 2;
4607 width -= 2;
4608 if (width < 0)
4609 width = 0;
4610 len -= 2;
4611 }
4612 if (width > len && !(flags & F_LJUST)) {
4613 do {
4614 --rescnt;
4615 *res++ = fill;
4616 } while (--width > len);
4617 }
4618 if (fill == ' ') {
4619 if (sign)
4620 *res++ = sign;
4621 if ((flags & F_ALT) &&
4622 (c == 'x' || c == 'X')) {
4623 assert(pbuf[0] == '0');
4624 assert(pbuf[1] == c);
4625 *res++ = *pbuf++;
4626 *res++ = *pbuf++;
4627 }
4628 }
4629 Py_MEMCPY(res, pbuf, len);
4630 res += len;
4631 rescnt -= len;
4632 while (--width >= len) {
4633 --rescnt;
4634 *res++ = ' ';
4635 }
4636 if (dict && (argidx < arglen) && c != '%') {
4637 PyErr_SetString(PyExc_TypeError,
4638 "not all arguments converted during string formatting");
4639 Py_XDECREF(temp);
4640 goto error;
4641 }
4642 Py_XDECREF(temp);
4643 } /* '%' */
4644 } /* until end */
4645 if (argidx < arglen && !dict) {
4646 PyErr_SetString(PyExc_TypeError,
4647 "not all arguments converted during string formatting");
4648 goto error;
4649 }
4650 if (args_owned) {
4651 Py_DECREF(args);
4652 }
4653 if (_PyString_Resize(&result, reslen - rescnt))
4654 return NULL;
4655 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004656
4657#ifdef Py_USING_UNICODE
4658 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004659 if (args_owned) {
4660 Py_DECREF(args);
4661 args_owned = 0;
4662 }
4663 /* Fiddle args right (remove the first argidx arguments) */
4664 if (PyTuple_Check(orig_args) && argidx > 0) {
4665 PyObject *v;
4666 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4667 v = PyTuple_New(n);
4668 if (v == NULL)
4669 goto error;
4670 while (--n >= 0) {
4671 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4672 Py_INCREF(w);
4673 PyTuple_SET_ITEM(v, n, w);
4674 }
4675 args = v;
4676 } else {
4677 Py_INCREF(orig_args);
4678 args = orig_args;
4679 }
4680 args_owned = 1;
4681 /* Take what we have of the result and let the Unicode formatting
4682 function format the rest of the input. */
4683 rescnt = res - PyString_AS_STRING(result);
4684 if (_PyString_Resize(&result, rescnt))
4685 goto error;
4686 fmtcnt = PyString_GET_SIZE(format) - \
4687 (fmt - PyString_AS_STRING(format));
4688 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4689 if (format == NULL)
4690 goto error;
4691 v = PyUnicode_Format(format, args);
4692 Py_DECREF(format);
4693 if (v == NULL)
4694 goto error;
4695 /* Paste what we have (result) to what the Unicode formatting
4696 function returned (v) and return the result (or error) */
4697 w = PyUnicode_Concat(result, v);
4698 Py_DECREF(result);
4699 Py_DECREF(v);
4700 Py_DECREF(args);
4701 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004702#endif /* Py_USING_UNICODE */
4703
4704 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004705 Py_DECREF(result);
4706 if (args_owned) {
4707 Py_DECREF(args);
4708 }
4709 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004710}
4711
4712void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004713PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004714{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004715 register PyStringObject *s = (PyStringObject *)(*p);
4716 PyObject *t;
4717 if (s == NULL || !PyString_Check(s))
4718 Py_FatalError("PyString_InternInPlace: strings only please!");
4719 /* If it's a string subclass, we don't really know what putting
4720 it in the interned dict might do. */
4721 if (!PyString_CheckExact(s))
4722 return;
4723 if (PyString_CHECK_INTERNED(s))
4724 return;
4725 if (interned == NULL) {
4726 interned = PyDict_New();
4727 if (interned == NULL) {
4728 PyErr_Clear(); /* Don't leave an exception */
4729 return;
4730 }
4731 }
4732 t = PyDict_GetItem(interned, (PyObject *)s);
4733 if (t) {
4734 Py_INCREF(t);
4735 Py_DECREF(*p);
4736 *p = t;
4737 return;
4738 }
Christian Heimes44720832008-05-26 13:01:01 +00004739
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004740 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4741 PyErr_Clear();
4742 return;
4743 }
4744 /* The two references in interned are not counted by refcnt.
4745 The string deallocator will take care of this */
4746 Py_REFCNT(s) -= 2;
4747 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004748}
4749
4750void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004751PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004752{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004753 PyString_InternInPlace(p);
4754 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4755 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4756 Py_INCREF(*p);
4757 }
Christian Heimes44720832008-05-26 13:01:01 +00004758}
4759
4760
4761PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004762PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004763{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004764 PyObject *s = PyString_FromString(cp);
4765 if (s == NULL)
4766 return NULL;
4767 PyString_InternInPlace(&s);
4768 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004769}
4770
4771void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004772PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004773{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004774 int i;
4775 for (i = 0; i < UCHAR_MAX + 1; i++) {
4776 Py_XDECREF(characters[i]);
4777 characters[i] = NULL;
4778 }
4779 Py_XDECREF(nullstring);
4780 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004781}
4782
4783void _Py_ReleaseInternedStrings(void)
4784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004785 PyObject *keys;
4786 PyStringObject *s;
4787 Py_ssize_t i, n;
4788 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004789
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004790 if (interned == NULL || !PyDict_Check(interned))
4791 return;
4792 keys = PyDict_Keys(interned);
4793 if (keys == NULL || !PyList_Check(keys)) {
4794 PyErr_Clear();
4795 return;
4796 }
Christian Heimes44720832008-05-26 13:01:01 +00004797
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004798 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799 detector, interned strings are not forcibly deallocated; rather, we
4800 give them their stolen references back, and then clear and DECREF
4801 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004802
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004803 n = PyList_GET_SIZE(keys);
4804 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4805 n);
4806 for (i = 0; i < n; i++) {
4807 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4808 switch (s->ob_sstate) {
4809 case SSTATE_NOT_INTERNED:
4810 /* XXX Shouldn't happen */
4811 break;
4812 case SSTATE_INTERNED_IMMORTAL:
4813 Py_REFCNT(s) += 1;
4814 immortal_size += Py_SIZE(s);
4815 break;
4816 case SSTATE_INTERNED_MORTAL:
4817 Py_REFCNT(s) += 2;
4818 mortal_size += Py_SIZE(s);
4819 break;
4820 default:
4821 Py_FatalError("Inconsistent interned string state.");
4822 }
4823 s->ob_sstate = SSTATE_NOT_INTERNED;
4824 }
4825 fprintf(stderr, "total size of all interned strings: "
4826 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4827 "mortal/immortal\n", mortal_size, immortal_size);
4828 Py_DECREF(keys);
4829 PyDict_Clear(interned);
4830 Py_DECREF(interned);
4831 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004832}