blob: 6d3ca8eb82d782b0d0756be7f020419447f28dc0 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063 register PyStringObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyString_FromStringAndSize");
67 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000070#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000071 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000072#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000073 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Christian Heimes44720832008-05-26 13:01:01 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000080 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000081#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
Christian Heimes44720832008-05-26 13:01:01 +000085
Antoine Pitrouc83ea132010-05-09 14:46:46 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
87 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000090
Antoine Pitrouc83ea132010-05-09 14:46:46 +000091 /* Inline PyObject_NewVar */
92 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
93 if (op == NULL)
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op, &PyString_Type, size);
96 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
104 PyString_InternInPlace(&t);
105 op = (PyStringObject *)t;
106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
110 PyString_InternInPlace(&t);
111 op = (PyStringObject *)t;
112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000121 register size_t size;
122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000124 assert(str != NULL);
125 size = strlen(str);
126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000132#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000134#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000140 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000141#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
Christian Heimes44720832008-05-26 13:01:01 +0000145
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000146 /* Inline PyObject_NewVar */
147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
148 if (op == NULL)
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op, &PyString_Type, size);
151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
158 op = (PyStringObject *)t;
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
163 PyString_InternInPlace(&t);
164 op = (PyStringObject *)t;
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000182#else
183#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000184 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000185#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000186 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000187#endif
188#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000193 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000194#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
Christian Heimes44720832008-05-26 13:01:01 +0000198
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000206#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000212#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
215 ++f;
216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000235#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000241
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
Christian Heimes44720832008-05-26 13:01:01 +0000269 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string = PyString_FromStringAndSize(NULL, n);
274 if (!string)
275 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000276
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000286#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000307#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000313#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000314 }
315 /* handle the size_t flag. */
316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
317 size_tflag = 1;
318 ++f;
319 }
Christian Heimes44720832008-05-26 13:01:01 +0000320
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000332#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000348#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
Christian Heimes44720832008-05-26 13:01:01 +0000396
397 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000398 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
399 return NULL;
400 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000401}
402
403PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000404PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000405{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 PyObject* ret;
407 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000408
409#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000411#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000412 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000413#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 ret = PyString_FromFormatV(format, vargs);
415 va_end(vargs);
416 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000417}
418
419
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000420PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000421 Py_ssize_t size,
422 const char *encoding,
423 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000424{
425 PyObject *v, *str;
426
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000427 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000428 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000429 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000430 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000431 Py_DECREF(str);
432 return v;
433}
434
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000435PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000436 const char *encoding,
437 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000438{
439 PyObject *v;
440
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000441 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000442 PyErr_BadArgument();
443 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000444 }
445
Christian Heimes44720832008-05-26 13:01:01 +0000446 if (encoding == NULL) {
447#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000448 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000449#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000450 PyErr_SetString(PyExc_ValueError, "no encoding specified");
451 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000452#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000453 }
Christian Heimes44720832008-05-26 13:01:01 +0000454
455 /* Decode via the codec registry */
456 v = PyCodec_Decode(str, encoding, errors);
457 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000458 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000459
460 return v;
461
462 onError:
463 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000464}
465
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000466PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467 const char *encoding,
468 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000469{
Christian Heimes44720832008-05-26 13:01:01 +0000470 PyObject *v;
471
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000472 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000473 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000474 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000475
476#ifdef Py_USING_UNICODE
477 /* Convert Unicode to a string using the default encoding */
478 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000479 PyObject *temp = v;
480 v = PyUnicode_AsEncodedString(v, NULL, NULL);
481 Py_DECREF(temp);
482 if (v == NULL)
483 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000484 }
Christian Heimes44720832008-05-26 13:01:01 +0000485#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000486 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000487 PyErr_Format(PyExc_TypeError,
488 "decoder did not return a string object (type=%.400s)",
489 Py_TYPE(v)->tp_name);
490 Py_DECREF(v);
491 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000492 }
Christian Heimes44720832008-05-26 13:01:01 +0000493
494 return v;
495
496 onError:
497 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000498}
499
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000500PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000501 Py_ssize_t size,
502 const char *encoding,
503 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000504{
Christian Heimes44720832008-05-26 13:01:01 +0000505 PyObject *v, *str;
506
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000507 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000508 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000509 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000510 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000511 Py_DECREF(str);
512 return v;
513}
514
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000515PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000516 const char *encoding,
517 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000518{
519 PyObject *v;
520
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000521 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000522 PyErr_BadArgument();
523 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000524 }
525
526 if (encoding == NULL) {
527#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000528 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000529#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000530 PyErr_SetString(PyExc_ValueError, "no encoding specified");
531 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000532#endif
533 }
534
535 /* Encode via the codec registry */
536 v = PyCodec_Encode(str, encoding, errors);
537 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000538 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000539
540 return v;
541
542 onError:
543 return NULL;
544}
545
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000546PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000547 const char *encoding,
548 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000549{
550 PyObject *v;
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000553 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000554 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000555
556#ifdef Py_USING_UNICODE
557 /* Convert Unicode to a string using the default encoding */
558 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000559 PyObject *temp = v;
560 v = PyUnicode_AsEncodedString(v, NULL, NULL);
561 Py_DECREF(temp);
562 if (v == NULL)
563 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000564 }
565#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000566 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000567 PyErr_Format(PyExc_TypeError,
568 "encoder did not return a string object (type=%.400s)",
569 Py_TYPE(v)->tp_name);
570 Py_DECREF(v);
571 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000572 }
573
574 return v;
575
576 onError:
577 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578}
579
580static void
Christian Heimes44720832008-05-26 13:01:01 +0000581string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 switch (PyString_CHECK_INTERNED(op)) {
584 case SSTATE_NOT_INTERNED:
585 break;
Christian Heimes44720832008-05-26 13:01:01 +0000586
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000587 case SSTATE_INTERNED_MORTAL:
588 /* revive dead object temporarily for DelItem */
589 Py_REFCNT(op) = 3;
590 if (PyDict_DelItem(interned, op) != 0)
591 Py_FatalError(
592 "deletion of interned string failed");
593 break;
Christian Heimes44720832008-05-26 13:01:01 +0000594
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000595 case SSTATE_INTERNED_IMMORTAL:
596 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000597
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000598 default:
599 Py_FatalError("Inconsistent interned string state.");
600 }
601 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000602}
603
Christian Heimes44720832008-05-26 13:01:01 +0000604/* Unescape a backslash-escaped string. If unicode is non-zero,
605 the string is a u-literal. If recode_encoding is non-zero,
606 the string is UTF-8 encoded and should be re-encoded in the
607 specified encoding. */
608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000609PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000610 Py_ssize_t len,
611 const char *errors,
612 Py_ssize_t unicode,
613 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000614{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000615 int c;
616 char *p, *buf;
617 const char *end;
618 PyObject *v;
619 Py_ssize_t newlen = recode_encoding ? 4*len:len;
620 v = PyString_FromStringAndSize((char *)NULL, newlen);
621 if (v == NULL)
622 return NULL;
623 p = buf = PyString_AsString(v);
624 end = s + len;
625 while (s < end) {
626 if (*s != '\\') {
627 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000628#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000629 if (recode_encoding && (*s & 0x80)) {
630 PyObject *u, *w;
631 char *r;
632 const char* t;
633 Py_ssize_t rn;
634 t = s;
635 /* Decode non-ASCII bytes as UTF-8. */
636 while (t < end && (*t & 0x80)) t++;
637 u = PyUnicode_DecodeUTF8(s, t - s, errors);
638 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000639
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000640 /* Recode them in target encoding. */
641 w = PyUnicode_AsEncodedString(
642 u, recode_encoding, errors);
643 Py_DECREF(u);
644 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000645
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000646 /* Append bytes to output buffer. */
647 assert(PyString_Check(w));
648 r = PyString_AS_STRING(w);
649 rn = PyString_GET_SIZE(w);
650 Py_MEMCPY(p, r, rn);
651 p += rn;
652 Py_DECREF(w);
653 s = t;
654 } else {
655 *p++ = *s++;
656 }
Christian Heimes44720832008-05-26 13:01:01 +0000657#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000658 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000659#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000660 continue;
661 }
662 s++;
663 if (s==end) {
664 PyErr_SetString(PyExc_ValueError,
665 "Trailing \\ in string");
666 goto failed;
667 }
668 switch (*s++) {
669 /* XXX This assumes ASCII! */
670 case '\n': break;
671 case '\\': *p++ = '\\'; break;
672 case '\'': *p++ = '\''; break;
673 case '\"': *p++ = '\"'; break;
674 case 'b': *p++ = '\b'; break;
675 case 'f': *p++ = '\014'; break; /* FF */
676 case 't': *p++ = '\t'; break;
677 case 'n': *p++ = '\n'; break;
678 case 'r': *p++ = '\r'; break;
679 case 'v': *p++ = '\013'; break; /* VT */
680 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
681 case '0': case '1': case '2': case '3':
682 case '4': case '5': case '6': case '7':
683 c = s[-1] - '0';
684 if (s < end && '0' <= *s && *s <= '7') {
685 c = (c<<3) + *s++ - '0';
686 if (s < end && '0' <= *s && *s <= '7')
687 c = (c<<3) + *s++ - '0';
688 }
689 *p++ = c;
690 break;
691 case 'x':
692 if (s+1 < end &&
693 isxdigit(Py_CHARMASK(s[0])) &&
694 isxdigit(Py_CHARMASK(s[1])))
695 {
696 unsigned int x = 0;
697 c = Py_CHARMASK(*s);
698 s++;
699 if (isdigit(c))
700 x = c - '0';
701 else if (islower(c))
702 x = 10 + c - 'a';
703 else
704 x = 10 + c - 'A';
705 x = x << 4;
706 c = Py_CHARMASK(*s);
707 s++;
708 if (isdigit(c))
709 x += c - '0';
710 else if (islower(c))
711 x += 10 + c - 'a';
712 else
713 x += 10 + c - 'A';
714 *p++ = x;
715 break;
716 }
717 if (!errors || strcmp(errors, "strict") == 0) {
718 PyErr_SetString(PyExc_ValueError,
719 "invalid \\x escape");
720 goto failed;
721 }
722 if (strcmp(errors, "replace") == 0) {
723 *p++ = '?';
724 } else if (strcmp(errors, "ignore") == 0)
725 /* do nothing */;
726 else {
727 PyErr_Format(PyExc_ValueError,
728 "decoding error; "
729 "unknown error handling code: %.400s",
730 errors);
731 goto failed;
732 }
Christian Heimes44720832008-05-26 13:01:01 +0000733#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
Christian Heimes44720832008-05-26 13:01:01 +0000743#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 default:
745 *p++ = '\\';
746 s--;
747 goto non_esc; /* an arbitry number of unescaped
748 UTF-8 bytes may follow. */
749 }
750 }
751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
753 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000754 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 Py_DECREF(v);
756 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
Christian Heimes44720832008-05-26 13:01:01 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
Christian Heimes44720832008-05-26 13:01:01 +0000884#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000886#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000887 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000888#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 Py_END_ALLOW_THREADS
890 return 0;
891 }
Christian Heimes44720832008-05-26 13:01:01 +0000892
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
936 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000945
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000952
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000953 p = PyString_AS_STRING(v);
954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
974 }
975 else
976 *p++ = c;
977 }
978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979 *p++ = quote;
980 *p = '\0';
981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
983 return v;
984 }
Christian Heimes44720832008-05-26 13:01:01 +0000985}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000986
987static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000988string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000990 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991}
992
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000994string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000995{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
998 Py_INCREF(s);
999 return s;
1000 }
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
Christian Heimes44720832008-05-26 13:01:01 +00001008static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001009string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001010{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001012}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013
Christian Heimes44720832008-05-26 13:01:01 +00001014static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001015string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001016{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 register Py_ssize_t size;
1018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001020#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001023#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1030 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001031#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1038 }
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1041 }
1042 size = Py_SIZE(a) + Py_SIZE(b);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001054 /* Inline PyObject_NewVar */
1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1059 }
1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061 if (op == NULL)
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op, &PyString_Type, size);
1064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001070#undef b
1071}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001072
Christian Heimes44720832008-05-26 13:01:01 +00001073static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001074string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001075{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
1079 register PyStringObject *op;
1080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1085 */
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093 Py_INCREF(a);
1094 return (PyObject *)a;
1095 }
1096 nbytes = (size_t)size;
1097 if (nbytes + PyStringObject_SIZE <= nbytes) {
1098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1101 }
1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103 if (op == NULL)
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op, &PyString_Type, size);
1106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1112 }
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1117 }
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1122 }
1123 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001124}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001125
Christian Heimes44720832008-05-26 13:01:01 +00001126/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001129string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001131 /* j -- may be negative! */
1132{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1143 }
1144 if (j < i)
1145 j = i;
1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001147}
1148
1149static int
1150string_contains(PyObject *str_obj, PyObject *sub_obj)
1151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001152 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001153#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001156#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001157 if (!PyString_Check(sub_obj)) {
1158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1162 }
1163 }
Christian Heimes44720832008-05-26 13:01:01 +00001164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001166}
1167
1168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001169string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001170{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1176 }
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
1180 v = PyString_FromStringAndSize(&pchar, 1);
1181 else {
Christian Heimes44720832008-05-26 13:01:01 +00001182#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001183 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001185 Py_INCREF(v);
1186 }
1187 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001188}
1189
1190static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001191string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001192{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001197
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a) && PyString_Check(b))) {
1200 result = Py_NotImplemented;
1201 goto out;
1202 }
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1211 }
1212 }
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1222 }
1223 goto out;
1224 }
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1245 }
1246 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001247 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001248 Py_INCREF(result);
1249 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001250}
1251
1252int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001254{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
1257 return Py_SIZE(a) == Py_SIZE(b)
1258 && *a->ob_sval == *b->ob_sval
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001260}
1261
1262static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001263string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001264{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001268
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001269 if (a->ob_shash != -1)
1270 return a->ob_shash;
1271 len = Py_SIZE(a);
1272 p = (unsigned char *) a->ob_sval;
1273 x = *p << 7;
1274 while (--len >= 0)
1275 x = (1000003*x) ^ *p++;
1276 x ^= Py_SIZE(a);
1277 if (x == -1)
1278 x = -2;
1279 a->ob_shash = x;
1280 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001281}
1282
1283static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001284string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001285{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 if (PyIndex_Check(item)) {
1287 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1288 if (i == -1 && PyErr_Occurred())
1289 return NULL;
1290 if (i < 0)
1291 i += PyString_GET_SIZE(self);
1292 return string_item(self, i);
1293 }
1294 else if (PySlice_Check(item)) {
1295 Py_ssize_t start, stop, step, slicelength, cur, i;
1296 char* source_buf;
1297 char* result_buf;
1298 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001299
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001300 if (PySlice_GetIndicesEx((PySliceObject*)item,
1301 PyString_GET_SIZE(self),
1302 &start, &stop, &step, &slicelength) < 0) {
1303 return NULL;
1304 }
Christian Heimes44720832008-05-26 13:01:01 +00001305
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001306 if (slicelength <= 0) {
1307 return PyString_FromStringAndSize("", 0);
1308 }
1309 else if (start == 0 && step == 1 &&
1310 slicelength == PyString_GET_SIZE(self) &&
1311 PyString_CheckExact(self)) {
1312 Py_INCREF(self);
1313 return (PyObject *)self;
1314 }
1315 else if (step == 1) {
1316 return PyString_FromStringAndSize(
1317 PyString_AS_STRING(self) + start,
1318 slicelength);
1319 }
1320 else {
1321 source_buf = PyString_AsString((PyObject*)self);
1322 result_buf = (char *)PyMem_Malloc(slicelength);
1323 if (result_buf == NULL)
1324 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001325
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001326 for (cur = start, i = 0; i < slicelength;
1327 cur += step, i++) {
1328 result_buf[i] = source_buf[cur];
1329 }
Christian Heimes44720832008-05-26 13:01:01 +00001330
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001331 result = PyString_FromStringAndSize(result_buf,
1332 slicelength);
1333 PyMem_Free(result_buf);
1334 return result;
1335 }
1336 }
1337 else {
1338 PyErr_Format(PyExc_TypeError,
1339 "string indices must be integers, not %.200s",
1340 Py_TYPE(item)->tp_name);
1341 return NULL;
1342 }
Christian Heimes44720832008-05-26 13:01:01 +00001343}
1344
1345static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001346string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001347{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001348 if ( index != 0 ) {
1349 PyErr_SetString(PyExc_SystemError,
1350 "accessing non-existent string segment");
1351 return -1;
1352 }
1353 *ptr = (void *)self->ob_sval;
1354 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001355}
1356
1357static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001358string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001359{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001360 PyErr_SetString(PyExc_TypeError,
1361 "Cannot use string as modifiable buffer");
1362 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001363}
1364
1365static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001366string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001367{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001368 if ( lenp )
1369 *lenp = Py_SIZE(self);
1370 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001371}
1372
1373static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001374string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001375{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001376 if ( index != 0 ) {
1377 PyErr_SetString(PyExc_SystemError,
1378 "accessing non-existent string segment");
1379 return -1;
1380 }
1381 *ptr = self->ob_sval;
1382 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001383}
1384
1385static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001386string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001387{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001388 return PyBuffer_FillInfo(view, (PyObject*)self,
1389 (void *)self->ob_sval, Py_SIZE(self),
1390 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001391}
1392
1393static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001394 (lenfunc)string_length, /*sq_length*/
1395 (binaryfunc)string_concat, /*sq_concat*/
1396 (ssizeargfunc)string_repeat, /*sq_repeat*/
1397 (ssizeargfunc)string_item, /*sq_item*/
1398 (ssizessizeargfunc)string_slice, /*sq_slice*/
1399 0, /*sq_ass_item*/
1400 0, /*sq_ass_slice*/
1401 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001402};
1403
1404static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001405 (lenfunc)string_length,
1406 (binaryfunc)string_subscript,
1407 0,
Christian Heimes44720832008-05-26 13:01:01 +00001408};
1409
1410static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001411 (readbufferproc)string_buffer_getreadbuf,
1412 (writebufferproc)string_buffer_getwritebuf,
1413 (segcountproc)string_buffer_getsegcount,
1414 (charbufferproc)string_buffer_getcharbuf,
1415 (getbufferproc)string_buffer_getbuffer,
1416 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001417};
1418
1419
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001420
/* Strip modes; the values double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, one per strip mode, indexed by the constants above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string minus its three-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1429
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001431"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001432\n\
Christian Heimes44720832008-05-26 13:01:01 +00001433Return a list of the words in the string S, using sep as the\n\
1434delimiter string. If maxsplit is given, at most maxsplit\n\
1435splits are done. If sep is not specified or is None, any\n\
1436whitespace string is a separator and empty strings are removed\n\
1437from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
1439static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001440string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001442 Py_ssize_t len = PyString_GET_SIZE(self), n;
1443 Py_ssize_t maxsplit = -1;
1444 const char *s = PyString_AS_STRING(self), *sub;
1445 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001446
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001447 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1448 return NULL;
1449 if (maxsplit < 0)
1450 maxsplit = PY_SSIZE_T_MAX;
1451 if (subobj == Py_None)
1452 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1453 if (PyString_Check(subobj)) {
1454 sub = PyString_AS_STRING(subobj);
1455 n = PyString_GET_SIZE(subobj);
1456 }
Christian Heimes44720832008-05-26 13:01:01 +00001457#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001458 else if (PyUnicode_Check(subobj))
1459 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001460#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001461 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001463
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001464 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465}
1466
1467PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001468"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001470Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001471the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001472found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001473
1474static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001475string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477 const char *sep;
1478 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001480 if (PyString_Check(sep_obj)) {
1481 sep = PyString_AS_STRING(sep_obj);
1482 sep_len = PyString_GET_SIZE(sep_obj);
1483 }
Christian Heimes44720832008-05-26 13:01:01 +00001484#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001485 else if (PyUnicode_Check(sep_obj))
1486 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001487#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001488 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1489 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001491 return stringlib_partition(
1492 (PyObject*) self,
1493 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1494 sep_obj, sep, sep_len
1495 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496}
1497
1498PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001499"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001501Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001502the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001503separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001504
1505static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001506string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001508 const char *sep;
1509 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001511 if (PyString_Check(sep_obj)) {
1512 sep = PyString_AS_STRING(sep_obj);
1513 sep_len = PyString_GET_SIZE(sep_obj);
1514 }
Christian Heimes44720832008-05-26 13:01:01 +00001515#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001516 else if (PyUnicode_Check(sep_obj))
1517 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001518#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001519 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001522 return stringlib_rpartition(
1523 (PyObject*) self,
1524 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1525 sep_obj, sep, sep_len
1526 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527}
1528
Christian Heimes1a6387e2008-03-26 12:49:49 +00001529PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001530"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531\n\
Christian Heimes44720832008-05-26 13:01:01 +00001532Return a list of the words in the string S, using sep as the\n\
1533delimiter string, starting at the end of the string and working\n\
1534to the front. If maxsplit is given, at most maxsplit splits are\n\
1535done. If sep is not specified or is None, any whitespace string\n\
1536is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001537
1538static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001539string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001541 Py_ssize_t len = PyString_GET_SIZE(self), n;
1542 Py_ssize_t maxsplit = -1;
1543 const char *s = PyString_AS_STRING(self), *sub;
1544 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001545
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001546 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1547 return NULL;
1548 if (maxsplit < 0)
1549 maxsplit = PY_SSIZE_T_MAX;
1550 if (subobj == Py_None)
1551 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 n = PyString_GET_SIZE(subobj);
1555 }
Christian Heimes44720832008-05-26 13:01:01 +00001556#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001557 else if (PyUnicode_Check(subobj))
1558 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001559#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001560 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1561 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001562
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001563 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001564}
1565
1566
1567PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001568"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001569\n\
1570Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001571iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001572
1573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001574string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001575{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 char *sep = PyString_AS_STRING(self);
1577 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1578 PyObject *res = NULL;
1579 char *p;
1580 Py_ssize_t seqlen = 0;
1581 size_t sz = 0;
1582 Py_ssize_t i;
1583 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001584
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001585 seq = PySequence_Fast(orig, "");
1586 if (seq == NULL) {
1587 return NULL;
1588 }
Christian Heimes44720832008-05-26 13:01:01 +00001589
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001590 seqlen = PySequence_Size(seq);
1591 if (seqlen == 0) {
1592 Py_DECREF(seq);
1593 return PyString_FromString("");
1594 }
1595 if (seqlen == 1) {
1596 item = PySequence_Fast_GET_ITEM(seq, 0);
1597 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1598 Py_INCREF(item);
1599 Py_DECREF(seq);
1600 return item;
1601 }
1602 }
Christian Heimes44720832008-05-26 13:01:01 +00001603
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001604 /* There are at least two things to join, or else we have a subclass
1605 * of the builtin types in the sequence.
1606 * Do a pre-pass to figure out the total amount of space we'll
1607 * need (sz), see whether any argument is absurd, and defer to
1608 * the Unicode join if appropriate.
1609 */
1610 for (i = 0; i < seqlen; i++) {
1611 const size_t old_sz = sz;
1612 item = PySequence_Fast_GET_ITEM(seq, i);
1613 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001614#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001615 if (PyUnicode_Check(item)) {
1616 /* Defer to Unicode join.
1617 * CAUTION: There's no gurantee that the
1618 * original sequence can be iterated over
1619 * again, so we must pass seq here.
1620 */
1621 PyObject *result;
1622 result = PyUnicode_Join((PyObject *)self, seq);
1623 Py_DECREF(seq);
1624 return result;
1625 }
Christian Heimes44720832008-05-26 13:01:01 +00001626#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001627 PyErr_Format(PyExc_TypeError,
1628 "sequence item %zd: expected string,"
1629 " %.80s found",
1630 i, Py_TYPE(item)->tp_name);
1631 Py_DECREF(seq);
1632 return NULL;
1633 }
1634 sz += PyString_GET_SIZE(item);
1635 if (i != 0)
1636 sz += seplen;
1637 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1638 PyErr_SetString(PyExc_OverflowError,
1639 "join() result is too long for a Python string");
1640 Py_DECREF(seq);
1641 return NULL;
1642 }
1643 }
Christian Heimes44720832008-05-26 13:01:01 +00001644
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001645 /* Allocate result space. */
1646 res = PyString_FromStringAndSize((char*)NULL, sz);
1647 if (res == NULL) {
1648 Py_DECREF(seq);
1649 return NULL;
1650 }
Christian Heimes44720832008-05-26 13:01:01 +00001651
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001652 /* Catenate everything. */
1653 p = PyString_AS_STRING(res);
1654 for (i = 0; i < seqlen; ++i) {
1655 size_t n;
1656 item = PySequence_Fast_GET_ITEM(seq, i);
1657 n = PyString_GET_SIZE(item);
1658 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1659 p += n;
1660 if (i < seqlen - 1) {
1661 Py_MEMCPY(p, sep, seplen);
1662 p += seplen;
1663 }
1664 }
Christian Heimes44720832008-05-26 13:01:01 +00001665
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001666 Py_DECREF(seq);
1667 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001668}
1669
1670PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001671_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001672{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001673 assert(sep != NULL && PyString_Check(sep));
1674 assert(x != NULL);
1675 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001676}
1677
/* Helper macro to fix up start/end slice values in place.
 *
 * start and end must be modifiable lvalues; len is the sequence length.
 *   - end greater than len is clamped to len;
 *   - a negative end or start is taken relative to len and clamped to 0.
 * start is not clamped against len or end, so callers must cope with
 * start > end after adjustment.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else), and every argument
 * is parenthesized.  Use it followed by a semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001692
1693Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001694string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001695{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001696 PyObject *subobj;
1697 const char *sub;
1698 Py_ssize_t sub_len;
1699 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001701
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001702 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703 &obj_start, &obj_end))
1704 return -2;
1705 /* To support None in "start" and "end" arguments, meaning
1706 the same as if they were not passed.
1707 */
1708 if (obj_start != Py_None)
1709 if (!_PyEval_SliceIndex(obj_start, &start))
1710 return -2;
1711 if (obj_end != Py_None)
1712 if (!_PyEval_SliceIndex(obj_end, &end))
1713 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 if (PyString_Check(subobj)) {
1716 sub = PyString_AS_STRING(subobj);
1717 sub_len = PyString_GET_SIZE(subobj);
1718 }
Christian Heimes44720832008-05-26 13:01:01 +00001719#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001720 else if (PyUnicode_Check(subobj))
1721 return PyUnicode_Find(
1722 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001723#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001724 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725 /* XXX - the "expected a character buffer object" is pretty
1726 confusing for a non-expert. remap to something else ? */
1727 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001728
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001729 if (dir > 0)
1730 return stringlib_find_slice(
1731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732 sub, sub_len, start, end);
1733 else
1734 return stringlib_rfind_slice(
1735 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1736 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001737}
1738
1739
1740PyDoc_STRVAR(find__doc__,
1741"S.find(sub [,start [,end]]) -> int\n\
1742\n\
1743Return the lowest index in S where substring sub is found,\n\
1744such that sub is contained within s[start:end]. Optional\n\
1745arguments start and end are interpreted as in slice notation.\n\
1746\n\
1747Return -1 on failure.");
1748
1749static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001750string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001751{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001752 Py_ssize_t result = string_find_internal(self, args, +1);
1753 if (result == -2)
1754 return NULL;
1755 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001756}
1757
1758
1759PyDoc_STRVAR(index__doc__,
1760"S.index(sub [,start [,end]]) -> int\n\
1761\n\
1762Like S.find() but raise ValueError when the substring is not found.");
1763
1764static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001765string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001766{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001767 Py_ssize_t result = string_find_internal(self, args, +1);
1768 if (result == -2)
1769 return NULL;
1770 if (result == -1) {
1771 PyErr_SetString(PyExc_ValueError,
1772 "substring not found");
1773 return NULL;
1774 }
1775 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001776}
1777
1778
1779PyDoc_STRVAR(rfind__doc__,
1780"S.rfind(sub [,start [,end]]) -> int\n\
1781\n\
1782Return the highest index in S where substring sub is found,\n\
1783such that sub is contained within s[start:end]. Optional\n\
1784arguments start and end are interpreted as in slice notation.\n\
1785\n\
1786Return -1 on failure.");
1787
1788static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001789string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001790{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001791 Py_ssize_t result = string_find_internal(self, args, -1);
1792 if (result == -2)
1793 return NULL;
1794 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001795}
1796
1797
1798PyDoc_STRVAR(rindex__doc__,
1799"S.rindex(sub [,start [,end]]) -> int\n\
1800\n\
1801Like S.rfind() but raise ValueError when the substring is not found.");
1802
1803static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001804string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001805{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001806 Py_ssize_t result = string_find_internal(self, args, -1);
1807 if (result == -2)
1808 return NULL;
1809 if (result == -1) {
1810 PyErr_SetString(PyExc_ValueError,
1811 "substring not found");
1812 return NULL;
1813 }
1814 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001815}
1816
1817
1818Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001819do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001820{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001821 char *s = PyString_AS_STRING(self);
1822 Py_ssize_t len = PyString_GET_SIZE(self);
1823 char *sep = PyString_AS_STRING(sepobj);
1824 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1825 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001826
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001827 i = 0;
1828 if (striptype != RIGHTSTRIP) {
1829 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830 i++;
1831 }
1832 }
Christian Heimes44720832008-05-26 13:01:01 +00001833
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001834 j = len;
1835 if (striptype != LEFTSTRIP) {
1836 do {
1837 j--;
1838 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1839 j++;
1840 }
Christian Heimes44720832008-05-26 13:01:01 +00001841
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001842 if (i == 0 && j == len && PyString_CheckExact(self)) {
1843 Py_INCREF(self);
1844 return (PyObject*)self;
1845 }
1846 else
1847 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001848}
1849
1850
1851Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001852do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001853{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001854 char *s = PyString_AS_STRING(self);
1855 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001856
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001857 i = 0;
1858 if (striptype != RIGHTSTRIP) {
1859 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860 i++;
1861 }
1862 }
Christian Heimes44720832008-05-26 13:01:01 +00001863
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001864 j = len;
1865 if (striptype != LEFTSTRIP) {
1866 do {
1867 j--;
1868 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869 j++;
1870 }
Christian Heimes44720832008-05-26 13:01:01 +00001871
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001872 if (i == 0 && j == len && PyString_CheckExact(self)) {
1873 Py_INCREF(self);
1874 return (PyObject*)self;
1875 }
1876 else
1877 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001878}
1879
1880
1881Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001882do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001883{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001884 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001885
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001886 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1887 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 if (sep != NULL && sep != Py_None) {
1890 if (PyString_Check(sep))
1891 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001892#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001893 else if (PyUnicode_Check(sep)) {
1894 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895 PyObject *res;
1896 if (uniself==NULL)
1897 return NULL;
1898 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899 striptype, sep);
1900 Py_DECREF(uniself);
1901 return res;
1902 }
Christian Heimes44720832008-05-26 13:01:01 +00001903#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001904 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001905#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001906 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001907#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001908 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001909#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 STRIPNAME(striptype));
1911 return NULL;
1912 }
Christian Heimes44720832008-05-26 13:01:01 +00001913
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001914 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001915}
1916
1917
1918PyDoc_STRVAR(strip__doc__,
1919"S.strip([chars]) -> string or unicode\n\
1920\n\
1921Return a copy of the string S with leading and trailing\n\
1922whitespace removed.\n\
1923If chars is given and not None, remove characters in chars instead.\n\
1924If chars is unicode, S will be converted to unicode before stripping");
1925
1926static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001927string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001928{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001929 if (PyTuple_GET_SIZE(args) == 0)
1930 return do_strip(self, BOTHSTRIP); /* Common case */
1931 else
1932 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001933}
1934
1935
1936PyDoc_STRVAR(lstrip__doc__,
1937"S.lstrip([chars]) -> string or unicode\n\
1938\n\
1939Return a copy of the string S with leading whitespace removed.\n\
1940If chars is given and not None, remove characters in chars instead.\n\
1941If chars is unicode, S will be converted to unicode before stripping");
1942
1943static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001944string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001945{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001946 if (PyTuple_GET_SIZE(args) == 0)
1947 return do_strip(self, LEFTSTRIP); /* Common case */
1948 else
1949 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001950}
1951
1952
1953PyDoc_STRVAR(rstrip__doc__,
1954"S.rstrip([chars]) -> string or unicode\n\
1955\n\
1956Return a copy of the string S with trailing whitespace removed.\n\
1957If chars is given and not None, remove characters in chars instead.\n\
1958If chars is unicode, S will be converted to unicode before stripping");
1959
1960static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001961string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001962{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001963 if (PyTuple_GET_SIZE(args) == 0)
1964 return do_strip(self, RIGHTSTRIP); /* Common case */
1965 else
1966 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001967}
1968
1969
1970PyDoc_STRVAR(lower__doc__,
1971"S.lower() -> string\n\
1972\n\
1973Return a copy of the string S converted to lowercase.");
1974
1975/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976#ifndef _tolower
1977#define _tolower tolower
1978#endif
1979
1980static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001981string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001982{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001983 char *s;
1984 Py_ssize_t i, n = PyString_GET_SIZE(self);
1985 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 newobj = PyString_FromStringAndSize(NULL, n);
1988 if (!newobj)
1989 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001990
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001991 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001992
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001993 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001994
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001995 for (i = 0; i < n; i++) {
1996 int c = Py_CHARMASK(s[i]);
1997 if (isupper(c))
1998 s[i] = _tolower(c);
1999 }
Christian Heimes44720832008-05-26 13:01:01 +00002000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002001 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002002}
2003
2004PyDoc_STRVAR(upper__doc__,
2005"S.upper() -> string\n\
2006\n\
2007Return a copy of the string S converted to uppercase.");
2008
2009#ifndef _toupper
2010#define _toupper toupper
2011#endif
2012
2013static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002014string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002015{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 char *s;
2017 Py_ssize_t i, n = PyString_GET_SIZE(self);
2018 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 newobj = PyString_FromStringAndSize(NULL, n);
2021 if (!newobj)
2022 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002023
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002024 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002026 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002027
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002028 for (i = 0; i < n; i++) {
2029 int c = Py_CHARMASK(s[i]);
2030 if (islower(c))
2031 s[i] = _toupper(c);
2032 }
Christian Heimes44720832008-05-26 13:01:01 +00002033
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002034 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002035}
2036
2037PyDoc_STRVAR(title__doc__,
2038"S.title() -> string\n\
2039\n\
2040Return a titlecased version of S, i.e. words start with uppercase\n\
2041characters, all remaining cased characters have lowercase.");
2042
2043static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002044string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002045{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002046 char *s = PyString_AS_STRING(self), *s_new;
2047 Py_ssize_t i, n = PyString_GET_SIZE(self);
2048 int previous_is_cased = 0;
2049 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002050
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002051 newobj = PyString_FromStringAndSize(NULL, n);
2052 if (newobj == NULL)
2053 return NULL;
2054 s_new = PyString_AsString(newobj);
2055 for (i = 0; i < n; i++) {
2056 int c = Py_CHARMASK(*s++);
2057 if (islower(c)) {
2058 if (!previous_is_cased)
2059 c = toupper(c);
2060 previous_is_cased = 1;
2061 } else if (isupper(c)) {
2062 if (previous_is_cased)
2063 c = tolower(c);
2064 previous_is_cased = 1;
2065 } else
2066 previous_is_cased = 0;
2067 *s_new++ = c;
2068 }
2069 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002070}
2071
2072PyDoc_STRVAR(capitalize__doc__,
2073"S.capitalize() -> string\n\
2074\n\
2075Return a copy of the string S with only its first character\n\
2076capitalized.");
2077
2078static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002080{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002081 char *s = PyString_AS_STRING(self), *s_new;
2082 Py_ssize_t i, n = PyString_GET_SIZE(self);
2083 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002084
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002085 newobj = PyString_FromStringAndSize(NULL, n);
2086 if (newobj == NULL)
2087 return NULL;
2088 s_new = PyString_AsString(newobj);
2089 if (0 < n) {
2090 int c = Py_CHARMASK(*s++);
2091 if (islower(c))
2092 *s_new = toupper(c);
2093 else
2094 *s_new = c;
2095 s_new++;
2096 }
2097 for (i = 1; i < n; i++) {
2098 int c = Py_CHARMASK(*s++);
2099 if (isupper(c))
2100 *s_new = tolower(c);
2101 else
2102 *s_new = c;
2103 s_new++;
2104 }
2105 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002106}
2107
2108
2109PyDoc_STRVAR(count__doc__,
2110"S.count(sub[, start[, end]]) -> int\n\
2111\n\
2112Return the number of non-overlapping occurrences of substring sub in\n\
2113string S[start:end]. Optional arguments start and end are interpreted\n\
2114as in slice notation.");
2115
2116static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002118{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002119 PyObject *sub_obj;
2120 const char *str = PyString_AS_STRING(self), *sub;
2121 Py_ssize_t sub_len;
2122 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2126 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002127
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 if (PyString_Check(sub_obj)) {
2129 sub = PyString_AS_STRING(sub_obj);
2130 sub_len = PyString_GET_SIZE(sub_obj);
2131 }
Christian Heimes44720832008-05-26 13:01:01 +00002132#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002133 else if (PyUnicode_Check(sub_obj)) {
2134 Py_ssize_t count;
2135 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2136 if (count == -1)
2137 return NULL;
2138 else
2139 return PyInt_FromSsize_t(count);
2140 }
Christian Heimes44720832008-05-26 13:01:01 +00002141#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002142 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002144
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002145 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002147 return PyInt_FromSsize_t(
2148 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2149 );
Christian Heimes44720832008-05-26 13:01:01 +00002150}
2151
2152PyDoc_STRVAR(swapcase__doc__,
2153"S.swapcase() -> string\n\
2154\n\
2155Return a copy of the string S with uppercase characters\n\
2156converted to lowercase and vice versa.");
2157
2158static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002159string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002160{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002161 char *s = PyString_AS_STRING(self), *s_new;
2162 Py_ssize_t i, n = PyString_GET_SIZE(self);
2163 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002164
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002165 newobj = PyString_FromStringAndSize(NULL, n);
2166 if (newobj == NULL)
2167 return NULL;
2168 s_new = PyString_AsString(newobj);
2169 for (i = 0; i < n; i++) {
2170 int c = Py_CHARMASK(*s++);
2171 if (islower(c)) {
2172 *s_new = toupper(c);
2173 }
2174 else if (isupper(c)) {
2175 *s_new = tolower(c);
2176 }
2177 else
2178 *s_new = c;
2179 s_new++;
2180 }
2181 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002182}
2183
2184
2185PyDoc_STRVAR(translate__doc__,
2186"S.translate(table [,deletechars]) -> string\n\
2187\n\
2188Return a copy of the string S, where all characters occurring\n\
2189in the optional argument deletechars are removed, and the\n\
2190remaining characters have been mapped through the given\n\
2191translation table, which must be a string of length 256.");
2192
2193static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002194string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002195{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002196 register char *input, *output;
2197 const char *table;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2202 PyObject *result;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002205
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2208 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
2213 }
2214 else if (tableobj == Py_None) {
2215 table = NULL;
2216 tablen = 256;
2217 }
Christian Heimes44720832008-05-26 13:01:01 +00002218#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2222 to be deleted. */
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2226 return NULL;
2227 }
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229 }
Christian Heimes44720832008-05-26 13:01:01 +00002230#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002233
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2237 return NULL;
2238 }
Christian Heimes44720832008-05-26 13:01:01 +00002239
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002240 if (delobj != NULL) {
2241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
2244 }
Christian Heimes44720832008-05-26 13:01:01 +00002245#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2249 return NULL;
2250 }
Christian Heimes44720832008-05-26 13:01:01 +00002251#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253 return NULL;
2254 }
2255 else {
2256 del_table = NULL;
2257 dellen = 0;
2258 }
Christian Heimes44720832008-05-26 13:01:01 +00002259
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
2262 if (result == NULL)
2263 return NULL;
2264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002266
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
2272 changed = 1;
2273 }
2274 if (changed || !PyString_CheckExact(input_obj))
2275 return result;
2276 Py_DECREF(result);
2277 Py_INCREF(input_obj);
2278 return input_obj;
2279 }
Christian Heimes44720832008-05-26 13:01:01 +00002280
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2287 }
Christian Heimes44720832008-05-26 13:01:01 +00002288
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002291
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296 continue;
2297 changed = 1;
2298 }
2299 if (!changed && PyString_CheckExact(input_obj)) {
2300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2303 }
2304 /* Fix the size of the resulting string */
2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306 return NULL;
2307 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002308}
2309
2310
/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char pointer to that byte, or NULL if memchr()
   finds none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315
2316/* String ops must return a string. */
2317/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002318Py_LOCAL(PyStringObject *)
2319return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002321 if (PyString_CheckExact(self)) {
2322 Py_INCREF(self);
2323 return self;
2324 }
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002328}
2329
2330Py_LOCAL_INLINE(Py_ssize_t)
2331countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002336
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2342 }
2343 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002344}
2345
Christian Heimes44720832008-05-26 13:01:01 +00002346
2347/* Algorithms for different cases of string replacement */
2348
2349/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002350Py_LOCAL(PyStringObject *)
2351replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002354{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002359
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002360 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002361
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002366
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2374 }
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2380 }
Christian Heimes44720832008-05-26 13:01:01 +00002381
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2384 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002390
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002395
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 }
2401
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002406}
2407
2408/* Special case for deleting a single character */
2409/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410Py_LOCAL(PyStringObject *)
2411replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002412 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002413{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
2418 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002419
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2426 }
Christian Heimes44720832008-05-26 13:01:01 +00002427
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002430
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433 return NULL;
2434 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002435
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2445 }
2446 Py_MEMCPY(result_s, start, end-start);
2447
2448 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002449}
2450
2451/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002453Py_LOCAL(PyStringObject *)
2454replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002469
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2473 }
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002481
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002482 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002483
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2489 0);
2490 if (offset == -1)
2491 break;
2492 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002493
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002494 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002495
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002496 result_s += (next-start);
2497 start = next+from_len;
2498 }
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002501}
2502
2503/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002504Py_LOCAL(PyStringObject *)
2505replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002508{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002512
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002516
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002517 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002518
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2522 }
Christian Heimes44720832008-05-26 13:01:01 +00002523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526 if (result == NULL)
2527 return NULL;
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002530
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002536
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2543 }
Christian Heimes44720832008-05-26 13:01:01 +00002544
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002545 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002546}
2547
2548/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549Py_LOCAL(PyStringObject *)
2550replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002554{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002559
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002561
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2567 0);
2568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2571 }
Christian Heimes44720832008-05-26 13:01:01 +00002572
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575 if (result == NULL)
2576 return NULL;
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002579
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2589 0);
2590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2594 }
Christian Heimes44720832008-05-26 13:01:01 +00002595
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002596 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002597}
2598
2599/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002600Py_LOCAL(PyStringObject *)
2601replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002605{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2619 }
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2632 }
Christian Heimes44720832008-05-26 13:01:01 +00002633
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636 return NULL;
2637 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002638
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
Christian Heimes44720832008-05-26 13:01:01 +00002645
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2658 }
2659 }
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002662
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002663 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002664}
2665
2666/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002667Py_LOCAL(PyStringObject *)
2668replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002677
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002680
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002684
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2688 }
Christian Heimes44720832008-05-26 13:01:01 +00002689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2696 }
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
Christian Heimes44720832008-05-26 13:01:01 +00002702
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705 return NULL;
2706 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002707
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
2714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2729 }
2730 }
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002733
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002734 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002735}
2736
2737
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002738Py_LOCAL(PyStringObject *)
2739replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002743{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2749 }
Christian Heimes44720832008-05-26 13:01:01 +00002750
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2755 }
Christian Heimes44720832008-05-26 13:01:01 +00002756
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002757 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002758
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2764 }
Christian Heimes44720832008-05-26 13:01:01 +00002765
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2771 }
Christian Heimes44720832008-05-26 13:01:01 +00002772
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2780 }
2781 }
Christian Heimes44720832008-05-26 13:01:01 +00002782
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002783 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002784
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2795 }
2796 }
Christian Heimes44720832008-05-26 13:01:01 +00002797
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805 }
Christian Heimes44720832008-05-26 13:01:01 +00002806}
2807
2808PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002809"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002810\n\
2811Return a copy of string S with all occurrences of substring\n\
2812old replaced by new. If the optional argument count is\n\
2813given, only the first count occurrences are replaced.");
2814
2815static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002816string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002817{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2829 }
Christian Heimes44720832008-05-26 13:01:01 +00002830#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002834#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002837
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2841 }
Christian Heimes44720832008-05-26 13:01:01 +00002842#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002846#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002849
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 return (PyObject *)replace((PyStringObject *) self,
2851 from_s, from_len,
2852 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002853}
2854
2855/** End DALKE **/
2856
2857/* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2860 */
2861Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002862_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002863 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002864{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002865 Py_ssize_t len = PyString_GET_SIZE(self);
2866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002869
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2873 }
Christian Heimes44720832008-05-26 13:01:01 +00002874#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002878#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
2881 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002882
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002883 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002893
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002894 if (end-slen > start)
2895 start = end - slen;
2896 }
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002900}
2901
2902
2903PyDoc_STRVAR(startswith__doc__,
2904"S.startswith(prefix[, start[, end]]) -> bool\n\
2905\n\
2906Return True if S starts with the specified prefix, False otherwise.\n\
2907With optional start, test S beginning at that position.\n\
2908With optional end, stop comparing S at that position.\n\
2909prefix can also be a tuple of strings to try.");
2910
2911static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002912string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002913{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002918
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002919 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2932 }
2933 }
2934 Py_RETURN_FALSE;
2935 }
2936 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (result == -1)
2938 return NULL;
2939 else
2940 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002941}
2942
2943
2944PyDoc_STRVAR(endswith__doc__,
2945"S.endswith(suffix[, start[, end]]) -> bool\n\
2946\n\
2947Return True if S ends with the specified suffix, False otherwise.\n\
2948With optional start, test S beginning at that position.\n\
2949With optional end, stop comparing S at that position.\n\
2950suffix can also be a tuple of strings to try.");
2951
2952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002953string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002954{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002955 Py_ssize_t start = 0;
2956 Py_ssize_t end = PY_SSIZE_T_MAX;
2957 PyObject *subobj;
2958 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002960 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2962 return NULL;
2963 if (PyTuple_Check(subobj)) {
2964 Py_ssize_t i;
2965 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966 result = _string_tailmatch(self,
2967 PyTuple_GET_ITEM(subobj, i),
2968 start, end, +1);
2969 if (result == -1)
2970 return NULL;
2971 else if (result) {
2972 Py_RETURN_TRUE;
2973 }
2974 }
2975 Py_RETURN_FALSE;
2976 }
2977 result = _string_tailmatch(self, subobj, start, end, +1);
2978 if (result == -1)
2979 return NULL;
2980 else
2981 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002982}
2983
2984
2985PyDoc_STRVAR(encode__doc__,
2986"S.encode([encoding[,errors]]) -> object\n\
2987\n\
2988Encodes S using the codec registered for encoding. encoding defaults\n\
2989to the default encoding. errors may be given to set a different error\n\
2990handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992'xmlcharrefreplace' as well as any other name registered with\n\
2993codecs.register_error that is able to handle UnicodeEncodeErrors.");
2994
2995static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002996string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002997{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002998 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002999 char *encoding = NULL;
3000 char *errors = NULL;
3001 PyObject *v;
3002
Benjamin Peterson332d7212009-09-18 21:14:55 +00003003 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003004 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003005 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003006 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003007 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003008 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003010 PyErr_Format(PyExc_TypeError,
3011 "encoder did not return a string/unicode object "
3012 "(type=%.400s)",
3013 Py_TYPE(v)->tp_name);
3014 Py_DECREF(v);
3015 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003016 }
3017 return v;
3018
3019 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003020 return NULL;
3021}
3022
Christian Heimes44720832008-05-26 13:01:01 +00003023
3024PyDoc_STRVAR(decode__doc__,
3025"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003026\n\
Christian Heimes44720832008-05-26 13:01:01 +00003027Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003028to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003029handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003031as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032able to handle UnicodeDecodeErrors.");
3033
3034static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003035string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003036{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003037 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003038 char *encoding = NULL;
3039 char *errors = NULL;
3040 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003041
Benjamin Peterson332d7212009-09-18 21:14:55 +00003042 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003043 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003044 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003045 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003046 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003047 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003048 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003049 PyErr_Format(PyExc_TypeError,
3050 "decoder did not return a string/unicode object "
3051 "(type=%.400s)",
3052 Py_TYPE(v)->tp_name);
3053 Py_DECREF(v);
3054 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003055 }
3056 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003057
Christian Heimes44720832008-05-26 13:01:01 +00003058 onError:
3059 return NULL;
3060}
3061
3062
3063PyDoc_STRVAR(expandtabs__doc__,
3064"S.expandtabs([tabsize]) -> string\n\
3065\n\
3066Return a copy of S where all tab characters are expanded using spaces.\n\
3067If tabsize is not given, a tab size of 8 characters is assumed.");
3068
3069static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003070string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003071{
3072 const char *e, *p, *qe;
3073 char *q;
3074 Py_ssize_t i, j, incr;
3075 PyObject *u;
3076 int tabsize = 8;
3077
3078 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003079 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003080
3081 /* First pass: determine size of output string */
3082 i = 0; /* chars up to and including most recent \n or \r */
3083 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003084 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3085 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003086 if (*p == '\t') {
3087 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003088 incr = tabsize - (j % tabsize);
3089 if (j > PY_SSIZE_T_MAX - incr)
3090 goto overflow1;
3091 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003092 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003093 }
3094 else {
3095 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003096 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003097 j++;
3098 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003099 if (i > PY_SSIZE_T_MAX - j)
3100 goto overflow1;
3101 i += j;
3102 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003103 }
3104 }
Christian Heimes44720832008-05-26 13:01:01 +00003105
3106 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003107 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003108
3109 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003110 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003111 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003112 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003113
3114 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003115 q = PyString_AS_STRING(u); /* next output char */
3116 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003117
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003118 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003119 if (*p == '\t') {
3120 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003121 i = tabsize - (j % tabsize);
3122 j += i;
3123 while (i--) {
3124 if (q >= qe)
3125 goto overflow2;
3126 *q++ = ' ';
3127 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003128 }
3129 }
3130 else {
3131 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003132 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003133 *q++ = *p;
3134 j++;
3135 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003136 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003137 }
Christian Heimes44720832008-05-26 13:01:01 +00003138
3139 return u;
3140
3141 overflow2:
3142 Py_DECREF(u);
3143 overflow1:
3144 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3145 return NULL;
3146}
3147
3148Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003149pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003150{
3151 PyObject *u;
3152
3153 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003154 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003155 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003156 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003157
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003159 Py_INCREF(self);
3160 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003161 }
3162
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003163 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003164 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003165 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003166 if (left)
3167 memset(PyString_AS_STRING(u), fill, left);
3168 Py_MEMCPY(PyString_AS_STRING(u) + left,
3169 PyString_AS_STRING(self),
3170 PyString_GET_SIZE(self));
3171 if (right)
3172 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3173 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003174 }
3175
3176 return u;
3177}
3178
3179PyDoc_STRVAR(ljust__doc__,
3180"S.ljust(width[, fillchar]) -> string\n"
3181"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003182"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003183"done using the specified fill character (default is a space).");
3184
3185static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003186string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003187{
3188 Py_ssize_t width;
3189 char fillchar = ' ';
3190
3191 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003192 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003193
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003194 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003195 Py_INCREF(self);
3196 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003197 }
3198
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003199 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003200}
3201
3202
3203PyDoc_STRVAR(rjust__doc__,
3204"S.rjust(width[, fillchar]) -> string\n"
3205"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003206"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003207"done using the specified fill character (default is a space)");
3208
3209static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003210string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003211{
3212 Py_ssize_t width;
3213 char fillchar = ' ';
3214
3215 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003216 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003217
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003218 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003219 Py_INCREF(self);
3220 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003221 }
3222
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003223 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003224}
3225
3226
3227PyDoc_STRVAR(center__doc__,
3228"S.center(width[, fillchar]) -> string\n"
3229"\n"
3230"Return S centered in a string of length width. Padding is\n"
3231"done using the specified fill character (default is a space)");
3232
3233static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003234string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003235{
3236 Py_ssize_t marg, left;
3237 Py_ssize_t width;
3238 char fillchar = ' ';
3239
3240 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003241 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003242
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003244 Py_INCREF(self);
3245 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003246 }
3247
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003248 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003249 left = marg / 2 + (marg & width & 1);
3250
3251 return pad(self, left, marg - left, fillchar);
3252}
3253
3254PyDoc_STRVAR(zfill__doc__,
3255"S.zfill(width) -> string\n"
3256"\n"
3257"Pad a numeric string S with zeros on the left, to fill a field\n"
3258"of the specified width. The string S is never truncated.");
3259
3260static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003261string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003262{
3263 Py_ssize_t fill;
3264 PyObject *s;
3265 char *p;
3266 Py_ssize_t width;
3267
3268 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003269 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003270
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003271 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003272 if (PyString_CheckExact(self)) {
3273 Py_INCREF(self);
3274 return (PyObject*) self;
3275 }
3276 else
3277 return PyString_FromStringAndSize(
3278 PyString_AS_STRING(self),
3279 PyString_GET_SIZE(self)
3280 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003281 }
3282
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003283 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003284
Christian Heimes44720832008-05-26 13:01:01 +00003285 s = pad(self, fill, 0, '0');
3286
3287 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003288 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003289
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003290 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003291 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003292 /* move sign to beginning of string */
3293 p[0] = p[fill];
3294 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003295 }
3296
3297 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003298}
3299
Christian Heimes44720832008-05-26 13:01:01 +00003300PyDoc_STRVAR(isspace__doc__,
3301"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003302\n\
Christian Heimes44720832008-05-26 13:01:01 +00003303Return True if all characters in S are whitespace\n\
3304and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003305
Christian Heimes44720832008-05-26 13:01:01 +00003306static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003307string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308{
Christian Heimes44720832008-05-26 13:01:01 +00003309 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003310 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003311 register const unsigned char *e;
3312
3313 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003314 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003315 isspace(*p))
3316 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003317
3318 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003319 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003320 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003321
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003323 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 if (!isspace(*p))
3325 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003326 }
Christian Heimes44720832008-05-26 13:01:01 +00003327 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003328}
3329
Christian Heimes44720832008-05-26 13:01:01 +00003330
3331PyDoc_STRVAR(isalpha__doc__,
3332"S.isalpha() -> bool\n\
3333\n\
3334Return True if all characters in S are alphabetic\n\
3335and there is at least one character in S, False otherwise.");
3336
3337static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339{
Christian Heimes44720832008-05-26 13:01:01 +00003340 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003341 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003342 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003343
Christian Heimes44720832008-05-26 13:01:01 +00003344 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003346 isalpha(*p))
3347 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003348
3349 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003350 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003351 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003352
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003353 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003354 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 if (!isalpha(*p))
3356 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003357 }
Christian Heimes44720832008-05-26 13:01:01 +00003358 return PyBool_FromLong(1);
3359}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360
Christian Heimes44720832008-05-26 13:01:01 +00003361
3362PyDoc_STRVAR(isalnum__doc__,
3363"S.isalnum() -> bool\n\
3364\n\
3365Return True if all characters in S are alphanumeric\n\
3366and there is at least one character in S, False otherwise.");
3367
3368static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003369string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003370{
3371 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003372 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003373 register const unsigned char *e;
3374
3375 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003376 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003377 isalnum(*p))
3378 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003379
3380 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003381 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003382 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003385 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 if (!isalnum(*p))
3387 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003388 }
3389 return PyBool_FromLong(1);
3390}
3391
3392
3393PyDoc_STRVAR(isdigit__doc__,
3394"S.isdigit() -> bool\n\
3395\n\
3396Return True if all characters in S are digits\n\
3397and there is at least one character in S, False otherwise.");
3398
3399static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003400string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003401{
3402 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003403 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003404 register const unsigned char *e;
3405
3406 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003407 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003408 isdigit(*p))
3409 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003410
3411 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003412 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003413 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003415 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003416 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 if (!isdigit(*p))
3418 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003419 }
3420 return PyBool_FromLong(1);
3421}
3422
3423
3424PyDoc_STRVAR(islower__doc__,
3425"S.islower() -> bool\n\
3426\n\
3427Return True if all cased characters in S are lowercase and there is\n\
3428at least one cased character in S, False otherwise.");
3429
3430static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003431string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003432{
3433 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003434 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003435 register const unsigned char *e;
3436 int cased;
3437
3438 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003439 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003440 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003441
3442 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003443 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003444 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003445
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003446 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003447 cased = 0;
3448 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003449 if (isupper(*p))
3450 return PyBool_FromLong(0);
3451 else if (!cased && islower(*p))
3452 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003453 }
3454 return PyBool_FromLong(cased);
3455}
3456
3457
3458PyDoc_STRVAR(isupper__doc__,
3459"S.isupper() -> bool\n\
3460\n\
3461Return True if all cased characters in S are uppercase and there is\n\
3462at least one cased character in S, False otherwise.");
3463
3464static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003465string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003466{
3467 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003468 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003469 register const unsigned char *e;
3470 int cased;
3471
3472 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003473 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003474 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003475
3476 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003477 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003478 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003479
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003481 cased = 0;
3482 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003483 if (islower(*p))
3484 return PyBool_FromLong(0);
3485 else if (!cased && isupper(*p))
3486 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003487 }
3488 return PyBool_FromLong(cased);
3489}
3490
3491
3492PyDoc_STRVAR(istitle__doc__,
3493"S.istitle() -> bool\n\
3494\n\
3495Return True if S is a titlecased string and there is at least one\n\
3496character in S, i.e. uppercase characters may only follow uncased\n\
3497characters and lowercase characters only cased ones. Return False\n\
3498otherwise.");
3499
3500static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003501string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003502{
3503 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003504 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003505 register const unsigned char *e;
3506 int cased, previous_is_cased;
3507
3508 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003509 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003510 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003511
3512 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003513 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003514 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003515
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003516 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003517 cased = 0;
3518 previous_is_cased = 0;
3519 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003520 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003521
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003522 if (isupper(ch)) {
3523 if (previous_is_cased)
3524 return PyBool_FromLong(0);
3525 previous_is_cased = 1;
3526 cased = 1;
3527 }
3528 else if (islower(ch)) {
3529 if (!previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3533 }
3534 else
3535 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003536 }
3537 return PyBool_FromLong(cased);
3538}
3539
3540
3541PyDoc_STRVAR(splitlines__doc__,
3542"S.splitlines([keepends]) -> list of strings\n\
3543\n\
3544Return a list of the lines in S, breaking at line boundaries.\n\
3545Line breaks are not included in the resulting list unless keepends\n\
3546is given and true.");
3547
3548static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003550{
Christian Heimes44720832008-05-26 13:01:01 +00003551 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003552
3553 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003554 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003555
Antoine Pitrou64672132010-01-13 07:55:48 +00003556 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003557 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003559 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003560}
3561
Robert Schuppenies51df0642008-06-01 16:16:17 +00003562PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003563"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003564
3565static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003566string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003568 Py_ssize_t res;
3569 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3570 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003571}
3572
Christian Heimes1a6387e2008-03-26 12:49:49 +00003573static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003574string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003575{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003576 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003577}
3578
Christian Heimes1a6387e2008-03-26 12:49:49 +00003579
Christian Heimes44720832008-05-26 13:01:01 +00003580#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003581
Christian Heimes44720832008-05-26 13:01:01 +00003582PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003583"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003584\n\
Eric Smith6c840852010-11-06 19:43:44 +00003585Return a formatted version of S, using substitutions from args and kwargs.\n\
3586The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003587
Eric Smithdc13b792008-05-30 18:10:04 +00003588static PyObject *
3589string__format__(PyObject* self, PyObject* args)
3590{
3591 PyObject *format_spec;
3592 PyObject *result = NULL;
3593 PyObject *tmp = NULL;
3594
3595 /* If 2.x, convert format_spec to the same type as value */
3596 /* This is to allow things like u''.format('') */
3597 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003598 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003599 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003600 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3601 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3602 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003603 }
3604 tmp = PyObject_Str(format_spec);
3605 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003606 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003607 format_spec = tmp;
3608
3609 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003610 PyString_AS_STRING(format_spec),
3611 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003612done:
3613 Py_XDECREF(tmp);
3614 return result;
3615}
3616
Christian Heimes44720832008-05-26 13:01:01 +00003617PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003618"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003619\n\
Eric Smith6c840852010-11-06 19:43:44 +00003620Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003621
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003622
Christian Heimes1a6387e2008-03-26 12:49:49 +00003623static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003624string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003625 /* Counterparts of the obsolete stropmodule functions; except
3626 string.maketrans(). */
3627 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3628 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3629 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3630 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3631 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3632 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3633 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3634 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3635 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3636 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3637 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3638 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3639 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3640 capitalize__doc__},
3641 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3642 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3643 endswith__doc__},
3644 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3645 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3646 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3647 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3648 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3649 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3650 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3651 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3652 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3653 rpartition__doc__},
3654 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3655 startswith__doc__},
3656 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3657 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3658 swapcase__doc__},
3659 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3660 translate__doc__},
3661 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3662 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3663 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3664 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3665 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3666 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3667 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3668 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3669 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3670 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3671 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3672 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3673 expandtabs__doc__},
3674 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3675 splitlines__doc__},
3676 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3677 sizeof__doc__},
3678 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3679 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003680};
3681
3682static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003683str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003684
Christian Heimes44720832008-05-26 13:01:01 +00003685static PyObject *
3686string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3687{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003688 PyObject *x = NULL;
3689 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003690
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003691 if (type != &PyString_Type)
3692 return str_subtype_new(type, args, kwds);
3693 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3694 return NULL;
3695 if (x == NULL)
3696 return PyString_FromString("");
3697 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003698}
3699
3700static PyObject *
3701str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3702{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003703 PyObject *tmp, *pnew;
3704 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003705
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003706 assert(PyType_IsSubtype(type, &PyString_Type));
3707 tmp = string_new(&PyString_Type, args, kwds);
3708 if (tmp == NULL)
3709 return NULL;
3710 assert(PyString_CheckExact(tmp));
3711 n = PyString_GET_SIZE(tmp);
3712 pnew = type->tp_alloc(type, n);
3713 if (pnew != NULL) {
3714 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3715 ((PyStringObject *)pnew)->ob_shash =
3716 ((PyStringObject *)tmp)->ob_shash;
3717 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3718 }
3719 Py_DECREF(tmp);
3720 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003721}
3722
3723static PyObject *
3724basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3725{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003726 PyErr_SetString(PyExc_TypeError,
3727 "The basestring type cannot be instantiated");
3728 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003729}
3730
3731static PyObject *
3732string_mod(PyObject *v, PyObject *w)
3733{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003734 if (!PyString_Check(v)) {
3735 Py_INCREF(Py_NotImplemented);
3736 return Py_NotImplemented;
3737 }
3738 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003739}
3740
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3743
/* Number protocol table for str.  Only the remainder slot is filled in
   (string_mod, i.e. the `%` formatting operator); the slots not listed
   here are left zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3751
3752
/* Type object for basestring.  It may be subclassed (Py_TPFLAGS_BASETYPE)
   but its tp_new, basestring_new, always raises TypeError.  PyString_Type
   names this type as its tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3794
/* Docstring installed as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3800
/* Type object for str.  Instances are variable-sized: the basic size is
   PyStringObject_SIZE and each item is one char.  The type derives from
   PyBaseString_Type, is created through string_new and released through
   string_dealloc / PyObject_Del. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3844
3845void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003846PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003847{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003848 register PyObject *v;
3849 if (*pv == NULL)
3850 return;
3851 if (w == NULL || !PyString_Check(*pv)) {
3852 Py_DECREF(*pv);
3853 *pv = NULL;
3854 return;
3855 }
3856 v = string_concat((PyStringObject *) *pv, w);
3857 Py_DECREF(*pv);
3858 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003859}
3860
3861void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003862PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003863{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003864 PyString_Concat(pv, w);
3865 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003866}
3867
3868
3869/* The following function breaks the notion that strings are immutable:
3870 it changes the size of a string. We get away with this only if there
3871 is only one module referencing the object. You can also think of it
3872 as creating a new string object and destroying the old one, only
3873 more efficiently. In any case, don't use this if the string may
3874 already be known to some other part of the code...
3875 Note that if there's not enough memory to resize the string, the original
3876 string object at *pv is deallocated, *pv is set to NULL, an "out of
3877 memory" exception is set, and -1 is returned. Else (on success) 0 is
3878 returned, and the value in *pv may or may not be the same as on input.
3879 As always, an extra byte is allocated for a trailing \0 byte (newsize
3880 does *not* include that), and a trailing \0 byte is stored.
3881*/
3882
3883int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003884_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003885{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003886 register PyObject *v;
3887 register PyStringObject *sv;
3888 v = *pv;
3889 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3890 PyString_CHECK_INTERNED(v)) {
3891 *pv = 0;
3892 Py_DECREF(v);
3893 PyErr_BadInternalCall();
3894 return -1;
3895 }
3896 /* XXX UNREF/NEWREF interface should be more symmetrical */
3897 _Py_DEC_REFTOTAL;
3898 _Py_ForgetReference(v);
3899 *pv = (PyObject *)
3900 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3901 if (*pv == NULL) {
3902 PyObject_Del(v);
3903 PyErr_NoMemory();
3904 return -1;
3905 }
3906 _Py_NewReference(*pv);
3907 sv = (PyStringObject *) *pv;
3908 Py_SIZE(sv) = newsize;
3909 sv->ob_sval[newsize] = '\0';
3910 sv->ob_shash = -1; /* invalidate cached hash value */
3911 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003912}
3913
3914/* Helpers for formatstring */
3915
3916Py_LOCAL_INLINE(PyObject *)
3917getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3918{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003919 Py_ssize_t argidx = *p_argidx;
3920 if (argidx < arglen) {
3921 (*p_argidx)++;
3922 if (arglen < 0)
3923 return args;
3924 else
3925 return PyTuple_GetItem(args, argidx);
3926 }
3927 PyErr_SetString(PyExc_TypeError,
3928 "not enough arguments for format string");
3929 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003930}
3931
/* Format codes
 *
 * Bit flags OR-ed into `flags` by PyString_Format() as it scans the flag
 * characters of a conversion specification:
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003944
Mark Dickinson18cfada2009-11-23 18:46:41 +00003945/* Returns a new reference to a PyString object, or NULL on failure. */
3946
3947static PyObject *
3948formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003949{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003950 char *p;
3951 PyObject *result;
3952 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003953
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003954 x = PyFloat_AsDouble(v);
3955 if (x == -1.0 && PyErr_Occurred()) {
3956 PyErr_Format(PyExc_TypeError, "float argument required, "
3957 "not %.200s", Py_TYPE(v)->tp_name);
3958 return NULL;
3959 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003960
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003961 if (prec < 0)
3962 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003963
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003964 p = PyOS_double_to_string(x, type, prec,
3965 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003966
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003967 if (p == NULL)
3968 return NULL;
3969 result = PyString_FromStringAndSize(p, strlen(p));
3970 PyMem_Free(p);
3971 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003972}
3973
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003974/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003975 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3976 * Python's regular ints.
3977 * Return value: a new PyString*, or NULL if error.
3978 * . *pbuf is set to point into it,
3979 * *plen set to the # of chars following that.
3980 * Caller must decref it when done using pbuf.
3981 * The string starting at *pbuf is of the form
3982 * "-"? ("0x" | "0X")? digit+
3983 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3984 * set in flags. The case of hex digits will be correct,
3985 * There will be at least prec digits, zero-filled on the left if
3986 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003987 * val object to be converted
3988 * flags bitmask of format flags; only F_ALT is looked at
3989 * prec minimum number of digits; 0-fill on left if needed
3990 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003991 *
3992 * CAUTION: o, x and X conversions on regular ints can never
3993 * produce a '-' sign, but can for Python's unbounded ints.
3994 */
3995PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003996_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003997 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003998{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003999 PyObject *result = NULL;
4000 char *buf;
4001 Py_ssize_t i;
4002 int sign; /* 1 if '-', else 0 */
4003 int len; /* number of characters */
4004 Py_ssize_t llen;
4005 int numdigits; /* len == numnondigits + numdigits */
4006 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004007
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004008 switch (type) {
4009 case 'd':
4010 case 'u':
4011 result = Py_TYPE(val)->tp_str(val);
4012 break;
4013 case 'o':
4014 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4015 break;
4016 case 'x':
4017 case 'X':
4018 numnondigits = 2;
4019 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4020 break;
4021 default:
4022 assert(!"'type' not in [duoxX]");
4023 }
4024 if (!result)
4025 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004027 buf = PyString_AsString(result);
4028 if (!buf) {
4029 Py_DECREF(result);
4030 return NULL;
4031 }
Christian Heimes44720832008-05-26 13:01:01 +00004032
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004033 /* To modify the string in-place, there can only be one reference. */
4034 if (Py_REFCNT(result) != 1) {
4035 PyErr_BadInternalCall();
4036 return NULL;
4037 }
4038 llen = PyString_Size(result);
4039 if (llen > INT_MAX) {
4040 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4041 return NULL;
4042 }
4043 len = (int)llen;
4044 if (buf[len-1] == 'L') {
4045 --len;
4046 buf[len] = '\0';
4047 }
4048 sign = buf[0] == '-';
4049 numnondigits += sign;
4050 numdigits = len - numnondigits;
4051 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004052
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004053 /* Get rid of base marker unless F_ALT */
4054 if ((flags & F_ALT) == 0) {
4055 /* Need to skip 0x, 0X or 0. */
4056 int skipped = 0;
4057 switch (type) {
4058 case 'o':
4059 assert(buf[sign] == '0');
4060 /* If 0 is only digit, leave it alone. */
4061 if (numdigits > 1) {
4062 skipped = 1;
4063 --numdigits;
4064 }
4065 break;
4066 case 'x':
4067 case 'X':
4068 assert(buf[sign] == '0');
4069 assert(buf[sign + 1] == 'x');
4070 skipped = 2;
4071 numnondigits -= 2;
4072 break;
4073 }
4074 if (skipped) {
4075 buf += skipped;
4076 len -= skipped;
4077 if (sign)
4078 buf[0] = '-';
4079 }
4080 assert(len == numnondigits + numdigits);
4081 assert(numdigits > 0);
4082 }
Christian Heimes44720832008-05-26 13:01:01 +00004083
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004084 /* Fill with leading zeroes to meet minimum width. */
4085 if (prec > numdigits) {
4086 PyObject *r1 = PyString_FromStringAndSize(NULL,
4087 numnondigits + prec);
4088 char *b1;
4089 if (!r1) {
4090 Py_DECREF(result);
4091 return NULL;
4092 }
4093 b1 = PyString_AS_STRING(r1);
4094 for (i = 0; i < numnondigits; ++i)
4095 *b1++ = *buf++;
4096 for (i = 0; i < prec - numdigits; i++)
4097 *b1++ = '0';
4098 for (i = 0; i < numdigits; i++)
4099 *b1++ = *buf++;
4100 *b1 = '\0';
4101 Py_DECREF(result);
4102 result = r1;
4103 buf = PyString_AS_STRING(result);
4104 len = numnondigits + prec;
4105 }
Christian Heimes44720832008-05-26 13:01:01 +00004106
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004107 /* Fix up case for hex conversions. */
4108 if (type == 'X') {
4109 /* Need to convert all lower case letters to upper case.
4110 and need to convert 0x to 0X (and -0x to -0X). */
4111 for (i = 0; i < len; i++)
4112 if (buf[i] >= 'a' && buf[i] <= 'x')
4113 buf[i] -= 'a'-'A';
4114 }
4115 *pbuf = buf;
4116 *plen = len;
4117 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004118}
4119
4120Py_LOCAL_INLINE(int)
4121formatint(char *buf, size_t buflen, int flags,
4122 int prec, int type, PyObject *v)
4123{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004124 /* fmt = '%#.' + `prec` + 'l' + `type`
4125 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4126 + 1 + 1 = 24 */
4127 char fmt[64]; /* plenty big enough! */
4128 char *sign;
4129 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004130
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004131 x = PyInt_AsLong(v);
4132 if (x == -1 && PyErr_Occurred()) {
4133 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4134 Py_TYPE(v)->tp_name);
4135 return -1;
4136 }
4137 if (x < 0 && type == 'u') {
4138 type = 'd';
4139 }
4140 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4141 sign = "-";
4142 else
4143 sign = "";
4144 if (prec < 0)
4145 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004147 if ((flags & F_ALT) &&
4148 (type == 'x' || type == 'X')) {
4149 /* When converting under %#x or %#X, there are a number
4150 * of issues that cause pain:
4151 * - when 0 is being converted, the C standard leaves off
4152 * the '0x' or '0X', which is inconsistent with other
4153 * %#x/%#X conversions and inconsistent with Python's
4154 * hex() function
4155 * - there are platforms that violate the standard and
4156 * convert 0 with the '0x' or '0X'
4157 * (Metrowerks, Compaq Tru64)
4158 * - there are platforms that give '0x' when converting
4159 * under %#X, but convert 0 in accordance with the
4160 * standard (OS/2 EMX)
4161 *
4162 * We can achieve the desired consistency by inserting our
4163 * own '0x' or '0X' prefix, and substituting %x/%X in place
4164 * of %#x/%#X.
4165 *
4166 * Note that this is the same approach as used in
4167 * formatint() in unicodeobject.c
4168 */
4169 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4170 sign, type, prec, type);
4171 }
4172 else {
4173 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4174 sign, (flags&F_ALT) ? "#" : "",
4175 prec, type);
4176 }
Christian Heimes44720832008-05-26 13:01:01 +00004177
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004178 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4179 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4180 */
4181 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4182 PyErr_SetString(PyExc_OverflowError,
4183 "formatted integer is too long (precision too large?)");
4184 return -1;
4185 }
4186 if (sign[0])
4187 PyOS_snprintf(buf, buflen, fmt, -x);
4188 else
4189 PyOS_snprintf(buf, buflen, fmt, x);
4190 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004191}
4192
4193Py_LOCAL_INLINE(int)
4194formatchar(char *buf, size_t buflen, PyObject *v)
4195{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004196 /* presume that the buffer is at least 2 characters long */
4197 if (PyString_Check(v)) {
4198 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4199 return -1;
4200 }
4201 else {
4202 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4203 return -1;
4204 }
4205 buf[1] = '\0';
4206 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004207}
4208
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   PyString_Format() declares its local formatbuf with this size and
   passes it to formatint() and formatchar().
*/
#define FORMATBUFLEN (size_t)120
4218
4219PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004220PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004221{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004222 char *fmt, *res;
4223 Py_ssize_t arglen, argidx;
4224 Py_ssize_t reslen, rescnt, fmtcnt;
4225 int args_owned = 0;
4226 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004227#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004228 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004229#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004230 PyObject *dict = NULL;
4231 if (format == NULL || !PyString_Check(format) || args == NULL) {
4232 PyErr_BadInternalCall();
4233 return NULL;
4234 }
4235 orig_args = args;
4236 fmt = PyString_AS_STRING(format);
4237 fmtcnt = PyString_GET_SIZE(format);
4238 reslen = rescnt = fmtcnt + 100;
4239 result = PyString_FromStringAndSize((char *)NULL, reslen);
4240 if (result == NULL)
4241 return NULL;
4242 res = PyString_AsString(result);
4243 if (PyTuple_Check(args)) {
4244 arglen = PyTuple_GET_SIZE(args);
4245 argidx = 0;
4246 }
4247 else {
4248 arglen = -1;
4249 argidx = -2;
4250 }
4251 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4252 !PyObject_TypeCheck(args, &PyBaseString_Type))
4253 dict = args;
4254 while (--fmtcnt >= 0) {
4255 if (*fmt != '%') {
4256 if (--rescnt < 0) {
4257 rescnt = fmtcnt + 100;
4258 reslen += rescnt;
4259 if (_PyString_Resize(&result, reslen))
4260 return NULL;
4261 res = PyString_AS_STRING(result)
4262 + reslen - rescnt;
4263 --rescnt;
4264 }
4265 *res++ = *fmt++;
4266 }
4267 else {
4268 /* Got a format specifier */
4269 int flags = 0;
4270 Py_ssize_t width = -1;
4271 int prec = -1;
4272 int c = '\0';
4273 int fill;
4274 int isnumok;
4275 PyObject *v = NULL;
4276 PyObject *temp = NULL;
4277 char *pbuf;
4278 int sign;
4279 Py_ssize_t len;
4280 char formatbuf[FORMATBUFLEN];
4281 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004282#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004283 char *fmt_start = fmt;
4284 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004285#endif
4286
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004287 fmt++;
4288 if (*fmt == '(') {
4289 char *keystart;
4290 Py_ssize_t keylen;
4291 PyObject *key;
4292 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004293
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004294 if (dict == NULL) {
4295 PyErr_SetString(PyExc_TypeError,
4296 "format requires a mapping");
4297 goto error;
4298 }
4299 ++fmt;
4300 --fmtcnt;
4301 keystart = fmt;
4302 /* Skip over balanced parentheses */
4303 while (pcount > 0 && --fmtcnt >= 0) {
4304 if (*fmt == ')')
4305 --pcount;
4306 else if (*fmt == '(')
4307 ++pcount;
4308 fmt++;
4309 }
4310 keylen = fmt - keystart - 1;
4311 if (fmtcnt < 0 || pcount > 0) {
4312 PyErr_SetString(PyExc_ValueError,
4313 "incomplete format key");
4314 goto error;
4315 }
4316 key = PyString_FromStringAndSize(keystart,
4317 keylen);
4318 if (key == NULL)
4319 goto error;
4320 if (args_owned) {
4321 Py_DECREF(args);
4322 args_owned = 0;
4323 }
4324 args = PyObject_GetItem(dict, key);
4325 Py_DECREF(key);
4326 if (args == NULL) {
4327 goto error;
4328 }
4329 args_owned = 1;
4330 arglen = -1;
4331 argidx = -2;
4332 }
4333 while (--fmtcnt >= 0) {
4334 switch (c = *fmt++) {
4335 case '-': flags |= F_LJUST; continue;
4336 case '+': flags |= F_SIGN; continue;
4337 case ' ': flags |= F_BLANK; continue;
4338 case '#': flags |= F_ALT; continue;
4339 case '0': flags |= F_ZERO; continue;
4340 }
4341 break;
4342 }
4343 if (c == '*') {
4344 v = getnextarg(args, arglen, &argidx);
4345 if (v == NULL)
4346 goto error;
4347 if (!PyInt_Check(v)) {
4348 PyErr_SetString(PyExc_TypeError,
4349 "* wants int");
4350 goto error;
4351 }
4352 width = PyInt_AsLong(v);
4353 if (width < 0) {
4354 flags |= F_LJUST;
4355 width = -width;
4356 }
4357 if (--fmtcnt >= 0)
4358 c = *fmt++;
4359 }
4360 else if (c >= 0 && isdigit(c)) {
4361 width = c - '0';
4362 while (--fmtcnt >= 0) {
4363 c = Py_CHARMASK(*fmt++);
4364 if (!isdigit(c))
4365 break;
4366 if ((width*10) / 10 != width) {
4367 PyErr_SetString(
4368 PyExc_ValueError,
4369 "width too big");
4370 goto error;
4371 }
4372 width = width*10 + (c - '0');
4373 }
4374 }
4375 if (c == '.') {
4376 prec = 0;
4377 if (--fmtcnt >= 0)
4378 c = *fmt++;
4379 if (c == '*') {
4380 v = getnextarg(args, arglen, &argidx);
4381 if (v == NULL)
4382 goto error;
4383 if (!PyInt_Check(v)) {
4384 PyErr_SetString(
4385 PyExc_TypeError,
4386 "* wants int");
4387 goto error;
4388 }
4389 prec = PyInt_AsLong(v);
4390 if (prec < 0)
4391 prec = 0;
4392 if (--fmtcnt >= 0)
4393 c = *fmt++;
4394 }
4395 else if (c >= 0 && isdigit(c)) {
4396 prec = c - '0';
4397 while (--fmtcnt >= 0) {
4398 c = Py_CHARMASK(*fmt++);
4399 if (!isdigit(c))
4400 break;
4401 if ((prec*10) / 10 != prec) {
4402 PyErr_SetString(
4403 PyExc_ValueError,
4404 "prec too big");
4405 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004406 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004407 prec = prec*10 + (c - '0');
4408 }
4409 }
4410 } /* prec */
4411 if (fmtcnt >= 0) {
4412 if (c == 'h' || c == 'l' || c == 'L') {
4413 if (--fmtcnt >= 0)
4414 c = *fmt++;
4415 }
4416 }
4417 if (fmtcnt < 0) {
4418 PyErr_SetString(PyExc_ValueError,
4419 "incomplete format");
4420 goto error;
4421 }
4422 if (c != '%') {
4423 v = getnextarg(args, arglen, &argidx);
4424 if (v == NULL)
4425 goto error;
4426 }
4427 sign = 0;
4428 fill = ' ';
4429 switch (c) {
4430 case '%':
4431 pbuf = "%";
4432 len = 1;
4433 break;
4434 case 's':
4435#ifdef Py_USING_UNICODE
4436 if (PyUnicode_Check(v)) {
4437 fmt = fmt_start;
4438 argidx = argidx_start;
4439 goto unicode;
4440 }
4441#endif
4442 temp = _PyObject_Str(v);
4443#ifdef Py_USING_UNICODE
4444 if (temp != NULL && PyUnicode_Check(temp)) {
4445 Py_DECREF(temp);
4446 fmt = fmt_start;
4447 argidx = argidx_start;
4448 goto unicode;
4449 }
4450#endif
4451 /* Fall through */
4452 case 'r':
4453 if (c == 'r')
4454 temp = PyObject_Repr(v);
4455 if (temp == NULL)
4456 goto error;
4457 if (!PyString_Check(temp)) {
4458 PyErr_SetString(PyExc_TypeError,
4459 "%s argument has non-string str()");
4460 Py_DECREF(temp);
4461 goto error;
4462 }
4463 pbuf = PyString_AS_STRING(temp);
4464 len = PyString_GET_SIZE(temp);
4465 if (prec >= 0 && len > prec)
4466 len = prec;
4467 break;
4468 case 'i':
4469 case 'd':
4470 case 'u':
4471 case 'o':
4472 case 'x':
4473 case 'X':
4474 if (c == 'i')
4475 c = 'd';
4476 isnumok = 0;
4477 if (PyNumber_Check(v)) {
4478 PyObject *iobj=NULL;
4479
4480 if (PyInt_Check(v) || (PyLong_Check(v))) {
4481 iobj = v;
4482 Py_INCREF(iobj);
4483 }
4484 else {
4485 iobj = PyNumber_Int(v);
4486 if (iobj==NULL) iobj = PyNumber_Long(v);
4487 }
4488 if (iobj!=NULL) {
4489 if (PyInt_Check(iobj)) {
4490 isnumok = 1;
4491 pbuf = formatbuf;
4492 len = formatint(pbuf,
4493 sizeof(formatbuf),
4494 flags, prec, c, iobj);
4495 Py_DECREF(iobj);
4496 if (len < 0)
4497 goto error;
4498 sign = 1;
4499 }
4500 else if (PyLong_Check(iobj)) {
4501 int ilen;
4502
4503 isnumok = 1;
4504 temp = _PyString_FormatLong(iobj, flags,
4505 prec, c, &pbuf, &ilen);
4506 Py_DECREF(iobj);
4507 len = ilen;
4508 if (!temp)
4509 goto error;
4510 sign = 1;
4511 }
4512 else {
4513 Py_DECREF(iobj);
4514 }
4515 }
4516 }
4517 if (!isnumok) {
4518 PyErr_Format(PyExc_TypeError,
4519 "%%%c format: a number is required, "
4520 "not %.200s", c, Py_TYPE(v)->tp_name);
4521 goto error;
4522 }
4523 if (flags & F_ZERO)
4524 fill = '0';
4525 break;
4526 case 'e':
4527 case 'E':
4528 case 'f':
4529 case 'F':
4530 case 'g':
4531 case 'G':
4532 temp = formatfloat(v, flags, prec, c);
4533 if (temp == NULL)
4534 goto error;
4535 pbuf = PyString_AS_STRING(temp);
4536 len = PyString_GET_SIZE(temp);
4537 sign = 1;
4538 if (flags & F_ZERO)
4539 fill = '0';
4540 break;
4541 case 'c':
4542#ifdef Py_USING_UNICODE
4543 if (PyUnicode_Check(v)) {
4544 fmt = fmt_start;
4545 argidx = argidx_start;
4546 goto unicode;
4547 }
4548#endif
4549 pbuf = formatbuf;
4550 len = formatchar(pbuf, sizeof(formatbuf), v);
4551 if (len < 0)
4552 goto error;
4553 break;
4554 default:
4555 PyErr_Format(PyExc_ValueError,
4556 "unsupported format character '%c' (0x%x) "
4557 "at index %zd",
4558 c, c,
4559 (Py_ssize_t)(fmt - 1 -
4560 PyString_AsString(format)));
4561 goto error;
4562 }
4563 if (sign) {
4564 if (*pbuf == '-' || *pbuf == '+') {
4565 sign = *pbuf++;
4566 len--;
4567 }
4568 else if (flags & F_SIGN)
4569 sign = '+';
4570 else if (flags & F_BLANK)
4571 sign = ' ';
4572 else
4573 sign = 0;
4574 }
4575 if (width < len)
4576 width = len;
4577 if (rescnt - (sign != 0) < width) {
4578 reslen -= rescnt;
4579 rescnt = width + fmtcnt + 100;
4580 reslen += rescnt;
4581 if (reslen < 0) {
4582 Py_DECREF(result);
4583 Py_XDECREF(temp);
4584 return PyErr_NoMemory();
4585 }
4586 if (_PyString_Resize(&result, reslen)) {
4587 Py_XDECREF(temp);
4588 return NULL;
4589 }
4590 res = PyString_AS_STRING(result)
4591 + reslen - rescnt;
4592 }
4593 if (sign) {
4594 if (fill != ' ')
4595 *res++ = sign;
4596 rescnt--;
4597 if (width > len)
4598 width--;
4599 }
4600 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4601 assert(pbuf[0] == '0');
4602 assert(pbuf[1] == c);
4603 if (fill != ' ') {
4604 *res++ = *pbuf++;
4605 *res++ = *pbuf++;
4606 }
4607 rescnt -= 2;
4608 width -= 2;
4609 if (width < 0)
4610 width = 0;
4611 len -= 2;
4612 }
4613 if (width > len && !(flags & F_LJUST)) {
4614 do {
4615 --rescnt;
4616 *res++ = fill;
4617 } while (--width > len);
4618 }
4619 if (fill == ' ') {
4620 if (sign)
4621 *res++ = sign;
4622 if ((flags & F_ALT) &&
4623 (c == 'x' || c == 'X')) {
4624 assert(pbuf[0] == '0');
4625 assert(pbuf[1] == c);
4626 *res++ = *pbuf++;
4627 *res++ = *pbuf++;
4628 }
4629 }
4630 Py_MEMCPY(res, pbuf, len);
4631 res += len;
4632 rescnt -= len;
4633 while (--width >= len) {
4634 --rescnt;
4635 *res++ = ' ';
4636 }
4637 if (dict && (argidx < arglen) && c != '%') {
4638 PyErr_SetString(PyExc_TypeError,
4639 "not all arguments converted during string formatting");
4640 Py_XDECREF(temp);
4641 goto error;
4642 }
4643 Py_XDECREF(temp);
4644 } /* '%' */
4645 } /* until end */
4646 if (argidx < arglen && !dict) {
4647 PyErr_SetString(PyExc_TypeError,
4648 "not all arguments converted during string formatting");
4649 goto error;
4650 }
4651 if (args_owned) {
4652 Py_DECREF(args);
4653 }
4654 if (_PyString_Resize(&result, reslen - rescnt))
4655 return NULL;
4656 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004657
4658#ifdef Py_USING_UNICODE
4659 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004660 if (args_owned) {
4661 Py_DECREF(args);
4662 args_owned = 0;
4663 }
4664 /* Fiddle args right (remove the first argidx arguments) */
4665 if (PyTuple_Check(orig_args) && argidx > 0) {
4666 PyObject *v;
4667 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4668 v = PyTuple_New(n);
4669 if (v == NULL)
4670 goto error;
4671 while (--n >= 0) {
4672 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4673 Py_INCREF(w);
4674 PyTuple_SET_ITEM(v, n, w);
4675 }
4676 args = v;
4677 } else {
4678 Py_INCREF(orig_args);
4679 args = orig_args;
4680 }
4681 args_owned = 1;
4682 /* Take what we have of the result and let the Unicode formatting
4683 function format the rest of the input. */
4684 rescnt = res - PyString_AS_STRING(result);
4685 if (_PyString_Resize(&result, rescnt))
4686 goto error;
4687 fmtcnt = PyString_GET_SIZE(format) - \
4688 (fmt - PyString_AS_STRING(format));
4689 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4690 if (format == NULL)
4691 goto error;
4692 v = PyUnicode_Format(format, args);
4693 Py_DECREF(format);
4694 if (v == NULL)
4695 goto error;
4696 /* Paste what we have (result) to what the Unicode formatting
4697 function returned (v) and return the result (or error) */
4698 w = PyUnicode_Concat(result, v);
4699 Py_DECREF(result);
4700 Py_DECREF(v);
4701 Py_DECREF(args);
4702 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004703#endif /* Py_USING_UNICODE */
4704
4705 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004706 Py_DECREF(result);
4707 if (args_owned) {
4708 Py_DECREF(args);
4709 }
4710 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004711}
4712
4713void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004714PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004715{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004716 register PyStringObject *s = (PyStringObject *)(*p);
4717 PyObject *t;
4718 if (s == NULL || !PyString_Check(s))
4719 Py_FatalError("PyString_InternInPlace: strings only please!");
4720 /* If it's a string subclass, we don't really know what putting
4721 it in the interned dict might do. */
4722 if (!PyString_CheckExact(s))
4723 return;
4724 if (PyString_CHECK_INTERNED(s))
4725 return;
4726 if (interned == NULL) {
4727 interned = PyDict_New();
4728 if (interned == NULL) {
4729 PyErr_Clear(); /* Don't leave an exception */
4730 return;
4731 }
4732 }
4733 t = PyDict_GetItem(interned, (PyObject *)s);
4734 if (t) {
4735 Py_INCREF(t);
4736 Py_DECREF(*p);
4737 *p = t;
4738 return;
4739 }
Christian Heimes44720832008-05-26 13:01:01 +00004740
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004741 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4742 PyErr_Clear();
4743 return;
4744 }
4745 /* The two references in interned are not counted by refcnt.
4746 The string deallocator will take care of this */
4747 Py_REFCNT(s) -= 2;
4748 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004749}
4750
4751void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004752PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004753{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004754 PyString_InternInPlace(p);
4755 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4756 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4757 Py_INCREF(*p);
4758 }
Christian Heimes44720832008-05-26 13:01:01 +00004759}
4760
4761
4762PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004763PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004765 PyObject *s = PyString_FromString(cp);
4766 if (s == NULL)
4767 return NULL;
4768 PyString_InternInPlace(&s);
4769 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004770}
4771
4772void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004773PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004775 int i;
4776 for (i = 0; i < UCHAR_MAX + 1; i++) {
4777 Py_XDECREF(characters[i]);
4778 characters[i] = NULL;
4779 }
4780 Py_XDECREF(nullstring);
4781 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004782}
4783
4784void _Py_ReleaseInternedStrings(void)
4785{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004786 PyObject *keys;
4787 PyStringObject *s;
4788 Py_ssize_t i, n;
4789 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004790
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004791 if (interned == NULL || !PyDict_Check(interned))
4792 return;
4793 keys = PyDict_Keys(interned);
4794 if (keys == NULL || !PyList_Check(keys)) {
4795 PyErr_Clear();
4796 return;
4797 }
Christian Heimes44720832008-05-26 13:01:01 +00004798
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004799 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4800 detector, interned strings are not forcibly deallocated; rather, we
4801 give them their stolen references back, and then clear and DECREF
4802 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004803
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004804 n = PyList_GET_SIZE(keys);
4805 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4806 n);
4807 for (i = 0; i < n; i++) {
4808 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4809 switch (s->ob_sstate) {
4810 case SSTATE_NOT_INTERNED:
4811 /* XXX Shouldn't happen */
4812 break;
4813 case SSTATE_INTERNED_IMMORTAL:
4814 Py_REFCNT(s) += 1;
4815 immortal_size += Py_SIZE(s);
4816 break;
4817 case SSTATE_INTERNED_MORTAL:
4818 Py_REFCNT(s) += 2;
4819 mortal_size += Py_SIZE(s);
4820 break;
4821 default:
4822 Py_FatalError("Inconsistent interned string state.");
4823 }
4824 s->ob_sstate = SSTATE_NOT_INTERNED;
4825 }
4826 fprintf(stderr, "total size of all interned strings: "
4827 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4828 "mortal/immortal\n", mortal_size, immortal_size);
4829 Py_DECREF(keys);
4830 PyDict_Clear(interned);
4831 Py_DECREF(interned);
4832 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004833}