blob: 373439bca15f656136469dd16f6ae96ed9312f5e [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* Basic size of a string object: everything up to the ob_sval member plus
   one extra byte.  Any memory allocation for a string of length n should
   request PyStringObject_SIZE + n bytes.

   This is used in preference to sizeof(PyStringObject); doing so saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000038 For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 if (a->ob_shash != -1)
1266 return a->ob_shash;
1267 len = Py_SIZE(a);
1268 p = (unsigned char *) a->ob_sval;
1269 x = *p << 7;
1270 while (--len >= 0)
1271 x = (1000003*x) ^ *p++;
1272 x ^= Py_SIZE(a);
1273 if (x == -1)
1274 x = -2;
1275 a->ob_shash = x;
1276 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001277}
1278
1279static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001280string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001281{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 if (PyIndex_Check(item)) {
1283 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1284 if (i == -1 && PyErr_Occurred())
1285 return NULL;
1286 if (i < 0)
1287 i += PyString_GET_SIZE(self);
1288 return string_item(self, i);
1289 }
1290 else if (PySlice_Check(item)) {
1291 Py_ssize_t start, stop, step, slicelength, cur, i;
1292 char* source_buf;
1293 char* result_buf;
1294 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001296 if (PySlice_GetIndicesEx((PySliceObject*)item,
1297 PyString_GET_SIZE(self),
1298 &start, &stop, &step, &slicelength) < 0) {
1299 return NULL;
1300 }
Christian Heimes44720832008-05-26 13:01:01 +00001301
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001302 if (slicelength <= 0) {
1303 return PyString_FromStringAndSize("", 0);
1304 }
1305 else if (start == 0 && step == 1 &&
1306 slicelength == PyString_GET_SIZE(self) &&
1307 PyString_CheckExact(self)) {
1308 Py_INCREF(self);
1309 return (PyObject *)self;
1310 }
1311 else if (step == 1) {
1312 return PyString_FromStringAndSize(
1313 PyString_AS_STRING(self) + start,
1314 slicelength);
1315 }
1316 else {
1317 source_buf = PyString_AsString((PyObject*)self);
1318 result_buf = (char *)PyMem_Malloc(slicelength);
1319 if (result_buf == NULL)
1320 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001321
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001322 for (cur = start, i = 0; i < slicelength;
1323 cur += step, i++) {
1324 result_buf[i] = source_buf[cur];
1325 }
Christian Heimes44720832008-05-26 13:01:01 +00001326
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001327 result = PyString_FromStringAndSize(result_buf,
1328 slicelength);
1329 PyMem_Free(result_buf);
1330 return result;
1331 }
1332 }
1333 else {
1334 PyErr_Format(PyExc_TypeError,
1335 "string indices must be integers, not %.200s",
1336 Py_TYPE(item)->tp_name);
1337 return NULL;
1338 }
Christian Heimes44720832008-05-26 13:01:01 +00001339}
1340
1341static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001342string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 if ( index != 0 ) {
1345 PyErr_SetString(PyExc_SystemError,
1346 "accessing non-existent string segment");
1347 return -1;
1348 }
1349 *ptr = (void *)self->ob_sval;
1350 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001351}
1352
1353static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001354string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001355{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001356 PyErr_SetString(PyExc_TypeError,
1357 "Cannot use string as modifiable buffer");
1358 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001359}
1360
1361static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001362string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001363{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001364 if ( lenp )
1365 *lenp = Py_SIZE(self);
1366 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001367}
1368
1369static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001370string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001371{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 if ( index != 0 ) {
1373 PyErr_SetString(PyExc_SystemError,
1374 "accessing non-existent string segment");
1375 return -1;
1376 }
1377 *ptr = self->ob_sval;
1378 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001379}
1380
1381static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001382string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001383{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001384 return PyBuffer_FillInfo(view, (PyObject*)self,
1385 (void *)self->ob_sval, Py_SIZE(self),
1386 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001387}
1388
1389static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001390 (lenfunc)string_length, /*sq_length*/
1391 (binaryfunc)string_concat, /*sq_concat*/
1392 (ssizeargfunc)string_repeat, /*sq_repeat*/
1393 (ssizeargfunc)string_item, /*sq_item*/
1394 (ssizessizeargfunc)string_slice, /*sq_slice*/
1395 0, /*sq_ass_item*/
1396 0, /*sq_ass_slice*/
1397 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001398};
1399
1400static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 (lenfunc)string_length,
1402 (binaryfunc)string_subscript,
1403 0,
Christian Heimes44720832008-05-26 13:01:01 +00001404};
1405
1406static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (readbufferproc)string_buffer_getreadbuf,
1408 (writebufferproc)string_buffer_getwritebuf,
1409 (segcountproc)string_buffer_getsegcount,
1410 (charbufferproc)string_buffer_getcharbuf,
1411 (getbufferproc)string_buffer_getbuffer,
1412 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001413};
1414
1415
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001416
/* Selectors for the three strip variants; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* The entry for selector i without its three-character "|O:" prefix,
   i.e. just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1425
Christian Heimes1a6387e2008-03-26 12:49:49 +00001426PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001427"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428\n\
Christian Heimes44720832008-05-26 13:01:01 +00001429Return a list of the words in the string S, using sep as the\n\
1430delimiter string. If maxsplit is given, at most maxsplit\n\
1431splits are done. If sep is not specified or is None, any\n\
1432whitespace string is a separator and empty strings are removed\n\
1433from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001434
1435static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001436string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001438 Py_ssize_t len = PyString_GET_SIZE(self), n;
1439 Py_ssize_t maxsplit = -1;
1440 const char *s = PyString_AS_STRING(self), *sub;
1441 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001442
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001443 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1444 return NULL;
1445 if (maxsplit < 0)
1446 maxsplit = PY_SSIZE_T_MAX;
1447 if (subobj == Py_None)
1448 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1449 if (PyString_Check(subobj)) {
1450 sub = PyString_AS_STRING(subobj);
1451 n = PyString_GET_SIZE(subobj);
1452 }
Christian Heimes44720832008-05-26 13:01:01 +00001453#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001454 else if (PyUnicode_Check(subobj))
1455 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001456#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001457 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1458 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001460 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001461}
1462
1463PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001464"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001466Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001468found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469
1470static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001471string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 const char *sep;
1474 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001476 if (PyString_Check(sep_obj)) {
1477 sep = PyString_AS_STRING(sep_obj);
1478 sep_len = PyString_GET_SIZE(sep_obj);
1479 }
Christian Heimes44720832008-05-26 13:01:01 +00001480#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001481 else if (PyUnicode_Check(sep_obj))
1482 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001483#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001484 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1485 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001487 return stringlib_partition(
1488 (PyObject*) self,
1489 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1490 sep_obj, sep, sep_len
1491 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492}
1493
1494PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001495"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001497Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001498the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001499separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500
1501static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001502string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 const char *sep;
1505 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001507 if (PyString_Check(sep_obj)) {
1508 sep = PyString_AS_STRING(sep_obj);
1509 sep_len = PyString_GET_SIZE(sep_obj);
1510 }
Christian Heimes44720832008-05-26 13:01:01 +00001511#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001512 else if (PyUnicode_Check(sep_obj))
1513 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001514#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001515 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1516 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001518 return stringlib_rpartition(
1519 (PyObject*) self,
1520 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1521 sep_obj, sep, sep_len
1522 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523}
1524
Christian Heimes1a6387e2008-03-26 12:49:49 +00001525PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001526"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527\n\
Christian Heimes44720832008-05-26 13:01:01 +00001528Return a list of the words in the string S, using sep as the\n\
1529delimiter string, starting at the end of the string and working\n\
1530to the front. If maxsplit is given, at most maxsplit splits are\n\
1531done. If sep is not specified or is None, any whitespace string\n\
1532is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533
1534static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001535string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001537 Py_ssize_t len = PyString_GET_SIZE(self), n;
1538 Py_ssize_t maxsplit = -1;
1539 const char *s = PyString_AS_STRING(self), *sub;
1540 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001541
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001542 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1543 return NULL;
1544 if (maxsplit < 0)
1545 maxsplit = PY_SSIZE_T_MAX;
1546 if (subobj == Py_None)
1547 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1548 if (PyString_Check(subobj)) {
1549 sub = PyString_AS_STRING(subobj);
1550 n = PyString_GET_SIZE(subobj);
1551 }
Christian Heimes44720832008-05-26 13:01:01 +00001552#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 else if (PyUnicode_Check(subobj))
1554 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001555#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001559 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001560}
1561
1562
1563PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001564"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001565\n\
1566Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001567iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001568
1569static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001570string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001571{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 char *sep = PyString_AS_STRING(self);
1573 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1574 PyObject *res = NULL;
1575 char *p;
1576 Py_ssize_t seqlen = 0;
1577 size_t sz = 0;
1578 Py_ssize_t i;
1579 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001580
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001581 seq = PySequence_Fast(orig, "");
1582 if (seq == NULL) {
1583 return NULL;
1584 }
Christian Heimes44720832008-05-26 13:01:01 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 seqlen = PySequence_Size(seq);
1587 if (seqlen == 0) {
1588 Py_DECREF(seq);
1589 return PyString_FromString("");
1590 }
1591 if (seqlen == 1) {
1592 item = PySequence_Fast_GET_ITEM(seq, 0);
1593 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1594 Py_INCREF(item);
1595 Py_DECREF(seq);
1596 return item;
1597 }
1598 }
Christian Heimes44720832008-05-26 13:01:01 +00001599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001600 /* There are at least two things to join, or else we have a subclass
1601 * of the builtin types in the sequence.
1602 * Do a pre-pass to figure out the total amount of space we'll
1603 * need (sz), see whether any argument is absurd, and defer to
1604 * the Unicode join if appropriate.
1605 */
1606 for (i = 0; i < seqlen; i++) {
1607 const size_t old_sz = sz;
1608 item = PySequence_Fast_GET_ITEM(seq, i);
1609 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001610#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001611 if (PyUnicode_Check(item)) {
1612 /* Defer to Unicode join.
1613 * CAUTION: There's no gurantee that the
1614 * original sequence can be iterated over
1615 * again, so we must pass seq here.
1616 */
1617 PyObject *result;
1618 result = PyUnicode_Join((PyObject *)self, seq);
1619 Py_DECREF(seq);
1620 return result;
1621 }
Christian Heimes44720832008-05-26 13:01:01 +00001622#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001623 PyErr_Format(PyExc_TypeError,
1624 "sequence item %zd: expected string,"
1625 " %.80s found",
1626 i, Py_TYPE(item)->tp_name);
1627 Py_DECREF(seq);
1628 return NULL;
1629 }
1630 sz += PyString_GET_SIZE(item);
1631 if (i != 0)
1632 sz += seplen;
1633 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1634 PyErr_SetString(PyExc_OverflowError,
1635 "join() result is too long for a Python string");
1636 Py_DECREF(seq);
1637 return NULL;
1638 }
1639 }
Christian Heimes44720832008-05-26 13:01:01 +00001640
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001641 /* Allocate result space. */
1642 res = PyString_FromStringAndSize((char*)NULL, sz);
1643 if (res == NULL) {
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
Christian Heimes44720832008-05-26 13:01:01 +00001647
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001648 /* Catenate everything. */
1649 p = PyString_AS_STRING(res);
1650 for (i = 0; i < seqlen; ++i) {
1651 size_t n;
1652 item = PySequence_Fast_GET_ITEM(seq, i);
1653 n = PyString_GET_SIZE(item);
1654 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1655 p += n;
1656 if (i < seqlen - 1) {
1657 Py_MEMCPY(p, sep, seplen);
1658 p += seplen;
1659 }
1660 }
Christian Heimes44720832008-05-26 13:01:01 +00001661
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001662 Py_DECREF(seq);
1663 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001664}
1665
1666PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001667_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001668{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001669 assert(sep != NULL && PyString_Check(sep));
1670 assert(x != NULL);
1671 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001672}
1673
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clipped to `len`; a negative `end` or `start` is taken
 * relative to `len` and then clamped at 0.  `start` and `end` must be
 * modifiable lvalues.  `len` may be evaluated more than once, so it must
 * be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe under an un-braced if/else; invoke
 * it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001688
1689Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001690string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001692 PyObject *subobj;
1693 const char *sub;
1694 Py_ssize_t sub_len;
1695 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001696
Jesus Cea44e81682011-04-20 16:39:15 +02001697 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1698 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001699 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001700
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001701 if (PyString_Check(subobj)) {
1702 sub = PyString_AS_STRING(subobj);
1703 sub_len = PyString_GET_SIZE(subobj);
1704 }
Christian Heimes44720832008-05-26 13:01:01 +00001705#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001706 else if (PyUnicode_Check(subobj))
1707 return PyUnicode_Find(
1708 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001709#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001710 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1711 /* XXX - the "expected a character buffer object" is pretty
1712 confusing for a non-expert. remap to something else ? */
1713 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 if (dir > 0)
1716 return stringlib_find_slice(
1717 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1718 sub, sub_len, start, end);
1719 else
1720 return stringlib_rfind_slice(
1721 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1722 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001723}
1724
1725
1726PyDoc_STRVAR(find__doc__,
1727"S.find(sub [,start [,end]]) -> int\n\
1728\n\
1729Return the lowest index in S where substring sub is found,\n\
1730such that sub is contained within s[start:end]. Optional\n\
1731arguments start and end are interpreted as in slice notation.\n\
1732\n\
1733Return -1 on failure.");
1734
1735static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001736string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001737{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001738 Py_ssize_t result = string_find_internal(self, args, +1);
1739 if (result == -2)
1740 return NULL;
1741 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001742}
1743
1744
1745PyDoc_STRVAR(index__doc__,
1746"S.index(sub [,start [,end]]) -> int\n\
1747\n\
1748Like S.find() but raise ValueError when the substring is not found.");
1749
1750static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001751string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001752{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001753 Py_ssize_t result = string_find_internal(self, args, +1);
1754 if (result == -2)
1755 return NULL;
1756 if (result == -1) {
1757 PyErr_SetString(PyExc_ValueError,
1758 "substring not found");
1759 return NULL;
1760 }
1761 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001762}
1763
1764
1765PyDoc_STRVAR(rfind__doc__,
1766"S.rfind(sub [,start [,end]]) -> int\n\
1767\n\
1768Return the highest index in S where substring sub is found,\n\
1769such that sub is contained within s[start:end]. Optional\n\
1770arguments start and end are interpreted as in slice notation.\n\
1771\n\
1772Return -1 on failure.");
1773
1774static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001775string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001776{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001777 Py_ssize_t result = string_find_internal(self, args, -1);
1778 if (result == -2)
1779 return NULL;
1780 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001781}
1782
1783
1784PyDoc_STRVAR(rindex__doc__,
1785"S.rindex(sub [,start [,end]]) -> int\n\
1786\n\
1787Like S.rfind() but raise ValueError when the substring is not found.");
1788
1789static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001790string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001791{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001792 Py_ssize_t result = string_find_internal(self, args, -1);
1793 if (result == -2)
1794 return NULL;
1795 if (result == -1) {
1796 PyErr_SetString(PyExc_ValueError,
1797 "substring not found");
1798 return NULL;
1799 }
1800 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001801}
1802
1803
1804Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001805do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001806{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001807 char *s = PyString_AS_STRING(self);
1808 Py_ssize_t len = PyString_GET_SIZE(self);
1809 char *sep = PyString_AS_STRING(sepobj);
1810 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1811 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001812
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001813 i = 0;
1814 if (striptype != RIGHTSTRIP) {
1815 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1816 i++;
1817 }
1818 }
Christian Heimes44720832008-05-26 13:01:01 +00001819
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001820 j = len;
1821 if (striptype != LEFTSTRIP) {
1822 do {
1823 j--;
1824 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1825 j++;
1826 }
Christian Heimes44720832008-05-26 13:01:01 +00001827
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001828 if (i == 0 && j == len && PyString_CheckExact(self)) {
1829 Py_INCREF(self);
1830 return (PyObject*)self;
1831 }
1832 else
1833 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001834}
1835
1836
1837Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001838do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001839{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001840 char *s = PyString_AS_STRING(self);
1841 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001842
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001843 i = 0;
1844 if (striptype != RIGHTSTRIP) {
1845 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1846 i++;
1847 }
1848 }
Christian Heimes44720832008-05-26 13:01:01 +00001849
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001850 j = len;
1851 if (striptype != LEFTSTRIP) {
1852 do {
1853 j--;
1854 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1855 j++;
1856 }
Christian Heimes44720832008-05-26 13:01:01 +00001857
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001858 if (i == 0 && j == len && PyString_CheckExact(self)) {
1859 Py_INCREF(self);
1860 return (PyObject*)self;
1861 }
1862 else
1863 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001864}
1865
1866
1867Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001868do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001869{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001870 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001871
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001872 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1873 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001874
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001875 if (sep != NULL && sep != Py_None) {
1876 if (PyString_Check(sep))
1877 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001878#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001879 else if (PyUnicode_Check(sep)) {
1880 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1881 PyObject *res;
1882 if (uniself==NULL)
1883 return NULL;
1884 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1885 striptype, sep);
1886 Py_DECREF(uniself);
1887 return res;
1888 }
Christian Heimes44720832008-05-26 13:01:01 +00001889#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001890 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001891#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001892 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001893#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001894 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001895#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001896 STRIPNAME(striptype));
1897 return NULL;
1898 }
Christian Heimes44720832008-05-26 13:01:01 +00001899
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001900 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001901}
1902
1903
1904PyDoc_STRVAR(strip__doc__,
1905"S.strip([chars]) -> string or unicode\n\
1906\n\
1907Return a copy of the string S with leading and trailing\n\
1908whitespace removed.\n\
1909If chars is given and not None, remove characters in chars instead.\n\
1910If chars is unicode, S will be converted to unicode before stripping");
1911
1912static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001913string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001914{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001915 if (PyTuple_GET_SIZE(args) == 0)
1916 return do_strip(self, BOTHSTRIP); /* Common case */
1917 else
1918 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001919}
1920
1921
1922PyDoc_STRVAR(lstrip__doc__,
1923"S.lstrip([chars]) -> string or unicode\n\
1924\n\
1925Return a copy of the string S with leading whitespace removed.\n\
1926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
1928
1929static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001930string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001931{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, LEFTSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001936}
1937
1938
1939PyDoc_STRVAR(rstrip__doc__,
1940"S.rstrip([chars]) -> string or unicode\n\
1941\n\
1942Return a copy of the string S with trailing whitespace removed.\n\
1943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
1945
1946static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001947string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001948{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, RIGHTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001953}
1954
1955
1956PyDoc_STRVAR(lower__doc__,
1957"S.lower() -> string\n\
1958\n\
1959Return a copy of the string S converted to lowercase.");
1960
1961/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1962#ifndef _tolower
1963#define _tolower tolower
1964#endif
1965
1966static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001967string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001968{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001969 char *s;
1970 Py_ssize_t i, n = PyString_GET_SIZE(self);
1971 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001972
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001973 newobj = PyString_FromStringAndSize(NULL, n);
1974 if (!newobj)
1975 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001976
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001977 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001978
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001979 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001980
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001981 for (i = 0; i < n; i++) {
1982 int c = Py_CHARMASK(s[i]);
1983 if (isupper(c))
1984 s[i] = _tolower(c);
1985 }
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001988}
1989
1990PyDoc_STRVAR(upper__doc__,
1991"S.upper() -> string\n\
1992\n\
1993Return a copy of the string S converted to uppercase.");
1994
1995#ifndef _toupper
1996#define _toupper toupper
1997#endif
1998
1999static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002000string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002001{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002002 char *s;
2003 Py_ssize_t i, n = PyString_GET_SIZE(self);
2004 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002005
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002006 newobj = PyString_FromStringAndSize(NULL, n);
2007 if (!newobj)
2008 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002010 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002011
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002012 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002013
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002014 for (i = 0; i < n; i++) {
2015 int c = Py_CHARMASK(s[i]);
2016 if (islower(c))
2017 s[i] = _toupper(c);
2018 }
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002021}
2022
2023PyDoc_STRVAR(title__doc__,
2024"S.title() -> string\n\
2025\n\
2026Return a titlecased version of S, i.e. words start with uppercase\n\
2027characters, all remaining cased characters have lowercase.");
2028
2029static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002030string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002031{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002032 char *s = PyString_AS_STRING(self), *s_new;
2033 Py_ssize_t i, n = PyString_GET_SIZE(self);
2034 int previous_is_cased = 0;
2035 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002037 newobj = PyString_FromStringAndSize(NULL, n);
2038 if (newobj == NULL)
2039 return NULL;
2040 s_new = PyString_AsString(newobj);
2041 for (i = 0; i < n; i++) {
2042 int c = Py_CHARMASK(*s++);
2043 if (islower(c)) {
2044 if (!previous_is_cased)
2045 c = toupper(c);
2046 previous_is_cased = 1;
2047 } else if (isupper(c)) {
2048 if (previous_is_cased)
2049 c = tolower(c);
2050 previous_is_cased = 1;
2051 } else
2052 previous_is_cased = 0;
2053 *s_new++ = c;
2054 }
2055 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002056}
2057
2058PyDoc_STRVAR(capitalize__doc__,
2059"S.capitalize() -> string\n\
2060\n\
2061Return a copy of the string S with only its first character\n\
2062capitalized.");
2063
2064static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002065string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002066{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002067 char *s = PyString_AS_STRING(self), *s_new;
2068 Py_ssize_t i, n = PyString_GET_SIZE(self);
2069 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002070
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002071 newobj = PyString_FromStringAndSize(NULL, n);
2072 if (newobj == NULL)
2073 return NULL;
2074 s_new = PyString_AsString(newobj);
2075 if (0 < n) {
2076 int c = Py_CHARMASK(*s++);
2077 if (islower(c))
2078 *s_new = toupper(c);
2079 else
2080 *s_new = c;
2081 s_new++;
2082 }
2083 for (i = 1; i < n; i++) {
2084 int c = Py_CHARMASK(*s++);
2085 if (isupper(c))
2086 *s_new = tolower(c);
2087 else
2088 *s_new = c;
2089 s_new++;
2090 }
2091 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002092}
2093
2094
2095PyDoc_STRVAR(count__doc__,
2096"S.count(sub[, start[, end]]) -> int\n\
2097\n\
2098Return the number of non-overlapping occurrences of substring sub in\n\
2099string S[start:end]. Optional arguments start and end are interpreted\n\
2100as in slice notation.");
2101
2102static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002104{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002105 PyObject *sub_obj;
2106 const char *str = PyString_AS_STRING(self), *sub;
2107 Py_ssize_t sub_len;
2108 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002109
Jesus Cea44e81682011-04-20 16:39:15 +02002110 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002111 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002112
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002113 if (PyString_Check(sub_obj)) {
2114 sub = PyString_AS_STRING(sub_obj);
2115 sub_len = PyString_GET_SIZE(sub_obj);
2116 }
Christian Heimes44720832008-05-26 13:01:01 +00002117#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002118 else if (PyUnicode_Check(sub_obj)) {
2119 Py_ssize_t count;
2120 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2121 if (count == -1)
2122 return NULL;
2123 else
2124 return PyInt_FromSsize_t(count);
2125 }
Christian Heimes44720832008-05-26 13:01:01 +00002126#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002127 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2128 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002130 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002131
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002132 return PyInt_FromSsize_t(
2133 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2134 );
Christian Heimes44720832008-05-26 13:01:01 +00002135}
2136
2137PyDoc_STRVAR(swapcase__doc__,
2138"S.swapcase() -> string\n\
2139\n\
2140Return a copy of the string S with uppercase characters\n\
2141converted to lowercase and vice versa.");
2142
2143static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002144string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002145{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002146 char *s = PyString_AS_STRING(self), *s_new;
2147 Py_ssize_t i, n = PyString_GET_SIZE(self);
2148 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002150 newobj = PyString_FromStringAndSize(NULL, n);
2151 if (newobj == NULL)
2152 return NULL;
2153 s_new = PyString_AsString(newobj);
2154 for (i = 0; i < n; i++) {
2155 int c = Py_CHARMASK(*s++);
2156 if (islower(c)) {
2157 *s_new = toupper(c);
2158 }
2159 else if (isupper(c)) {
2160 *s_new = tolower(c);
2161 }
2162 else
2163 *s_new = c;
2164 s_new++;
2165 }
2166 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002167}
2168
2169
2170PyDoc_STRVAR(translate__doc__,
2171"S.translate(table [,deletechars]) -> string\n\
2172\n\
2173Return a copy of the string S, where all characters occurring\n\
2174in the optional argument deletechars are removed, and the\n\
2175remaining characters have been mapped through the given\n\
2176translation table, which must be a string of length 256.");
2177
2178static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002179string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002180{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002181 register char *input, *output;
2182 const char *table;
2183 register Py_ssize_t i, c, changed = 0;
2184 PyObject *input_obj = (PyObject*)self;
2185 const char *output_start, *del_table=NULL;
2186 Py_ssize_t inlen, tablen, dellen = 0;
2187 PyObject *result;
2188 int trans_table[256];
2189 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002190
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002191 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2192 &tableobj, &delobj))
2193 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002194
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002195 if (PyString_Check(tableobj)) {
2196 table = PyString_AS_STRING(tableobj);
2197 tablen = PyString_GET_SIZE(tableobj);
2198 }
2199 else if (tableobj == Py_None) {
2200 table = NULL;
2201 tablen = 256;
2202 }
Christian Heimes44720832008-05-26 13:01:01 +00002203#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002204 else if (PyUnicode_Check(tableobj)) {
2205 /* Unicode .translate() does not support the deletechars
2206 parameter; instead a mapping to None will cause characters
2207 to be deleted. */
2208 if (delobj != NULL) {
2209 PyErr_SetString(PyExc_TypeError,
2210 "deletions are implemented differently for unicode");
2211 return NULL;
2212 }
2213 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2214 }
Christian Heimes44720832008-05-26 13:01:01 +00002215#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002216 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2217 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002218
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002219 if (tablen != 256) {
2220 PyErr_SetString(PyExc_ValueError,
2221 "translation table must be 256 characters long");
2222 return NULL;
2223 }
Christian Heimes44720832008-05-26 13:01:01 +00002224
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002225 if (delobj != NULL) {
2226 if (PyString_Check(delobj)) {
2227 del_table = PyString_AS_STRING(delobj);
2228 dellen = PyString_GET_SIZE(delobj);
2229 }
Christian Heimes44720832008-05-26 13:01:01 +00002230#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002231 else if (PyUnicode_Check(delobj)) {
2232 PyErr_SetString(PyExc_TypeError,
2233 "deletions are implemented differently for unicode");
2234 return NULL;
2235 }
Christian Heimes44720832008-05-26 13:01:01 +00002236#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002237 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2238 return NULL;
2239 }
2240 else {
2241 del_table = NULL;
2242 dellen = 0;
2243 }
Christian Heimes44720832008-05-26 13:01:01 +00002244
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002245 inlen = PyString_GET_SIZE(input_obj);
2246 result = PyString_FromStringAndSize((char *)NULL, inlen);
2247 if (result == NULL)
2248 return NULL;
2249 output_start = output = PyString_AsString(result);
2250 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002251
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002252 if (dellen == 0 && table != NULL) {
2253 /* If no deletions are required, use faster code */
2254 for (i = inlen; --i >= 0; ) {
2255 c = Py_CHARMASK(*input++);
2256 if (Py_CHARMASK((*output++ = table[c])) != c)
2257 changed = 1;
2258 }
2259 if (changed || !PyString_CheckExact(input_obj))
2260 return result;
2261 Py_DECREF(result);
2262 Py_INCREF(input_obj);
2263 return input_obj;
2264 }
Christian Heimes44720832008-05-26 13:01:01 +00002265
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002266 if (table == NULL) {
2267 for (i = 0; i < 256; i++)
2268 trans_table[i] = Py_CHARMASK(i);
2269 } else {
2270 for (i = 0; i < 256; i++)
2271 trans_table[i] = Py_CHARMASK(table[i]);
2272 }
Christian Heimes44720832008-05-26 13:01:01 +00002273
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002274 for (i = 0; i < dellen; i++)
2275 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002276
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002277 for (i = inlen; --i >= 0; ) {
2278 c = Py_CHARMASK(*input++);
2279 if (trans_table[c] != -1)
2280 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2281 continue;
2282 changed = 1;
2283 }
2284 if (!changed && PyString_CheckExact(input_obj)) {
2285 Py_DECREF(result);
2286 Py_INCREF(input_obj);
2287 return input_obj;
2288 }
2289 /* Fix the size of the resulting string */
2290 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2291 return NULL;
2292 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002293}
2294
2295
Christian Heimes44720832008-05-26 13:01:01 +00002296/* find and count characters and substrings */
2297
/* Locate byte `c` within the first `target_len` bytes of `target`;
 * yields a char * to the first match, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2300
2301/* String ops must return a string. */
2302/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002303Py_LOCAL(PyStringObject *)
2304return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002305{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002306 if (PyString_CheckExact(self)) {
2307 Py_INCREF(self);
2308 return self;
2309 }
2310 return (PyStringObject *)PyString_FromStringAndSize(
2311 PyString_AS_STRING(self),
2312 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002313}
2314
2315Py_LOCAL_INLINE(Py_ssize_t)
2316countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2317{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002318 Py_ssize_t count=0;
2319 const char *start=target;
2320 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002321
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002322 while ( (start=findchar(start, end-start, c)) != NULL ) {
2323 count++;
2324 if (count >= maxcount)
2325 break;
2326 start += 1;
2327 }
2328 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002329}
2330
Christian Heimes44720832008-05-26 13:01:01 +00002331
2332/* Algorithms for different cases of string replacement */
2333
2334/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002335Py_LOCAL(PyStringObject *)
2336replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 const char *to_s, Py_ssize_t to_len,
2338 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002339{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002340 char *self_s, *result_s;
2341 Py_ssize_t self_len, result_len;
2342 Py_ssize_t count, i, product;
2343 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002344
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002345 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002346
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002347 /* 1 at the end plus 1 after every character */
2348 count = self_len+1;
2349 if (maxcount < count)
2350 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002351
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002352 /* Check for overflow */
2353 /* result_len = count * to_len + self_len; */
2354 product = count * to_len;
2355 if (product / to_len != count) {
2356 PyErr_SetString(PyExc_OverflowError,
2357 "replace string is too long");
2358 return NULL;
2359 }
2360 result_len = product + self_len;
2361 if (result_len < 0) {
2362 PyErr_SetString(PyExc_OverflowError,
2363 "replace string is too long");
2364 return NULL;
2365 }
Christian Heimes44720832008-05-26 13:01:01 +00002366
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002367 if (! (result = (PyStringObject *)
2368 PyString_FromStringAndSize(NULL, result_len)) )
2369 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002370
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002371 self_s = PyString_AS_STRING(self);
2372 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002373
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002374 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002375
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002376 /* Lay the first one down (guaranteed this will occur) */
2377 Py_MEMCPY(result_s, to_s, to_len);
2378 result_s += to_len;
2379 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002380
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002381 for (i=0; i<count; i++) {
2382 *result_s++ = *self_s++;
2383 Py_MEMCPY(result_s, to_s, to_len);
2384 result_s += to_len;
2385 }
2386
2387 /* Copy the rest of the original string */
2388 Py_MEMCPY(result_s, self_s, self_len-i);
2389
2390 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002391}
2392
2393/* Special case for deleting a single character */
2394/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002395Py_LOCAL(PyStringObject *)
2396replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002397 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002398{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002399 char *self_s, *result_s;
2400 char *start, *next, *end;
2401 Py_ssize_t self_len, result_len;
2402 Py_ssize_t count;
2403 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002404
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002405 self_len = PyString_GET_SIZE(self);
2406 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002407
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002408 count = countchar(self_s, self_len, from_c, maxcount);
2409 if (count == 0) {
2410 return return_self(self);
2411 }
Christian Heimes44720832008-05-26 13:01:01 +00002412
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002413 result_len = self_len - count; /* from_len == 1 */
2414 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002415
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002416 if ( (result = (PyStringObject *)
2417 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2418 return NULL;
2419 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002420
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002421 start = self_s;
2422 end = self_s + self_len;
2423 while (count-- > 0) {
2424 next = findchar(start, end-start, from_c);
2425 if (next == NULL)
2426 break;
2427 Py_MEMCPY(result_s, start, next-start);
2428 result_s += (next-start);
2429 start = next+1;
2430 }
2431 Py_MEMCPY(result_s, start, end-start);
2432
2433 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002434}
2435
2436/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2437
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002438Py_LOCAL(PyStringObject *)
2439replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002440 const char *from_s, Py_ssize_t from_len,
2441 Py_ssize_t maxcount) {
2442 char *self_s, *result_s;
2443 char *start, *next, *end;
2444 Py_ssize_t self_len, result_len;
2445 Py_ssize_t count, offset;
2446 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002447
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002448 self_len = PyString_GET_SIZE(self);
2449 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002450
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002451 count = stringlib_count(self_s, self_len,
2452 from_s, from_len,
2453 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002454
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002455 if (count == 0) {
2456 /* no matches */
2457 return return_self(self);
2458 }
Christian Heimes44720832008-05-26 13:01:01 +00002459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002460 result_len = self_len - (count * from_len);
2461 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002463 if ( (result = (PyStringObject *)
2464 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2465 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002466
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002467 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002468
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002469 start = self_s;
2470 end = self_s + self_len;
2471 while (count-- > 0) {
2472 offset = stringlib_find(start, end-start,
2473 from_s, from_len,
2474 0);
2475 if (offset == -1)
2476 break;
2477 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002478
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002479 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002481 result_s += (next-start);
2482 start = next+from_len;
2483 }
2484 Py_MEMCPY(result_s, start, end-start);
2485 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002486}
2487
2488/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002489Py_LOCAL(PyStringObject *)
2490replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002491 char from_c, char to_c,
2492 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002493{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002494 char *self_s, *result_s, *start, *end, *next;
2495 Py_ssize_t self_len;
2496 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002497
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002498 /* The result string will be the same size */
2499 self_s = PyString_AS_STRING(self);
2500 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002501
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002502 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002503
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002504 if (next == NULL) {
2505 /* No matches; return the original string */
2506 return return_self(self);
2507 }
Christian Heimes44720832008-05-26 13:01:01 +00002508
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 /* Need to make a new string */
2510 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2511 if (result == NULL)
2512 return NULL;
2513 result_s = PyString_AS_STRING(result);
2514 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002515
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002516 /* change everything in-place, starting with this one */
2517 start = result_s + (next-self_s);
2518 *start = to_c;
2519 start++;
2520 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002522 while (--maxcount > 0) {
2523 next = findchar(start, end-start, from_c);
2524 if (next == NULL)
2525 break;
2526 *next = to_c;
2527 start = next+1;
2528 }
Christian Heimes44720832008-05-26 13:01:01 +00002529
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002530 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002531}
2532
2533/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002534Py_LOCAL(PyStringObject *)
2535replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002536 const char *from_s, Py_ssize_t from_len,
2537 const char *to_s, Py_ssize_t to_len,
2538 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002539{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002540 char *result_s, *start, *end;
2541 char *self_s;
2542 Py_ssize_t self_len, offset;
2543 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002544
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002545 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002546
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002547 self_s = PyString_AS_STRING(self);
2548 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002549
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002550 offset = stringlib_find(self_s, self_len,
2551 from_s, from_len,
2552 0);
2553 if (offset == -1) {
2554 /* No matches; return the original string */
2555 return return_self(self);
2556 }
Christian Heimes44720832008-05-26 13:01:01 +00002557
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002558 /* Need to make a new string */
2559 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2560 if (result == NULL)
2561 return NULL;
2562 result_s = PyString_AS_STRING(result);
2563 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 /* change everything in-place, starting with this one */
2566 start = result_s + offset;
2567 Py_MEMCPY(start, to_s, from_len);
2568 start += from_len;
2569 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002570
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002571 while ( --maxcount > 0) {
2572 offset = stringlib_find(start, end-start,
2573 from_s, from_len,
2574 0);
2575 if (offset==-1)
2576 break;
2577 Py_MEMCPY(start+offset, to_s, from_len);
2578 start += offset+from_len;
2579 }
Christian Heimes44720832008-05-26 13:01:01 +00002580
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002581 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002582}
2583
2584/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002585Py_LOCAL(PyStringObject *)
2586replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002587 char from_c,
2588 const char *to_s, Py_ssize_t to_len,
2589 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002590{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002591 char *self_s, *result_s;
2592 char *start, *next, *end;
2593 Py_ssize_t self_len, result_len;
2594 Py_ssize_t count, product;
2595 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002596
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002597 self_s = PyString_AS_STRING(self);
2598 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002600 count = countchar(self_s, self_len, from_c, maxcount);
2601 if (count == 0) {
2602 /* no matches, return unchanged */
2603 return return_self(self);
2604 }
Christian Heimes44720832008-05-26 13:01:01 +00002605
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002606 /* use the difference between current and new, hence the "-1" */
2607 /* result_len = self_len + count * (to_len-1) */
2608 product = count * (to_len-1);
2609 if (product / (to_len-1) != count) {
2610 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2611 return NULL;
2612 }
2613 result_len = self_len + product;
2614 if (result_len < 0) {
2615 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2616 return NULL;
2617 }
Christian Heimes44720832008-05-26 13:01:01 +00002618
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002619 if ( (result = (PyStringObject *)
2620 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2621 return NULL;
2622 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002624 start = self_s;
2625 end = self_s + self_len;
2626 while (count-- > 0) {
2627 next = findchar(start, end-start, from_c);
2628 if (next == NULL)
2629 break;
Christian Heimes44720832008-05-26 13:01:01 +00002630
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002631 if (next == start) {
2632 /* replace with the 'to' */
2633 Py_MEMCPY(result_s, to_s, to_len);
2634 result_s += to_len;
2635 start += 1;
2636 } else {
2637 /* copy the unchanged old then the 'to' */
2638 Py_MEMCPY(result_s, start, next-start);
2639 result_s += (next-start);
2640 Py_MEMCPY(result_s, to_s, to_len);
2641 result_s += to_len;
2642 start = next+1;
2643 }
2644 }
2645 /* Copy the remainder of the remaining string */
2646 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002647
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002648 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002649}
2650
2651/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002652Py_LOCAL(PyStringObject *)
2653replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002654 const char *from_s, Py_ssize_t from_len,
2655 const char *to_s, Py_ssize_t to_len,
2656 Py_ssize_t maxcount) {
2657 char *self_s, *result_s;
2658 char *start, *next, *end;
2659 Py_ssize_t self_len, result_len;
2660 Py_ssize_t count, offset, product;
2661 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002662
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002663 self_s = PyString_AS_STRING(self);
2664 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002665
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002666 count = stringlib_count(self_s, self_len,
2667 from_s, from_len,
2668 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002669
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002670 if (count == 0) {
2671 /* no matches, return unchanged */
2672 return return_self(self);
2673 }
Christian Heimes44720832008-05-26 13:01:01 +00002674
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002675 /* Check for overflow */
2676 /* result_len = self_len + count * (to_len-from_len) */
2677 product = count * (to_len-from_len);
2678 if (product / (to_len-from_len) != count) {
2679 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2680 return NULL;
2681 }
2682 result_len = self_len + product;
2683 if (result_len < 0) {
2684 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2685 return NULL;
2686 }
Christian Heimes44720832008-05-26 13:01:01 +00002687
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002688 if ( (result = (PyStringObject *)
2689 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2690 return NULL;
2691 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002692
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002693 start = self_s;
2694 end = self_s + self_len;
2695 while (count-- > 0) {
2696 offset = stringlib_find(start, end-start,
2697 from_s, from_len,
2698 0);
2699 if (offset == -1)
2700 break;
2701 next = start+offset;
2702 if (next == start) {
2703 /* replace with the 'to' */
2704 Py_MEMCPY(result_s, to_s, to_len);
2705 result_s += to_len;
2706 start += from_len;
2707 } else {
2708 /* copy the unchanged old then the 'to' */
2709 Py_MEMCPY(result_s, start, next-start);
2710 result_s += (next-start);
2711 Py_MEMCPY(result_s, to_s, to_len);
2712 result_s += to_len;
2713 start = next+from_len;
2714 }
2715 }
2716 /* Copy the remainder of the remaining string */
2717 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002718
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002719 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002720}
2721
2722
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002723Py_LOCAL(PyStringObject *)
2724replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002725 const char *from_s, Py_ssize_t from_len,
2726 const char *to_s, Py_ssize_t to_len,
2727 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002728{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002729 if (maxcount < 0) {
2730 maxcount = PY_SSIZE_T_MAX;
2731 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2732 /* nothing to do; return the original string */
2733 return return_self(self);
2734 }
Christian Heimes44720832008-05-26 13:01:01 +00002735
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002736 if (maxcount == 0 ||
2737 (from_len == 0 && to_len == 0)) {
2738 /* nothing to do; return the original string */
2739 return return_self(self);
2740 }
Christian Heimes44720832008-05-26 13:01:01 +00002741
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002742 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002743
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 if (from_len == 0) {
2745 /* insert the 'to' string everywhere. */
2746 /* >>> "Python".replace("", ".") */
2747 /* '.P.y.t.h.o.n.' */
2748 return replace_interleave(self, to_s, to_len, maxcount);
2749 }
Christian Heimes44720832008-05-26 13:01:01 +00002750
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002751 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2752 /* point for an empty self string to generate a non-empty string */
2753 /* Special case so the remaining code always gets a non-empty string */
2754 if (PyString_GET_SIZE(self) == 0) {
2755 return return_self(self);
2756 }
Christian Heimes44720832008-05-26 13:01:01 +00002757
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002758 if (to_len == 0) {
2759 /* delete all occurances of 'from' string */
2760 if (from_len == 1) {
2761 return replace_delete_single_character(
2762 self, from_s[0], maxcount);
2763 } else {
2764 return replace_delete_substring(self, from_s, from_len, maxcount);
2765 }
2766 }
Christian Heimes44720832008-05-26 13:01:01 +00002767
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002768 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002769
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002770 if (from_len == to_len) {
2771 if (from_len == 1) {
2772 return replace_single_character_in_place(
2773 self,
2774 from_s[0],
2775 to_s[0],
2776 maxcount);
2777 } else {
2778 return replace_substring_in_place(
2779 self, from_s, from_len, to_s, to_len, maxcount);
2780 }
2781 }
Christian Heimes44720832008-05-26 13:01:01 +00002782
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002783 /* Otherwise use the more generic algorithms */
2784 if (from_len == 1) {
2785 return replace_single_character(self, from_s[0],
2786 to_s, to_len, maxcount);
2787 } else {
2788 /* len('from')>=2, len('to')>=1 */
2789 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2790 }
Christian Heimes44720832008-05-26 13:01:01 +00002791}
2792
2793PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002794"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002795\n\
2796Return a copy of string S with all occurrences of substring\n\
2797old replaced by new. If the optional argument count is\n\
2798given, only the first count occurrences are replaced.");
2799
2800static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002801string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002803 Py_ssize_t count = -1;
2804 PyObject *from, *to;
2805 const char *from_s, *to_s;
2806 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002807
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002808 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2809 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002810
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002811 if (PyString_Check(from)) {
2812 from_s = PyString_AS_STRING(from);
2813 from_len = PyString_GET_SIZE(from);
2814 }
Christian Heimes44720832008-05-26 13:01:01 +00002815#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002816 if (PyUnicode_Check(from))
2817 return PyUnicode_Replace((PyObject *)self,
2818 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002819#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002820 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2821 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002823 if (PyString_Check(to)) {
2824 to_s = PyString_AS_STRING(to);
2825 to_len = PyString_GET_SIZE(to);
2826 }
Christian Heimes44720832008-05-26 13:01:01 +00002827#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002828 else if (PyUnicode_Check(to))
2829 return PyUnicode_Replace((PyObject *)self,
2830 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002831#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002832 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2833 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002834
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002835 return (PyObject *)replace((PyStringObject *) self,
2836 from_s, from_len,
2837 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002838}
2839
2840/** End DALKE **/
2841
2842/* Matches the end (direction >= 0) or start (direction < 0) of self
2843 * against substr, using the start and end arguments. Returns
2844 * -1 on error, 0 if not found and 1 if found.
2845 */
2846Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002847_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002848 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002849{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 Py_ssize_t len = PyString_GET_SIZE(self);
2851 Py_ssize_t slen;
2852 const char* sub;
2853 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002854
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002855 if (PyString_Check(substr)) {
2856 sub = PyString_AS_STRING(substr);
2857 slen = PyString_GET_SIZE(substr);
2858 }
Christian Heimes44720832008-05-26 13:01:01 +00002859#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002860 else if (PyUnicode_Check(substr))
2861 return PyUnicode_Tailmatch((PyObject *)self,
2862 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002863#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002864 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2865 return -1;
2866 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002867
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002868 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002869
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 if (direction < 0) {
2871 /* startswith */
2872 if (start+slen > len)
2873 return 0;
2874 } else {
2875 /* endswith */
2876 if (end-start < slen || start > len)
2877 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002878
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 if (end-slen > start)
2880 start = end - slen;
2881 }
2882 if (end-start >= slen)
2883 return ! memcmp(str+start, sub, slen);
2884 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002885}
2886
2887
2888PyDoc_STRVAR(startswith__doc__,
2889"S.startswith(prefix[, start[, end]]) -> bool\n\
2890\n\
2891Return True if S starts with the specified prefix, False otherwise.\n\
2892With optional start, test S beginning at that position.\n\
2893With optional end, stop comparing S at that position.\n\
2894prefix can also be a tuple of strings to try.");
2895
2896static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002897string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002898{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002899 Py_ssize_t start = 0;
2900 Py_ssize_t end = PY_SSIZE_T_MAX;
2901 PyObject *subobj;
2902 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002903
Jesus Cea44e81682011-04-20 16:39:15 +02002904 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002905 return NULL;
2906 if (PyTuple_Check(subobj)) {
2907 Py_ssize_t i;
2908 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2909 result = _string_tailmatch(self,
2910 PyTuple_GET_ITEM(subobj, i),
2911 start, end, -1);
2912 if (result == -1)
2913 return NULL;
2914 else if (result) {
2915 Py_RETURN_TRUE;
2916 }
2917 }
2918 Py_RETURN_FALSE;
2919 }
2920 result = _string_tailmatch(self, subobj, start, end, -1);
2921 if (result == -1)
2922 return NULL;
2923 else
2924 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002925}
2926
2927
2928PyDoc_STRVAR(endswith__doc__,
2929"S.endswith(suffix[, start[, end]]) -> bool\n\
2930\n\
2931Return True if S ends with the specified suffix, False otherwise.\n\
2932With optional start, test S beginning at that position.\n\
2933With optional end, stop comparing S at that position.\n\
2934suffix can also be a tuple of strings to try.");
2935
2936static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002937string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002938{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002939 Py_ssize_t start = 0;
2940 Py_ssize_t end = PY_SSIZE_T_MAX;
2941 PyObject *subobj;
2942 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002943
Jesus Cea44e81682011-04-20 16:39:15 +02002944 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002945 return NULL;
2946 if (PyTuple_Check(subobj)) {
2947 Py_ssize_t i;
2948 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2949 result = _string_tailmatch(self,
2950 PyTuple_GET_ITEM(subobj, i),
2951 start, end, +1);
2952 if (result == -1)
2953 return NULL;
2954 else if (result) {
2955 Py_RETURN_TRUE;
2956 }
2957 }
2958 Py_RETURN_FALSE;
2959 }
2960 result = _string_tailmatch(self, subobj, start, end, +1);
2961 if (result == -1)
2962 return NULL;
2963 else
2964 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002965}
2966
2967
2968PyDoc_STRVAR(encode__doc__,
2969"S.encode([encoding[,errors]]) -> object\n\
2970\n\
2971Encodes S using the codec registered for encoding. encoding defaults\n\
2972to the default encoding. errors may be given to set a different error\n\
2973handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2974a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2975'xmlcharrefreplace' as well as any other name registered with\n\
2976codecs.register_error that is able to handle UnicodeEncodeErrors.");
2977
2978static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002979string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002980{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002981 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002982 char *encoding = NULL;
2983 char *errors = NULL;
2984 PyObject *v;
2985
Benjamin Peterson332d7212009-09-18 21:14:55 +00002986 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002987 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00002988 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002989 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00002990 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00002991 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002992 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00002993 PyErr_Format(PyExc_TypeError,
2994 "encoder did not return a string/unicode object "
2995 "(type=%.400s)",
2996 Py_TYPE(v)->tp_name);
2997 Py_DECREF(v);
2998 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002999 }
3000 return v;
3001
3002 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003003 return NULL;
3004}
3005
Christian Heimes44720832008-05-26 13:01:01 +00003006
3007PyDoc_STRVAR(decode__doc__,
3008"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003009\n\
Christian Heimes44720832008-05-26 13:01:01 +00003010Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003011to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003012handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3013a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003014as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003015able to handle UnicodeDecodeErrors.");
3016
3017static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003018string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003019{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003020 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003021 char *encoding = NULL;
3022 char *errors = NULL;
3023 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003024
Benjamin Peterson332d7212009-09-18 21:14:55 +00003025 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003026 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003027 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003028 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003029 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003030 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003031 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003032 PyErr_Format(PyExc_TypeError,
3033 "decoder did not return a string/unicode object "
3034 "(type=%.400s)",
3035 Py_TYPE(v)->tp_name);
3036 Py_DECREF(v);
3037 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003038 }
3039 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003040
Christian Heimes44720832008-05-26 13:01:01 +00003041 onError:
3042 return NULL;
3043}
3044
3045
3046PyDoc_STRVAR(expandtabs__doc__,
3047"S.expandtabs([tabsize]) -> string\n\
3048\n\
3049Return a copy of S where all tab characters are expanded using spaces.\n\
3050If tabsize is not given, a tab size of 8 characters is assumed.");
3051
3052static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003053string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003054{
3055 const char *e, *p, *qe;
3056 char *q;
3057 Py_ssize_t i, j, incr;
3058 PyObject *u;
3059 int tabsize = 8;
3060
3061 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003062 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003063
3064 /* First pass: determine size of output string */
3065 i = 0; /* chars up to and including most recent \n or \r */
3066 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003067 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3068 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003069 if (*p == '\t') {
3070 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003071 incr = tabsize - (j % tabsize);
3072 if (j > PY_SSIZE_T_MAX - incr)
3073 goto overflow1;
3074 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003075 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003076 }
3077 else {
3078 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003079 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003080 j++;
3081 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003082 if (i > PY_SSIZE_T_MAX - j)
3083 goto overflow1;
3084 i += j;
3085 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003086 }
3087 }
Christian Heimes44720832008-05-26 13:01:01 +00003088
3089 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003090 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003091
3092 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003093 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003094 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003095 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003096
3097 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003098 q = PyString_AS_STRING(u); /* next output char */
3099 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003100
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003101 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003102 if (*p == '\t') {
3103 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003104 i = tabsize - (j % tabsize);
3105 j += i;
3106 while (i--) {
3107 if (q >= qe)
3108 goto overflow2;
3109 *q++ = ' ';
3110 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003111 }
3112 }
3113 else {
3114 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003115 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003116 *q++ = *p;
3117 j++;
3118 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003119 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003120 }
Christian Heimes44720832008-05-26 13:01:01 +00003121
3122 return u;
3123
3124 overflow2:
3125 Py_DECREF(u);
3126 overflow1:
3127 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3128 return NULL;
3129}
3130
3131Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003132pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003133{
3134 PyObject *u;
3135
3136 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003137 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003138 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003139 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003140
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003141 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003142 Py_INCREF(self);
3143 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003144 }
3145
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003146 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003147 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003148 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003149 if (left)
3150 memset(PyString_AS_STRING(u), fill, left);
3151 Py_MEMCPY(PyString_AS_STRING(u) + left,
3152 PyString_AS_STRING(self),
3153 PyString_GET_SIZE(self));
3154 if (right)
3155 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3156 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003157 }
3158
3159 return u;
3160}
3161
3162PyDoc_STRVAR(ljust__doc__,
3163"S.ljust(width[, fillchar]) -> string\n"
3164"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003165"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003166"done using the specified fill character (default is a space).");
3167
3168static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003169string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003170{
3171 Py_ssize_t width;
3172 char fillchar = ' ';
3173
3174 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003175 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003176
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003177 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003178 Py_INCREF(self);
3179 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003180 }
3181
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003182 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003183}
3184
3185
3186PyDoc_STRVAR(rjust__doc__,
3187"S.rjust(width[, fillchar]) -> string\n"
3188"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003189"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003190"done using the specified fill character (default is a space)");
3191
3192static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003193string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003194{
3195 Py_ssize_t width;
3196 char fillchar = ' ';
3197
3198 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003199 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003200
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003201 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003202 Py_INCREF(self);
3203 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003204 }
3205
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003206 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003207}
3208
3209
3210PyDoc_STRVAR(center__doc__,
3211"S.center(width[, fillchar]) -> string\n"
3212"\n"
3213"Return S centered in a string of length width. Padding is\n"
3214"done using the specified fill character (default is a space)");
3215
3216static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003217string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003218{
3219 Py_ssize_t marg, left;
3220 Py_ssize_t width;
3221 char fillchar = ' ';
3222
3223 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003224 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003225
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003226 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003227 Py_INCREF(self);
3228 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003229 }
3230
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003231 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003232 left = marg / 2 + (marg & width & 1);
3233
3234 return pad(self, left, marg - left, fillchar);
3235}
3236
3237PyDoc_STRVAR(zfill__doc__,
3238"S.zfill(width) -> string\n"
3239"\n"
3240"Pad a numeric string S with zeros on the left, to fill a field\n"
3241"of the specified width. The string S is never truncated.");
3242
3243static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003244string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003245{
3246 Py_ssize_t fill;
3247 PyObject *s;
3248 char *p;
3249 Py_ssize_t width;
3250
3251 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003252 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003253
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003254 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003255 if (PyString_CheckExact(self)) {
3256 Py_INCREF(self);
3257 return (PyObject*) self;
3258 }
3259 else
3260 return PyString_FromStringAndSize(
3261 PyString_AS_STRING(self),
3262 PyString_GET_SIZE(self)
3263 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003264 }
3265
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003266 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003267
Christian Heimes44720832008-05-26 13:01:01 +00003268 s = pad(self, fill, 0, '0');
3269
3270 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003272
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003273 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003274 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003275 /* move sign to beginning of string */
3276 p[0] = p[fill];
3277 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003278 }
3279
3280 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003281}
3282
Christian Heimes44720832008-05-26 13:01:01 +00003283PyDoc_STRVAR(isspace__doc__,
3284"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003285\n\
Christian Heimes44720832008-05-26 13:01:01 +00003286Return True if all characters in S are whitespace\n\
3287and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003288
Christian Heimes44720832008-05-26 13:01:01 +00003289static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003290string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003291{
Christian Heimes44720832008-05-26 13:01:01 +00003292 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003293 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003294 register const unsigned char *e;
3295
3296 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003297 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003298 isspace(*p))
3299 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003300
3301 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003302 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003303 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003304
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003305 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003306 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003307 if (!isspace(*p))
3308 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003309 }
Christian Heimes44720832008-05-26 13:01:01 +00003310 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311}
3312
Christian Heimes44720832008-05-26 13:01:01 +00003313
3314PyDoc_STRVAR(isalpha__doc__,
3315"S.isalpha() -> bool\n\
3316\n\
3317Return True if all characters in S are alphabetic\n\
3318and there is at least one character in S, False otherwise.");
3319
3320static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003321string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003322{
Christian Heimes44720832008-05-26 13:01:01 +00003323 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003325 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003326
Christian Heimes44720832008-05-26 13:01:01 +00003327 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003328 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003329 isalpha(*p))
3330 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003331
3332 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003333 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003334 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003335
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003336 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003337 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003338 if (!isalpha(*p))
3339 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003340 }
Christian Heimes44720832008-05-26 13:01:01 +00003341 return PyBool_FromLong(1);
3342}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003343
Christian Heimes44720832008-05-26 13:01:01 +00003344
3345PyDoc_STRVAR(isalnum__doc__,
3346"S.isalnum() -> bool\n\
3347\n\
3348Return True if all characters in S are alphanumeric\n\
3349and there is at least one character in S, False otherwise.");
3350
3351static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003352string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003353{
3354 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003356 register const unsigned char *e;
3357
3358 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003360 isalnum(*p))
3361 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003362
3363 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003364 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003365 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003366
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003367 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003368 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003369 if (!isalnum(*p))
3370 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003371 }
3372 return PyBool_FromLong(1);
3373}
3374
3375
3376PyDoc_STRVAR(isdigit__doc__,
3377"S.isdigit() -> bool\n\
3378\n\
3379Return True if all characters in S are digits\n\
3380and there is at least one character in S, False otherwise.");
3381
3382static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003383string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003384{
3385 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003387 register const unsigned char *e;
3388
3389 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003390 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003391 isdigit(*p))
3392 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003393
3394 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003395 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003396 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003397
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003398 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003399 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003400 if (!isdigit(*p))
3401 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003402 }
3403 return PyBool_FromLong(1);
3404}
3405
3406
3407PyDoc_STRVAR(islower__doc__,
3408"S.islower() -> bool\n\
3409\n\
3410Return True if all cased characters in S are lowercase and there is\n\
3411at least one cased character in S, False otherwise.");
3412
3413static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003414string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003415{
3416 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003418 register const unsigned char *e;
3419 int cased;
3420
3421 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003422 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003423 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003424
3425 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003426 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003427 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003428
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003429 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003430 cased = 0;
3431 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003432 if (isupper(*p))
3433 return PyBool_FromLong(0);
3434 else if (!cased && islower(*p))
3435 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003436 }
3437 return PyBool_FromLong(cased);
3438}
3439
3440
3441PyDoc_STRVAR(isupper__doc__,
3442"S.isupper() -> bool\n\
3443\n\
3444Return True if all cased characters in S are uppercase and there is\n\
3445at least one cased character in S, False otherwise.");
3446
3447static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003448string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003449{
3450 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003451 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003452 register const unsigned char *e;
3453 int cased;
3454
3455 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003456 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003457 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003458
3459 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003460 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003461 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003462
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003463 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003464 cased = 0;
3465 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003466 if (islower(*p))
3467 return PyBool_FromLong(0);
3468 else if (!cased && isupper(*p))
3469 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003470 }
3471 return PyBool_FromLong(cased);
3472}
3473
3474
3475PyDoc_STRVAR(istitle__doc__,
3476"S.istitle() -> bool\n\
3477\n\
3478Return True if S is a titlecased string and there is at least one\n\
3479character in S, i.e. uppercase characters may only follow uncased\n\
3480characters and lowercase characters only cased ones. Return False\n\
3481otherwise.");
3482
3483static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003484string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003485{
3486 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003487 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003488 register const unsigned char *e;
3489 int cased, previous_is_cased;
3490
3491 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003492 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003493 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003494
3495 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003496 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003497 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003498
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003499 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003500 cased = 0;
3501 previous_is_cased = 0;
3502 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003503 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003504
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003505 if (isupper(ch)) {
3506 if (previous_is_cased)
3507 return PyBool_FromLong(0);
3508 previous_is_cased = 1;
3509 cased = 1;
3510 }
3511 else if (islower(ch)) {
3512 if (!previous_is_cased)
3513 return PyBool_FromLong(0);
3514 previous_is_cased = 1;
3515 cased = 1;
3516 }
3517 else
3518 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003519 }
3520 return PyBool_FromLong(cased);
3521}
3522
3523
3524PyDoc_STRVAR(splitlines__doc__,
3525"S.splitlines([keepends]) -> list of strings\n\
3526\n\
3527Return a list of the lines in S, breaking at line boundaries.\n\
3528Line breaks are not included in the resulting list unless keepends\n\
3529is given and true.");
3530
3531static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003532string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003533{
Christian Heimes44720832008-05-26 13:01:01 +00003534 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003535
3536 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003537 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003538
Antoine Pitrou64672132010-01-13 07:55:48 +00003539 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003540 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3541 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003542 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003543}
3544
Robert Schuppenies51df0642008-06-01 16:16:17 +00003545PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003546"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003547
3548static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003550{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003551 Py_ssize_t res;
3552 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3553 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003554}
3555
Christian Heimes1a6387e2008-03-26 12:49:49 +00003556static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003557string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003558{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003559 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003560}
3561
Christian Heimes1a6387e2008-03-26 12:49:49 +00003562
Christian Heimes44720832008-05-26 13:01:01 +00003563#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003564
Christian Heimes44720832008-05-26 13:01:01 +00003565PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003566"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003567\n\
Eric Smith6c840852010-11-06 19:43:44 +00003568Return a formatted version of S, using substitutions from args and kwargs.\n\
3569The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003570
Eric Smithdc13b792008-05-30 18:10:04 +00003571static PyObject *
3572string__format__(PyObject* self, PyObject* args)
3573{
3574 PyObject *format_spec;
3575 PyObject *result = NULL;
3576 PyObject *tmp = NULL;
3577
3578 /* If 2.x, convert format_spec to the same type as value */
3579 /* This is to allow things like u''.format('') */
3580 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003581 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003582 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003583 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3584 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3585 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003586 }
3587 tmp = PyObject_Str(format_spec);
3588 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003589 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003590 format_spec = tmp;
3591
3592 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003593 PyString_AS_STRING(format_spec),
3594 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003595done:
3596 Py_XDECREF(tmp);
3597 return result;
3598}
3599
Christian Heimes44720832008-05-26 13:01:01 +00003600PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003601"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003602\n\
Eric Smith6c840852010-11-06 19:43:44 +00003603Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003604
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003605
Christian Heimes1a6387e2008-03-26 12:49:49 +00003606static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003607string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003608 /* Counterparts of the obsolete stropmodule functions; except
3609 string.maketrans(). */
3610 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3611 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3612 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3613 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3614 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3615 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3616 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3617 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3618 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3619 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3620 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3621 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3622 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3623 capitalize__doc__},
3624 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3625 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3626 endswith__doc__},
3627 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3628 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3629 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3630 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3631 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3632 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3633 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3634 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3635 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3636 rpartition__doc__},
3637 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3638 startswith__doc__},
3639 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3640 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3641 swapcase__doc__},
3642 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3643 translate__doc__},
3644 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3645 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3646 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3647 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3648 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3649 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3650 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3651 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3652 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3653 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3654 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3655 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3656 expandtabs__doc__},
3657 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3658 splitlines__doc__},
3659 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3660 sizeof__doc__},
3661 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3662 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003663};
3664
3665static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003666str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003667
Christian Heimes44720832008-05-26 13:01:01 +00003668static PyObject *
3669string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3670{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003671 PyObject *x = NULL;
3672 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003673
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003674 if (type != &PyString_Type)
3675 return str_subtype_new(type, args, kwds);
3676 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3677 return NULL;
3678 if (x == NULL)
3679 return PyString_FromString("");
3680 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003681}
3682
3683static PyObject *
3684str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3685{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003686 PyObject *tmp, *pnew;
3687 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003688
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003689 assert(PyType_IsSubtype(type, &PyString_Type));
3690 tmp = string_new(&PyString_Type, args, kwds);
3691 if (tmp == NULL)
3692 return NULL;
3693 assert(PyString_CheckExact(tmp));
3694 n = PyString_GET_SIZE(tmp);
3695 pnew = type->tp_alloc(type, n);
3696 if (pnew != NULL) {
3697 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3698 ((PyStringObject *)pnew)->ob_shash =
3699 ((PyStringObject *)tmp)->ob_shash;
3700 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3701 }
3702 Py_DECREF(tmp);
3703 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003704}
3705
3706static PyObject *
3707basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3708{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003709 PyErr_SetString(PyExc_TypeError,
3710 "The basestring type cannot be instantiated");
3711 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003712}
3713
3714static PyObject *
3715string_mod(PyObject *v, PyObject *w)
3716{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003717 if (!PyString_Check(v)) {
3718 Py_INCREF(Py_NotImplemented);
3719 return Py_NotImplemented;
3720 }
3721 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003722}
3723
3724PyDoc_STRVAR(basestring_doc,
3725"Type basestring cannot be instantiated; it is the base for str and unicode.");
3726
3727static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003728 0, /*nb_add*/
3729 0, /*nb_subtract*/
3730 0, /*nb_multiply*/
3731 0, /*nb_divide*/
3732 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003733};
3734
3735
3736PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003737 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3738 "basestring",
3739 0,
3740 0,
3741 0, /* tp_dealloc */
3742 0, /* tp_print */
3743 0, /* tp_getattr */
3744 0, /* tp_setattr */
3745 0, /* tp_compare */
3746 0, /* tp_repr */
3747 0, /* tp_as_number */
3748 0, /* tp_as_sequence */
3749 0, /* tp_as_mapping */
3750 0, /* tp_hash */
3751 0, /* tp_call */
3752 0, /* tp_str */
3753 0, /* tp_getattro */
3754 0, /* tp_setattro */
3755 0, /* tp_as_buffer */
3756 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3757 basestring_doc, /* tp_doc */
3758 0, /* tp_traverse */
3759 0, /* tp_clear */
3760 0, /* tp_richcompare */
3761 0, /* tp_weaklistoffset */
3762 0, /* tp_iter */
3763 0, /* tp_iternext */
3764 0, /* tp_methods */
3765 0, /* tp_members */
3766 0, /* tp_getset */
3767 &PyBaseObject_Type, /* tp_base */
3768 0, /* tp_dict */
3769 0, /* tp_descr_get */
3770 0, /* tp_descr_set */
3771 0, /* tp_dictoffset */
3772 0, /* tp_init */
3773 0, /* tp_alloc */
3774 basestring_new, /* tp_new */
3775 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003776};
3777
3778PyDoc_STRVAR(string_doc,
3779"str(object) -> string\n\
3780\n\
3781Return a nice string representation of the object.\n\
3782If the argument is a string, the return value is the same object.");
3783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003784PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003785 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3786 "str",
3787 PyStringObject_SIZE,
3788 sizeof(char),
3789 string_dealloc, /* tp_dealloc */
3790 (printfunc)string_print, /* tp_print */
3791 0, /* tp_getattr */
3792 0, /* tp_setattr */
3793 0, /* tp_compare */
3794 string_repr, /* tp_repr */
3795 &string_as_number, /* tp_as_number */
3796 &string_as_sequence, /* tp_as_sequence */
3797 &string_as_mapping, /* tp_as_mapping */
3798 (hashfunc)string_hash, /* tp_hash */
3799 0, /* tp_call */
3800 string_str, /* tp_str */
3801 PyObject_GenericGetAttr, /* tp_getattro */
3802 0, /* tp_setattro */
3803 &string_as_buffer, /* tp_as_buffer */
3804 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3805 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3806 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3807 string_doc, /* tp_doc */
3808 0, /* tp_traverse */
3809 0, /* tp_clear */
3810 (richcmpfunc)string_richcompare, /* tp_richcompare */
3811 0, /* tp_weaklistoffset */
3812 0, /* tp_iter */
3813 0, /* tp_iternext */
3814 string_methods, /* tp_methods */
3815 0, /* tp_members */
3816 0, /* tp_getset */
3817 &PyBaseString_Type, /* tp_base */
3818 0, /* tp_dict */
3819 0, /* tp_descr_get */
3820 0, /* tp_descr_set */
3821 0, /* tp_dictoffset */
3822 0, /* tp_init */
3823 0, /* tp_alloc */
3824 string_new, /* tp_new */
3825 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003826};
3827
3828void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003829PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003830{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003831 register PyObject *v;
3832 if (*pv == NULL)
3833 return;
3834 if (w == NULL || !PyString_Check(*pv)) {
3835 Py_DECREF(*pv);
3836 *pv = NULL;
3837 return;
3838 }
3839 v = string_concat((PyStringObject *) *pv, w);
3840 Py_DECREF(*pv);
3841 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003842}
3843
3844void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003845PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003846{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003847 PyString_Concat(pv, w);
3848 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003849}
3850
3851
3852/* The following function breaks the notion that strings are immutable:
3853 it changes the size of a string. We get away with this only if there
3854 is only one module referencing the object. You can also think of it
3855 as creating a new string object and destroying the old one, only
3856 more efficiently. In any case, don't use this if the string may
3857 already be known to some other part of the code...
3858 Note that if there's not enough memory to resize the string, the original
3859 string object at *pv is deallocated, *pv is set to NULL, an "out of
3860 memory" exception is set, and -1 is returned. Else (on success) 0 is
3861 returned, and the value in *pv may or may not be the same as on input.
3862 As always, an extra byte is allocated for a trailing \0 byte (newsize
3863 does *not* include that), and a trailing \0 byte is stored.
3864*/
3865
3866int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003867_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003868{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003869 register PyObject *v;
3870 register PyStringObject *sv;
3871 v = *pv;
3872 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3873 PyString_CHECK_INTERNED(v)) {
3874 *pv = 0;
3875 Py_DECREF(v);
3876 PyErr_BadInternalCall();
3877 return -1;
3878 }
3879 /* XXX UNREF/NEWREF interface should be more symmetrical */
3880 _Py_DEC_REFTOTAL;
3881 _Py_ForgetReference(v);
3882 *pv = (PyObject *)
3883 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3884 if (*pv == NULL) {
3885 PyObject_Del(v);
3886 PyErr_NoMemory();
3887 return -1;
3888 }
3889 _Py_NewReference(*pv);
3890 sv = (PyStringObject *) *pv;
3891 Py_SIZE(sv) = newsize;
3892 sv->ob_sval[newsize] = '\0';
3893 sv->ob_shash = -1; /* invalidate cached hash value */
3894 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003895}
3896
3897/* Helpers for formatstring */
3898
3899Py_LOCAL_INLINE(PyObject *)
3900getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3901{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003902 Py_ssize_t argidx = *p_argidx;
3903 if (argidx < arglen) {
3904 (*p_argidx)++;
3905 if (arglen < 0)
3906 return args;
3907 else
3908 return PyTuple_GetItem(args, argidx);
3909 }
3910 PyErr_SetString(PyExc_TypeError,
3911 "not enough arguments for format string");
3912 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003913}
3914
/* Format flag bits; each corresponds to one flag character that may
   follow '%' in a conversion specifier. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00003927
Mark Dickinson18cfada2009-11-23 18:46:41 +00003928/* Returns a new reference to a PyString object, or NULL on failure. */
3929
3930static PyObject *
3931formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003932{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003933 char *p;
3934 PyObject *result;
3935 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003936
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003937 x = PyFloat_AsDouble(v);
3938 if (x == -1.0 && PyErr_Occurred()) {
3939 PyErr_Format(PyExc_TypeError, "float argument required, "
3940 "not %.200s", Py_TYPE(v)->tp_name);
3941 return NULL;
3942 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003943
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003944 if (prec < 0)
3945 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003946
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003947 p = PyOS_double_to_string(x, type, prec,
3948 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003949
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003950 if (p == NULL)
3951 return NULL;
3952 result = PyString_FromStringAndSize(p, strlen(p));
3953 PyMem_Free(p);
3954 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003955}
3956
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003957/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003958 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3959 * Python's regular ints.
3960 * Return value: a new PyString*, or NULL if error.
3961 * . *pbuf is set to point into it,
3962 * *plen set to the # of chars following that.
3963 * Caller must decref it when done using pbuf.
3964 * The string starting at *pbuf is of the form
3965 * "-"? ("0x" | "0X")? digit+
3966 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3967 * set in flags. The case of hex digits will be correct,
3968 * There will be at least prec digits, zero-filled on the left if
3969 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003970 * val object to be converted
3971 * flags bitmask of format flags; only F_ALT is looked at
3972 * prec minimum number of digits; 0-fill on left if needed
3973 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003974 *
3975 * CAUTION: o, x and X conversions on regular ints can never
3976 * produce a '-' sign, but can for Python's unbounded ints.
3977 */
3978PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003979_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003980 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003981{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003982 PyObject *result = NULL;
3983 char *buf;
3984 Py_ssize_t i;
3985 int sign; /* 1 if '-', else 0 */
3986 int len; /* number of characters */
3987 Py_ssize_t llen;
3988 int numdigits; /* len == numnondigits + numdigits */
3989 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003990
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003991 switch (type) {
3992 case 'd':
3993 case 'u':
3994 result = Py_TYPE(val)->tp_str(val);
3995 break;
3996 case 'o':
3997 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
3998 break;
3999 case 'x':
4000 case 'X':
4001 numnondigits = 2;
4002 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4003 break;
4004 default:
4005 assert(!"'type' not in [duoxX]");
4006 }
4007 if (!result)
4008 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004010 buf = PyString_AsString(result);
4011 if (!buf) {
4012 Py_DECREF(result);
4013 return NULL;
4014 }
Christian Heimes44720832008-05-26 13:01:01 +00004015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004016 /* To modify the string in-place, there can only be one reference. */
4017 if (Py_REFCNT(result) != 1) {
4018 PyErr_BadInternalCall();
4019 return NULL;
4020 }
4021 llen = PyString_Size(result);
4022 if (llen > INT_MAX) {
4023 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4024 return NULL;
4025 }
4026 len = (int)llen;
4027 if (buf[len-1] == 'L') {
4028 --len;
4029 buf[len] = '\0';
4030 }
4031 sign = buf[0] == '-';
4032 numnondigits += sign;
4033 numdigits = len - numnondigits;
4034 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004035
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004036 /* Get rid of base marker unless F_ALT */
4037 if ((flags & F_ALT) == 0) {
4038 /* Need to skip 0x, 0X or 0. */
4039 int skipped = 0;
4040 switch (type) {
4041 case 'o':
4042 assert(buf[sign] == '0');
4043 /* If 0 is only digit, leave it alone. */
4044 if (numdigits > 1) {
4045 skipped = 1;
4046 --numdigits;
4047 }
4048 break;
4049 case 'x':
4050 case 'X':
4051 assert(buf[sign] == '0');
4052 assert(buf[sign + 1] == 'x');
4053 skipped = 2;
4054 numnondigits -= 2;
4055 break;
4056 }
4057 if (skipped) {
4058 buf += skipped;
4059 len -= skipped;
4060 if (sign)
4061 buf[0] = '-';
4062 }
4063 assert(len == numnondigits + numdigits);
4064 assert(numdigits > 0);
4065 }
Christian Heimes44720832008-05-26 13:01:01 +00004066
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004067 /* Fill with leading zeroes to meet minimum width. */
4068 if (prec > numdigits) {
4069 PyObject *r1 = PyString_FromStringAndSize(NULL,
4070 numnondigits + prec);
4071 char *b1;
4072 if (!r1) {
4073 Py_DECREF(result);
4074 return NULL;
4075 }
4076 b1 = PyString_AS_STRING(r1);
4077 for (i = 0; i < numnondigits; ++i)
4078 *b1++ = *buf++;
4079 for (i = 0; i < prec - numdigits; i++)
4080 *b1++ = '0';
4081 for (i = 0; i < numdigits; i++)
4082 *b1++ = *buf++;
4083 *b1 = '\0';
4084 Py_DECREF(result);
4085 result = r1;
4086 buf = PyString_AS_STRING(result);
4087 len = numnondigits + prec;
4088 }
Christian Heimes44720832008-05-26 13:01:01 +00004089
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004090 /* Fix up case for hex conversions. */
4091 if (type == 'X') {
4092 /* Need to convert all lower case letters to upper case.
4093 and need to convert 0x to 0X (and -0x to -0X). */
4094 for (i = 0; i < len; i++)
4095 if (buf[i] >= 'a' && buf[i] <= 'x')
4096 buf[i] -= 'a'-'A';
4097 }
4098 *pbuf = buf;
4099 *plen = len;
4100 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004101}
4102
4103Py_LOCAL_INLINE(int)
4104formatint(char *buf, size_t buflen, int flags,
4105 int prec, int type, PyObject *v)
4106{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004107 /* fmt = '%#.' + `prec` + 'l' + `type`
4108 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4109 + 1 + 1 = 24 */
4110 char fmt[64]; /* plenty big enough! */
4111 char *sign;
4112 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004113
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004114 x = PyInt_AsLong(v);
4115 if (x == -1 && PyErr_Occurred()) {
4116 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4117 Py_TYPE(v)->tp_name);
4118 return -1;
4119 }
4120 if (x < 0 && type == 'u') {
4121 type = 'd';
4122 }
4123 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4124 sign = "-";
4125 else
4126 sign = "";
4127 if (prec < 0)
4128 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004130 if ((flags & F_ALT) &&
4131 (type == 'x' || type == 'X')) {
4132 /* When converting under %#x or %#X, there are a number
4133 * of issues that cause pain:
4134 * - when 0 is being converted, the C standard leaves off
4135 * the '0x' or '0X', which is inconsistent with other
4136 * %#x/%#X conversions and inconsistent with Python's
4137 * hex() function
4138 * - there are platforms that violate the standard and
4139 * convert 0 with the '0x' or '0X'
4140 * (Metrowerks, Compaq Tru64)
4141 * - there are platforms that give '0x' when converting
4142 * under %#X, but convert 0 in accordance with the
4143 * standard (OS/2 EMX)
4144 *
4145 * We can achieve the desired consistency by inserting our
4146 * own '0x' or '0X' prefix, and substituting %x/%X in place
4147 * of %#x/%#X.
4148 *
4149 * Note that this is the same approach as used in
4150 * formatint() in unicodeobject.c
4151 */
4152 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4153 sign, type, prec, type);
4154 }
4155 else {
4156 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4157 sign, (flags&F_ALT) ? "#" : "",
4158 prec, type);
4159 }
Christian Heimes44720832008-05-26 13:01:01 +00004160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004161 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4162 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4163 */
4164 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4165 PyErr_SetString(PyExc_OverflowError,
4166 "formatted integer is too long (precision too large?)");
4167 return -1;
4168 }
4169 if (sign[0])
4170 PyOS_snprintf(buf, buflen, fmt, -x);
4171 else
4172 PyOS_snprintf(buf, buflen, fmt, x);
4173 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004174}
4175
4176Py_LOCAL_INLINE(int)
4177formatchar(char *buf, size_t buflen, PyObject *v)
4178{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004179 /* presume that the buffer is at least 2 characters long */
4180 if (PyString_Check(v)) {
4181 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4182 return -1;
4183 }
4184 else {
4185 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4186 return -1;
4187 }
4188 buf[1] = '\0';
4189 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004190}
4191
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is parenthesized so that it behaves as a single
   operand wherever the macro is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
4201
4202PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004203PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004204{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004205 char *fmt, *res;
4206 Py_ssize_t arglen, argidx;
4207 Py_ssize_t reslen, rescnt, fmtcnt;
4208 int args_owned = 0;
4209 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004210#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004211 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004212#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004213 PyObject *dict = NULL;
4214 if (format == NULL || !PyString_Check(format) || args == NULL) {
4215 PyErr_BadInternalCall();
4216 return NULL;
4217 }
4218 orig_args = args;
4219 fmt = PyString_AS_STRING(format);
4220 fmtcnt = PyString_GET_SIZE(format);
4221 reslen = rescnt = fmtcnt + 100;
4222 result = PyString_FromStringAndSize((char *)NULL, reslen);
4223 if (result == NULL)
4224 return NULL;
4225 res = PyString_AsString(result);
4226 if (PyTuple_Check(args)) {
4227 arglen = PyTuple_GET_SIZE(args);
4228 argidx = 0;
4229 }
4230 else {
4231 arglen = -1;
4232 argidx = -2;
4233 }
4234 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4235 !PyObject_TypeCheck(args, &PyBaseString_Type))
4236 dict = args;
4237 while (--fmtcnt >= 0) {
4238 if (*fmt != '%') {
4239 if (--rescnt < 0) {
4240 rescnt = fmtcnt + 100;
4241 reslen += rescnt;
4242 if (_PyString_Resize(&result, reslen))
4243 return NULL;
4244 res = PyString_AS_STRING(result)
4245 + reslen - rescnt;
4246 --rescnt;
4247 }
4248 *res++ = *fmt++;
4249 }
4250 else {
4251 /* Got a format specifier */
4252 int flags = 0;
4253 Py_ssize_t width = -1;
4254 int prec = -1;
4255 int c = '\0';
4256 int fill;
4257 int isnumok;
4258 PyObject *v = NULL;
4259 PyObject *temp = NULL;
4260 char *pbuf;
4261 int sign;
4262 Py_ssize_t len;
4263 char formatbuf[FORMATBUFLEN];
4264 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004265#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004266 char *fmt_start = fmt;
4267 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004268#endif
4269
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004270 fmt++;
4271 if (*fmt == '(') {
4272 char *keystart;
4273 Py_ssize_t keylen;
4274 PyObject *key;
4275 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004276
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004277 if (dict == NULL) {
4278 PyErr_SetString(PyExc_TypeError,
4279 "format requires a mapping");
4280 goto error;
4281 }
4282 ++fmt;
4283 --fmtcnt;
4284 keystart = fmt;
4285 /* Skip over balanced parentheses */
4286 while (pcount > 0 && --fmtcnt >= 0) {
4287 if (*fmt == ')')
4288 --pcount;
4289 else if (*fmt == '(')
4290 ++pcount;
4291 fmt++;
4292 }
4293 keylen = fmt - keystart - 1;
4294 if (fmtcnt < 0 || pcount > 0) {
4295 PyErr_SetString(PyExc_ValueError,
4296 "incomplete format key");
4297 goto error;
4298 }
4299 key = PyString_FromStringAndSize(keystart,
4300 keylen);
4301 if (key == NULL)
4302 goto error;
4303 if (args_owned) {
4304 Py_DECREF(args);
4305 args_owned = 0;
4306 }
4307 args = PyObject_GetItem(dict, key);
4308 Py_DECREF(key);
4309 if (args == NULL) {
4310 goto error;
4311 }
4312 args_owned = 1;
4313 arglen = -1;
4314 argidx = -2;
4315 }
4316 while (--fmtcnt >= 0) {
4317 switch (c = *fmt++) {
4318 case '-': flags |= F_LJUST; continue;
4319 case '+': flags |= F_SIGN; continue;
4320 case ' ': flags |= F_BLANK; continue;
4321 case '#': flags |= F_ALT; continue;
4322 case '0': flags |= F_ZERO; continue;
4323 }
4324 break;
4325 }
4326 if (c == '*') {
4327 v = getnextarg(args, arglen, &argidx);
4328 if (v == NULL)
4329 goto error;
4330 if (!PyInt_Check(v)) {
4331 PyErr_SetString(PyExc_TypeError,
4332 "* wants int");
4333 goto error;
4334 }
4335 width = PyInt_AsLong(v);
4336 if (width < 0) {
4337 flags |= F_LJUST;
4338 width = -width;
4339 }
4340 if (--fmtcnt >= 0)
4341 c = *fmt++;
4342 }
4343 else if (c >= 0 && isdigit(c)) {
4344 width = c - '0';
4345 while (--fmtcnt >= 0) {
4346 c = Py_CHARMASK(*fmt++);
4347 if (!isdigit(c))
4348 break;
4349 if ((width*10) / 10 != width) {
4350 PyErr_SetString(
4351 PyExc_ValueError,
4352 "width too big");
4353 goto error;
4354 }
4355 width = width*10 + (c - '0');
4356 }
4357 }
4358 if (c == '.') {
4359 prec = 0;
4360 if (--fmtcnt >= 0)
4361 c = *fmt++;
4362 if (c == '*') {
4363 v = getnextarg(args, arglen, &argidx);
4364 if (v == NULL)
4365 goto error;
4366 if (!PyInt_Check(v)) {
4367 PyErr_SetString(
4368 PyExc_TypeError,
4369 "* wants int");
4370 goto error;
4371 }
4372 prec = PyInt_AsLong(v);
4373 if (prec < 0)
4374 prec = 0;
4375 if (--fmtcnt >= 0)
4376 c = *fmt++;
4377 }
4378 else if (c >= 0 && isdigit(c)) {
4379 prec = c - '0';
4380 while (--fmtcnt >= 0) {
4381 c = Py_CHARMASK(*fmt++);
4382 if (!isdigit(c))
4383 break;
4384 if ((prec*10) / 10 != prec) {
4385 PyErr_SetString(
4386 PyExc_ValueError,
4387 "prec too big");
4388 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004389 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004390 prec = prec*10 + (c - '0');
4391 }
4392 }
4393 } /* prec */
4394 if (fmtcnt >= 0) {
4395 if (c == 'h' || c == 'l' || c == 'L') {
4396 if (--fmtcnt >= 0)
4397 c = *fmt++;
4398 }
4399 }
4400 if (fmtcnt < 0) {
4401 PyErr_SetString(PyExc_ValueError,
4402 "incomplete format");
4403 goto error;
4404 }
4405 if (c != '%') {
4406 v = getnextarg(args, arglen, &argidx);
4407 if (v == NULL)
4408 goto error;
4409 }
4410 sign = 0;
4411 fill = ' ';
4412 switch (c) {
4413 case '%':
4414 pbuf = "%";
4415 len = 1;
4416 break;
4417 case 's':
4418#ifdef Py_USING_UNICODE
4419 if (PyUnicode_Check(v)) {
4420 fmt = fmt_start;
4421 argidx = argidx_start;
4422 goto unicode;
4423 }
4424#endif
4425 temp = _PyObject_Str(v);
4426#ifdef Py_USING_UNICODE
4427 if (temp != NULL && PyUnicode_Check(temp)) {
4428 Py_DECREF(temp);
4429 fmt = fmt_start;
4430 argidx = argidx_start;
4431 goto unicode;
4432 }
4433#endif
4434 /* Fall through */
4435 case 'r':
4436 if (c == 'r')
4437 temp = PyObject_Repr(v);
4438 if (temp == NULL)
4439 goto error;
4440 if (!PyString_Check(temp)) {
4441 PyErr_SetString(PyExc_TypeError,
4442 "%s argument has non-string str()");
4443 Py_DECREF(temp);
4444 goto error;
4445 }
4446 pbuf = PyString_AS_STRING(temp);
4447 len = PyString_GET_SIZE(temp);
4448 if (prec >= 0 && len > prec)
4449 len = prec;
4450 break;
4451 case 'i':
4452 case 'd':
4453 case 'u':
4454 case 'o':
4455 case 'x':
4456 case 'X':
4457 if (c == 'i')
4458 c = 'd';
4459 isnumok = 0;
4460 if (PyNumber_Check(v)) {
4461 PyObject *iobj=NULL;
4462
4463 if (PyInt_Check(v) || (PyLong_Check(v))) {
4464 iobj = v;
4465 Py_INCREF(iobj);
4466 }
4467 else {
4468 iobj = PyNumber_Int(v);
4469 if (iobj==NULL) iobj = PyNumber_Long(v);
4470 }
4471 if (iobj!=NULL) {
4472 if (PyInt_Check(iobj)) {
4473 isnumok = 1;
4474 pbuf = formatbuf;
4475 len = formatint(pbuf,
4476 sizeof(formatbuf),
4477 flags, prec, c, iobj);
4478 Py_DECREF(iobj);
4479 if (len < 0)
4480 goto error;
4481 sign = 1;
4482 }
4483 else if (PyLong_Check(iobj)) {
4484 int ilen;
4485
4486 isnumok = 1;
4487 temp = _PyString_FormatLong(iobj, flags,
4488 prec, c, &pbuf, &ilen);
4489 Py_DECREF(iobj);
4490 len = ilen;
4491 if (!temp)
4492 goto error;
4493 sign = 1;
4494 }
4495 else {
4496 Py_DECREF(iobj);
4497 }
4498 }
4499 }
4500 if (!isnumok) {
4501 PyErr_Format(PyExc_TypeError,
4502 "%%%c format: a number is required, "
4503 "not %.200s", c, Py_TYPE(v)->tp_name);
4504 goto error;
4505 }
4506 if (flags & F_ZERO)
4507 fill = '0';
4508 break;
4509 case 'e':
4510 case 'E':
4511 case 'f':
4512 case 'F':
4513 case 'g':
4514 case 'G':
4515 temp = formatfloat(v, flags, prec, c);
4516 if (temp == NULL)
4517 goto error;
4518 pbuf = PyString_AS_STRING(temp);
4519 len = PyString_GET_SIZE(temp);
4520 sign = 1;
4521 if (flags & F_ZERO)
4522 fill = '0';
4523 break;
4524 case 'c':
4525#ifdef Py_USING_UNICODE
4526 if (PyUnicode_Check(v)) {
4527 fmt = fmt_start;
4528 argidx = argidx_start;
4529 goto unicode;
4530 }
4531#endif
4532 pbuf = formatbuf;
4533 len = formatchar(pbuf, sizeof(formatbuf), v);
4534 if (len < 0)
4535 goto error;
4536 break;
4537 default:
4538 PyErr_Format(PyExc_ValueError,
4539 "unsupported format character '%c' (0x%x) "
4540 "at index %zd",
4541 c, c,
4542 (Py_ssize_t)(fmt - 1 -
4543 PyString_AsString(format)));
4544 goto error;
4545 }
4546 if (sign) {
4547 if (*pbuf == '-' || *pbuf == '+') {
4548 sign = *pbuf++;
4549 len--;
4550 }
4551 else if (flags & F_SIGN)
4552 sign = '+';
4553 else if (flags & F_BLANK)
4554 sign = ' ';
4555 else
4556 sign = 0;
4557 }
4558 if (width < len)
4559 width = len;
4560 if (rescnt - (sign != 0) < width) {
4561 reslen -= rescnt;
4562 rescnt = width + fmtcnt + 100;
4563 reslen += rescnt;
4564 if (reslen < 0) {
4565 Py_DECREF(result);
4566 Py_XDECREF(temp);
4567 return PyErr_NoMemory();
4568 }
4569 if (_PyString_Resize(&result, reslen)) {
4570 Py_XDECREF(temp);
4571 return NULL;
4572 }
4573 res = PyString_AS_STRING(result)
4574 + reslen - rescnt;
4575 }
4576 if (sign) {
4577 if (fill != ' ')
4578 *res++ = sign;
4579 rescnt--;
4580 if (width > len)
4581 width--;
4582 }
4583 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4584 assert(pbuf[0] == '0');
4585 assert(pbuf[1] == c);
4586 if (fill != ' ') {
4587 *res++ = *pbuf++;
4588 *res++ = *pbuf++;
4589 }
4590 rescnt -= 2;
4591 width -= 2;
4592 if (width < 0)
4593 width = 0;
4594 len -= 2;
4595 }
4596 if (width > len && !(flags & F_LJUST)) {
4597 do {
4598 --rescnt;
4599 *res++ = fill;
4600 } while (--width > len);
4601 }
4602 if (fill == ' ') {
4603 if (sign)
4604 *res++ = sign;
4605 if ((flags & F_ALT) &&
4606 (c == 'x' || c == 'X')) {
4607 assert(pbuf[0] == '0');
4608 assert(pbuf[1] == c);
4609 *res++ = *pbuf++;
4610 *res++ = *pbuf++;
4611 }
4612 }
4613 Py_MEMCPY(res, pbuf, len);
4614 res += len;
4615 rescnt -= len;
4616 while (--width >= len) {
4617 --rescnt;
4618 *res++ = ' ';
4619 }
4620 if (dict && (argidx < arglen) && c != '%') {
4621 PyErr_SetString(PyExc_TypeError,
4622 "not all arguments converted during string formatting");
4623 Py_XDECREF(temp);
4624 goto error;
4625 }
4626 Py_XDECREF(temp);
4627 } /* '%' */
4628 } /* until end */
4629 if (argidx < arglen && !dict) {
4630 PyErr_SetString(PyExc_TypeError,
4631 "not all arguments converted during string formatting");
4632 goto error;
4633 }
4634 if (args_owned) {
4635 Py_DECREF(args);
4636 }
4637 if (_PyString_Resize(&result, reslen - rescnt))
4638 return NULL;
4639 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004640
4641#ifdef Py_USING_UNICODE
4642 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004643 if (args_owned) {
4644 Py_DECREF(args);
4645 args_owned = 0;
4646 }
4647 /* Fiddle args right (remove the first argidx arguments) */
4648 if (PyTuple_Check(orig_args) && argidx > 0) {
4649 PyObject *v;
4650 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4651 v = PyTuple_New(n);
4652 if (v == NULL)
4653 goto error;
4654 while (--n >= 0) {
4655 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4656 Py_INCREF(w);
4657 PyTuple_SET_ITEM(v, n, w);
4658 }
4659 args = v;
4660 } else {
4661 Py_INCREF(orig_args);
4662 args = orig_args;
4663 }
4664 args_owned = 1;
4665 /* Take what we have of the result and let the Unicode formatting
4666 function format the rest of the input. */
4667 rescnt = res - PyString_AS_STRING(result);
4668 if (_PyString_Resize(&result, rescnt))
4669 goto error;
4670 fmtcnt = PyString_GET_SIZE(format) - \
4671 (fmt - PyString_AS_STRING(format));
4672 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4673 if (format == NULL)
4674 goto error;
4675 v = PyUnicode_Format(format, args);
4676 Py_DECREF(format);
4677 if (v == NULL)
4678 goto error;
4679 /* Paste what we have (result) to what the Unicode formatting
4680 function returned (v) and return the result (or error) */
4681 w = PyUnicode_Concat(result, v);
4682 Py_DECREF(result);
4683 Py_DECREF(v);
4684 Py_DECREF(args);
4685 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004686#endif /* Py_USING_UNICODE */
4687
4688 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004689 Py_DECREF(result);
4690 if (args_owned) {
4691 Py_DECREF(args);
4692 }
4693 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004694}
4695
4696void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004697PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004698{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004699 register PyStringObject *s = (PyStringObject *)(*p);
4700 PyObject *t;
4701 if (s == NULL || !PyString_Check(s))
4702 Py_FatalError("PyString_InternInPlace: strings only please!");
4703 /* If it's a string subclass, we don't really know what putting
4704 it in the interned dict might do. */
4705 if (!PyString_CheckExact(s))
4706 return;
4707 if (PyString_CHECK_INTERNED(s))
4708 return;
4709 if (interned == NULL) {
4710 interned = PyDict_New();
4711 if (interned == NULL) {
4712 PyErr_Clear(); /* Don't leave an exception */
4713 return;
4714 }
4715 }
4716 t = PyDict_GetItem(interned, (PyObject *)s);
4717 if (t) {
4718 Py_INCREF(t);
4719 Py_DECREF(*p);
4720 *p = t;
4721 return;
4722 }
Christian Heimes44720832008-05-26 13:01:01 +00004723
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004724 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4725 PyErr_Clear();
4726 return;
4727 }
4728 /* The two references in interned are not counted by refcnt.
4729 The string deallocator will take care of this */
4730 Py_REFCNT(s) -= 2;
4731 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004732}
4733
4734void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004735PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004736{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004737 PyString_InternInPlace(p);
4738 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4739 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4740 Py_INCREF(*p);
4741 }
Christian Heimes44720832008-05-26 13:01:01 +00004742}
4743
4744
4745PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004746PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004747{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004748 PyObject *s = PyString_FromString(cp);
4749 if (s == NULL)
4750 return NULL;
4751 PyString_InternInPlace(&s);
4752 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004753}
4754
4755void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004756PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004757{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004758 int i;
4759 for (i = 0; i < UCHAR_MAX + 1; i++) {
4760 Py_XDECREF(characters[i]);
4761 characters[i] = NULL;
4762 }
4763 Py_XDECREF(nullstring);
4764 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004765}
4766
4767void _Py_ReleaseInternedStrings(void)
4768{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004769 PyObject *keys;
4770 PyStringObject *s;
4771 Py_ssize_t i, n;
4772 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004773
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004774 if (interned == NULL || !PyDict_Check(interned))
4775 return;
4776 keys = PyDict_Keys(interned);
4777 if (keys == NULL || !PyList_Check(keys)) {
4778 PyErr_Clear();
4779 return;
4780 }
Christian Heimes44720832008-05-26 13:01:01 +00004781
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004782 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4783 detector, interned strings are not forcibly deallocated; rather, we
4784 give them their stolen references back, and then clear and DECREF
4785 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004786
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004787 n = PyList_GET_SIZE(keys);
4788 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4789 n);
4790 for (i = 0; i < n; i++) {
4791 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4792 switch (s->ob_sstate) {
4793 case SSTATE_NOT_INTERNED:
4794 /* XXX Shouldn't happen */
4795 break;
4796 case SSTATE_INTERNED_IMMORTAL:
4797 Py_REFCNT(s) += 1;
4798 immortal_size += Py_SIZE(s);
4799 break;
4800 case SSTATE_INTERNED_MORTAL:
4801 Py_REFCNT(s) += 2;
4802 mortal_size += Py_SIZE(s);
4803 break;
4804 default:
4805 Py_FatalError("Inconsistent interned string state.");
4806 }
4807 s->ob_sstate = SSTATE_NOT_INTERNED;
4808 }
4809 fprintf(stderr, "total size of all interned strings: "
4810 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4811 "mortal/immortal\n", mortal_size, immortal_size);
4812 Py_DECREF(keys);
4813 PyDict_Clear(interned);
4814 Py_DECREF(interned);
4815 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004816}