blob: 467de363f4a8929d8669a5463189487b4aaa583a [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* Basic size of a string object.  Any memory allocation for a string of
   length n should request PyStringObject_SIZE + n bytes: the header up to
   ob_sval, one byte for the terminating null, plus the n data bytes.

   Compared with sizeof(PyStringObject), using PyStringObject_SIZE saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
/*
   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 if (a->ob_shash != -1)
1266 return a->ob_shash;
1267 len = Py_SIZE(a);
1268 p = (unsigned char *) a->ob_sval;
1269 x = *p << 7;
1270 while (--len >= 0)
1271 x = (1000003*x) ^ *p++;
1272 x ^= Py_SIZE(a);
1273 if (x == -1)
1274 x = -2;
1275 a->ob_shash = x;
1276 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001277}
1278
1279static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001280string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001281{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 if (PyIndex_Check(item)) {
1283 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1284 if (i == -1 && PyErr_Occurred())
1285 return NULL;
1286 if (i < 0)
1287 i += PyString_GET_SIZE(self);
1288 return string_item(self, i);
1289 }
1290 else if (PySlice_Check(item)) {
1291 Py_ssize_t start, stop, step, slicelength, cur, i;
1292 char* source_buf;
1293 char* result_buf;
1294 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001296 if (PySlice_GetIndicesEx((PySliceObject*)item,
1297 PyString_GET_SIZE(self),
1298 &start, &stop, &step, &slicelength) < 0) {
1299 return NULL;
1300 }
Christian Heimes44720832008-05-26 13:01:01 +00001301
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001302 if (slicelength <= 0) {
1303 return PyString_FromStringAndSize("", 0);
1304 }
1305 else if (start == 0 && step == 1 &&
1306 slicelength == PyString_GET_SIZE(self) &&
1307 PyString_CheckExact(self)) {
1308 Py_INCREF(self);
1309 return (PyObject *)self;
1310 }
1311 else if (step == 1) {
1312 return PyString_FromStringAndSize(
1313 PyString_AS_STRING(self) + start,
1314 slicelength);
1315 }
1316 else {
1317 source_buf = PyString_AsString((PyObject*)self);
1318 result_buf = (char *)PyMem_Malloc(slicelength);
1319 if (result_buf == NULL)
1320 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001321
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001322 for (cur = start, i = 0; i < slicelength;
1323 cur += step, i++) {
1324 result_buf[i] = source_buf[cur];
1325 }
Christian Heimes44720832008-05-26 13:01:01 +00001326
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001327 result = PyString_FromStringAndSize(result_buf,
1328 slicelength);
1329 PyMem_Free(result_buf);
1330 return result;
1331 }
1332 }
1333 else {
1334 PyErr_Format(PyExc_TypeError,
1335 "string indices must be integers, not %.200s",
1336 Py_TYPE(item)->tp_name);
1337 return NULL;
1338 }
Christian Heimes44720832008-05-26 13:01:01 +00001339}
1340
1341static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001342string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 if ( index != 0 ) {
1345 PyErr_SetString(PyExc_SystemError,
1346 "accessing non-existent string segment");
1347 return -1;
1348 }
1349 *ptr = (void *)self->ob_sval;
1350 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001351}
1352
1353static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001354string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001355{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001356 PyErr_SetString(PyExc_TypeError,
1357 "Cannot use string as modifiable buffer");
1358 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001359}
1360
1361static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001362string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001363{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001364 if ( lenp )
1365 *lenp = Py_SIZE(self);
1366 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001367}
1368
1369static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001370string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001371{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 if ( index != 0 ) {
1373 PyErr_SetString(PyExc_SystemError,
1374 "accessing non-existent string segment");
1375 return -1;
1376 }
1377 *ptr = self->ob_sval;
1378 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001379}
1380
1381static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001382string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001383{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001384 return PyBuffer_FillInfo(view, (PyObject*)self,
1385 (void *)self->ob_sval, Py_SIZE(self),
1386 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001387}
1388
1389static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001390 (lenfunc)string_length, /*sq_length*/
1391 (binaryfunc)string_concat, /*sq_concat*/
1392 (ssizeargfunc)string_repeat, /*sq_repeat*/
1393 (ssizeargfunc)string_item, /*sq_item*/
1394 (ssizessizeargfunc)string_slice, /*sq_slice*/
1395 0, /*sq_ass_item*/
1396 0, /*sq_ass_slice*/
1397 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001398};
1399
1400static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 (lenfunc)string_length,
1402 (binaryfunc)string_subscript,
1403 0,
Christian Heimes44720832008-05-26 13:01:01 +00001404};
1405
1406static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (readbufferproc)string_buffer_getreadbuf,
1408 (writebufferproc)string_buffer_getwritebuf,
1409 (segcountproc)string_buffer_getsegcount,
1410 (charbufferproc)string_buffer_getcharbuf,
1411 (getbufferproc)string_buffer_getbuffer,
1412 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001413};
1414
1415
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001416
/* Selector values for the three strip variants. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the selector values above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its leading "|O:". */
#define STRIPNAME(i) (stripformat[i]+3)
1425
Christian Heimes1a6387e2008-03-26 12:49:49 +00001426PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001427"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428\n\
Christian Heimes44720832008-05-26 13:01:01 +00001429Return a list of the words in the string S, using sep as the\n\
1430delimiter string. If maxsplit is given, at most maxsplit\n\
1431splits are done. If sep is not specified or is None, any\n\
1432whitespace string is a separator and empty strings are removed\n\
1433from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001434
1435static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001436string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001438 Py_ssize_t len = PyString_GET_SIZE(self), n;
1439 Py_ssize_t maxsplit = -1;
1440 const char *s = PyString_AS_STRING(self), *sub;
1441 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001442
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001443 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1444 return NULL;
1445 if (maxsplit < 0)
1446 maxsplit = PY_SSIZE_T_MAX;
1447 if (subobj == Py_None)
1448 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1449 if (PyString_Check(subobj)) {
1450 sub = PyString_AS_STRING(subobj);
1451 n = PyString_GET_SIZE(subobj);
1452 }
Christian Heimes44720832008-05-26 13:01:01 +00001453#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001454 else if (PyUnicode_Check(subobj))
1455 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001456#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001457 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1458 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001460 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001461}
1462
1463PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001464"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001466Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001468found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469
1470static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001471string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 const char *sep;
1474 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001476 if (PyString_Check(sep_obj)) {
1477 sep = PyString_AS_STRING(sep_obj);
1478 sep_len = PyString_GET_SIZE(sep_obj);
1479 }
Christian Heimes44720832008-05-26 13:01:01 +00001480#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001481 else if (PyUnicode_Check(sep_obj))
1482 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001483#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001484 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1485 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001487 return stringlib_partition(
1488 (PyObject*) self,
1489 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1490 sep_obj, sep, sep_len
1491 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492}
1493
1494PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001495"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001497Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001498the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001499separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500
1501static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001502string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 const char *sep;
1505 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001507 if (PyString_Check(sep_obj)) {
1508 sep = PyString_AS_STRING(sep_obj);
1509 sep_len = PyString_GET_SIZE(sep_obj);
1510 }
Christian Heimes44720832008-05-26 13:01:01 +00001511#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001512 else if (PyUnicode_Check(sep_obj))
1513 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001514#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001515 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1516 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001518 return stringlib_rpartition(
1519 (PyObject*) self,
1520 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1521 sep_obj, sep, sep_len
1522 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523}
1524
Christian Heimes1a6387e2008-03-26 12:49:49 +00001525PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001526"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527\n\
Christian Heimes44720832008-05-26 13:01:01 +00001528Return a list of the words in the string S, using sep as the\n\
1529delimiter string, starting at the end of the string and working\n\
1530to the front. If maxsplit is given, at most maxsplit splits are\n\
1531done. If sep is not specified or is None, any whitespace string\n\
1532is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533
1534static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001535string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001537 Py_ssize_t len = PyString_GET_SIZE(self), n;
1538 Py_ssize_t maxsplit = -1;
1539 const char *s = PyString_AS_STRING(self), *sub;
1540 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001541
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001542 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1543 return NULL;
1544 if (maxsplit < 0)
1545 maxsplit = PY_SSIZE_T_MAX;
1546 if (subobj == Py_None)
1547 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1548 if (PyString_Check(subobj)) {
1549 sub = PyString_AS_STRING(subobj);
1550 n = PyString_GET_SIZE(subobj);
1551 }
Christian Heimes44720832008-05-26 13:01:01 +00001552#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 else if (PyUnicode_Check(subobj))
1554 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001555#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001559 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001560}
1561
1562
1563PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001564"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001565\n\
1566Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001567iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001568
1569static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001570string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001571{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 char *sep = PyString_AS_STRING(self);
1573 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1574 PyObject *res = NULL;
1575 char *p;
1576 Py_ssize_t seqlen = 0;
1577 size_t sz = 0;
1578 Py_ssize_t i;
1579 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001580
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001581 seq = PySequence_Fast(orig, "");
1582 if (seq == NULL) {
1583 return NULL;
1584 }
Christian Heimes44720832008-05-26 13:01:01 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 seqlen = PySequence_Size(seq);
1587 if (seqlen == 0) {
1588 Py_DECREF(seq);
1589 return PyString_FromString("");
1590 }
1591 if (seqlen == 1) {
1592 item = PySequence_Fast_GET_ITEM(seq, 0);
1593 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1594 Py_INCREF(item);
1595 Py_DECREF(seq);
1596 return item;
1597 }
1598 }
Christian Heimes44720832008-05-26 13:01:01 +00001599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001600 /* There are at least two things to join, or else we have a subclass
1601 * of the builtin types in the sequence.
1602 * Do a pre-pass to figure out the total amount of space we'll
1603 * need (sz), see whether any argument is absurd, and defer to
1604 * the Unicode join if appropriate.
1605 */
1606 for (i = 0; i < seqlen; i++) {
1607 const size_t old_sz = sz;
1608 item = PySequence_Fast_GET_ITEM(seq, i);
1609 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001610#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001611 if (PyUnicode_Check(item)) {
1612 /* Defer to Unicode join.
1613 * CAUTION: There's no gurantee that the
1614 * original sequence can be iterated over
1615 * again, so we must pass seq here.
1616 */
1617 PyObject *result;
1618 result = PyUnicode_Join((PyObject *)self, seq);
1619 Py_DECREF(seq);
1620 return result;
1621 }
Christian Heimes44720832008-05-26 13:01:01 +00001622#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001623 PyErr_Format(PyExc_TypeError,
1624 "sequence item %zd: expected string,"
1625 " %.80s found",
1626 i, Py_TYPE(item)->tp_name);
1627 Py_DECREF(seq);
1628 return NULL;
1629 }
1630 sz += PyString_GET_SIZE(item);
1631 if (i != 0)
1632 sz += seplen;
1633 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1634 PyErr_SetString(PyExc_OverflowError,
1635 "join() result is too long for a Python string");
1636 Py_DECREF(seq);
1637 return NULL;
1638 }
1639 }
Christian Heimes44720832008-05-26 13:01:01 +00001640
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001641 /* Allocate result space. */
1642 res = PyString_FromStringAndSize((char*)NULL, sz);
1643 if (res == NULL) {
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
Christian Heimes44720832008-05-26 13:01:01 +00001647
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001648 /* Catenate everything. */
1649 p = PyString_AS_STRING(res);
1650 for (i = 0; i < seqlen; ++i) {
1651 size_t n;
1652 item = PySequence_Fast_GET_ITEM(seq, i);
1653 n = PyString_GET_SIZE(item);
1654 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1655 p += n;
1656 if (i < seqlen - 1) {
1657 Py_MEMCPY(p, sep, seplen);
1658 p += seplen;
1659 }
1660 }
Christian Heimes44720832008-05-26 13:01:01 +00001661
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001662 Py_DECREF(seq);
1663 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001664}
1665
1666PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001667_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001668{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001669 assert(sep != NULL && PyString_Check(sep));
1670 assert(x != NULL);
1671 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001672}
1673
/* Helper macro to fix up start/end slice values in place.
 *
 * `start` and `end` must be modifiable lvalues; `len` is the length of
 * the sequence being sliced.  An `end` beyond `len` is clamped to `len`;
 * negative values are taken relative to `len` and then clamped to 0.
 * `start` is not clamped from above, so callers must cope with
 * start > end.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * is safe inside an unbraced if/else; use it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            end = (len);                        \
        else if ((end) < 0) {                   \
            end += (len);                       \
            if ((end) < 0)                      \
                end = 0;                        \
        }                                       \
        if ((start) < 0) {                      \
            start += (len);                     \
            if ((start) < 0)                    \
                start = 0;                      \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001688
1689Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001690string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001692 PyObject *subobj;
1693 const char *sub;
1694 Py_ssize_t sub_len;
1695 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1696 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001697
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001698 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1699 &obj_start, &obj_end))
1700 return -2;
1701 /* To support None in "start" and "end" arguments, meaning
1702 the same as if they were not passed.
1703 */
1704 if (obj_start != Py_None)
1705 if (!_PyEval_SliceIndex(obj_start, &start))
1706 return -2;
1707 if (obj_end != Py_None)
1708 if (!_PyEval_SliceIndex(obj_end, &end))
1709 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001710
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001711 if (PyString_Check(subobj)) {
1712 sub = PyString_AS_STRING(subobj);
1713 sub_len = PyString_GET_SIZE(subobj);
1714 }
Christian Heimes44720832008-05-26 13:01:01 +00001715#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001716 else if (PyUnicode_Check(subobj))
1717 return PyUnicode_Find(
1718 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001719#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001720 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1721 /* XXX - the "expected a character buffer object" is pretty
1722 confusing for a non-expert. remap to something else ? */
1723 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001724
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001725 if (dir > 0)
1726 return stringlib_find_slice(
1727 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1728 sub, sub_len, start, end);
1729 else
1730 return stringlib_rfind_slice(
1731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001733}
1734
1735
1736PyDoc_STRVAR(find__doc__,
1737"S.find(sub [,start [,end]]) -> int\n\
1738\n\
1739Return the lowest index in S where substring sub is found,\n\
1740such that sub is contained within s[start:end]. Optional\n\
1741arguments start and end are interpreted as in slice notation.\n\
1742\n\
1743Return -1 on failure.");
1744
1745static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001746string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001747{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001748 Py_ssize_t result = string_find_internal(self, args, +1);
1749 if (result == -2)
1750 return NULL;
1751 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001752}
1753
1754
1755PyDoc_STRVAR(index__doc__,
1756"S.index(sub [,start [,end]]) -> int\n\
1757\n\
1758Like S.find() but raise ValueError when the substring is not found.");
1759
1760static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001761string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001762{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001763 Py_ssize_t result = string_find_internal(self, args, +1);
1764 if (result == -2)
1765 return NULL;
1766 if (result == -1) {
1767 PyErr_SetString(PyExc_ValueError,
1768 "substring not found");
1769 return NULL;
1770 }
1771 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001772}
1773
1774
1775PyDoc_STRVAR(rfind__doc__,
1776"S.rfind(sub [,start [,end]]) -> int\n\
1777\n\
1778Return the highest index in S where substring sub is found,\n\
1779such that sub is contained within s[start:end]. Optional\n\
1780arguments start and end are interpreted as in slice notation.\n\
1781\n\
1782Return -1 on failure.");
1783
1784static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001785string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001786{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001787 Py_ssize_t result = string_find_internal(self, args, -1);
1788 if (result == -2)
1789 return NULL;
1790 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001791}
1792
1793
1794PyDoc_STRVAR(rindex__doc__,
1795"S.rindex(sub [,start [,end]]) -> int\n\
1796\n\
1797Like S.rfind() but raise ValueError when the substring is not found.");
1798
1799static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001800string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001801{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001802 Py_ssize_t result = string_find_internal(self, args, -1);
1803 if (result == -2)
1804 return NULL;
1805 if (result == -1) {
1806 PyErr_SetString(PyExc_ValueError,
1807 "substring not found");
1808 return NULL;
1809 }
1810 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001811}
1812
1813
1814Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001815do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001816{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001817 char *s = PyString_AS_STRING(self);
1818 Py_ssize_t len = PyString_GET_SIZE(self);
1819 char *sep = PyString_AS_STRING(sepobj);
1820 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1821 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001822
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001823 i = 0;
1824 if (striptype != RIGHTSTRIP) {
1825 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1826 i++;
1827 }
1828 }
Christian Heimes44720832008-05-26 13:01:01 +00001829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 j = len;
1831 if (striptype != LEFTSTRIP) {
1832 do {
1833 j--;
1834 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1835 j++;
1836 }
Christian Heimes44720832008-05-26 13:01:01 +00001837
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001838 if (i == 0 && j == len && PyString_CheckExact(self)) {
1839 Py_INCREF(self);
1840 return (PyObject*)self;
1841 }
1842 else
1843 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001844}
1845
1846
1847Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001848do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001849{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001850 char *s = PyString_AS_STRING(self);
1851 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001852
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001853 i = 0;
1854 if (striptype != RIGHTSTRIP) {
1855 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1856 i++;
1857 }
1858 }
Christian Heimes44720832008-05-26 13:01:01 +00001859
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001860 j = len;
1861 if (striptype != LEFTSTRIP) {
1862 do {
1863 j--;
1864 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1865 j++;
1866 }
Christian Heimes44720832008-05-26 13:01:01 +00001867
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001868 if (i == 0 && j == len && PyString_CheckExact(self)) {
1869 Py_INCREF(self);
1870 return (PyObject*)self;
1871 }
1872 else
1873 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001874}
1875
1876
1877Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001878do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001879{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001880 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001881
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001882 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1883 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001884
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001885 if (sep != NULL && sep != Py_None) {
1886 if (PyString_Check(sep))
1887 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001888#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 else if (PyUnicode_Check(sep)) {
1890 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1891 PyObject *res;
1892 if (uniself==NULL)
1893 return NULL;
1894 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1895 striptype, sep);
1896 Py_DECREF(uniself);
1897 return res;
1898 }
Christian Heimes44720832008-05-26 13:01:01 +00001899#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001900 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001901#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001902 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001903#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001904 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001905#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001906 STRIPNAME(striptype));
1907 return NULL;
1908 }
Christian Heimes44720832008-05-26 13:01:01 +00001909
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001911}
1912
1913
1914PyDoc_STRVAR(strip__doc__,
1915"S.strip([chars]) -> string or unicode\n\
1916\n\
1917Return a copy of the string S with leading and trailing\n\
1918whitespace removed.\n\
1919If chars is given and not None, remove characters in chars instead.\n\
1920If chars is unicode, S will be converted to unicode before stripping");
1921
1922static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001923string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001924{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001925 if (PyTuple_GET_SIZE(args) == 0)
1926 return do_strip(self, BOTHSTRIP); /* Common case */
1927 else
1928 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001929}
1930
1931
1932PyDoc_STRVAR(lstrip__doc__,
1933"S.lstrip([chars]) -> string or unicode\n\
1934\n\
1935Return a copy of the string S with leading whitespace removed.\n\
1936If chars is given and not None, remove characters in chars instead.\n\
1937If chars is unicode, S will be converted to unicode before stripping");
1938
1939static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001940string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001941{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001942 if (PyTuple_GET_SIZE(args) == 0)
1943 return do_strip(self, LEFTSTRIP); /* Common case */
1944 else
1945 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001946}
1947
1948
1949PyDoc_STRVAR(rstrip__doc__,
1950"S.rstrip([chars]) -> string or unicode\n\
1951\n\
1952Return a copy of the string S with trailing whitespace removed.\n\
1953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
1955
1956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001957string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001958{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, RIGHTSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001963}
1964
1965
1966PyDoc_STRVAR(lower__doc__,
1967"S.lower() -> string\n\
1968\n\
1969Return a copy of the string S converted to lowercase.");
1970
1971/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1972#ifndef _tolower
1973#define _tolower tolower
1974#endif
1975
1976static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001977string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001978{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001979 char *s;
1980 Py_ssize_t i, n = PyString_GET_SIZE(self);
1981 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001982
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001983 newobj = PyString_FromStringAndSize(NULL, n);
1984 if (!newobj)
1985 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001988
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001989 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001990
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001991 for (i = 0; i < n; i++) {
1992 int c = Py_CHARMASK(s[i]);
1993 if (isupper(c))
1994 s[i] = _tolower(c);
1995 }
Christian Heimes44720832008-05-26 13:01:01 +00001996
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001997 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001998}
1999
2000PyDoc_STRVAR(upper__doc__,
2001"S.upper() -> string\n\
2002\n\
2003Return a copy of the string S converted to uppercase.");
2004
2005#ifndef _toupper
2006#define _toupper toupper
2007#endif
2008
2009static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002010string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002011{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002012 char *s;
2013 Py_ssize_t i, n = PyString_GET_SIZE(self);
2014 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 newobj = PyString_FromStringAndSize(NULL, n);
2017 if (!newobj)
2018 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002021
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002022 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002023
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002024 for (i = 0; i < n; i++) {
2025 int c = Py_CHARMASK(s[i]);
2026 if (islower(c))
2027 s[i] = _toupper(c);
2028 }
Christian Heimes44720832008-05-26 13:01:01 +00002029
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002030 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002031}
2032
2033PyDoc_STRVAR(title__doc__,
2034"S.title() -> string\n\
2035\n\
2036Return a titlecased version of S, i.e. words start with uppercase\n\
2037characters, all remaining cased characters have lowercase.");
2038
2039static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002040string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002041{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002042 char *s = PyString_AS_STRING(self), *s_new;
2043 Py_ssize_t i, n = PyString_GET_SIZE(self);
2044 int previous_is_cased = 0;
2045 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002046
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002047 newobj = PyString_FromStringAndSize(NULL, n);
2048 if (newobj == NULL)
2049 return NULL;
2050 s_new = PyString_AsString(newobj);
2051 for (i = 0; i < n; i++) {
2052 int c = Py_CHARMASK(*s++);
2053 if (islower(c)) {
2054 if (!previous_is_cased)
2055 c = toupper(c);
2056 previous_is_cased = 1;
2057 } else if (isupper(c)) {
2058 if (previous_is_cased)
2059 c = tolower(c);
2060 previous_is_cased = 1;
2061 } else
2062 previous_is_cased = 0;
2063 *s_new++ = c;
2064 }
2065 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002066}
2067
2068PyDoc_STRVAR(capitalize__doc__,
2069"S.capitalize() -> string\n\
2070\n\
2071Return a copy of the string S with only its first character\n\
2072capitalized.");
2073
2074static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002075string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002076{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002077 char *s = PyString_AS_STRING(self), *s_new;
2078 Py_ssize_t i, n = PyString_GET_SIZE(self);
2079 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002080
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002081 newobj = PyString_FromStringAndSize(NULL, n);
2082 if (newobj == NULL)
2083 return NULL;
2084 s_new = PyString_AsString(newobj);
2085 if (0 < n) {
2086 int c = Py_CHARMASK(*s++);
2087 if (islower(c))
2088 *s_new = toupper(c);
2089 else
2090 *s_new = c;
2091 s_new++;
2092 }
2093 for (i = 1; i < n; i++) {
2094 int c = Py_CHARMASK(*s++);
2095 if (isupper(c))
2096 *s_new = tolower(c);
2097 else
2098 *s_new = c;
2099 s_new++;
2100 }
2101 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002102}
2103
2104
2105PyDoc_STRVAR(count__doc__,
2106"S.count(sub[, start[, end]]) -> int\n\
2107\n\
2108Return the number of non-overlapping occurrences of substring sub in\n\
2109string S[start:end]. Optional arguments start and end are interpreted\n\
2110as in slice notation.");
2111
2112static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002113string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002114{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002115 PyObject *sub_obj;
2116 const char *str = PyString_AS_STRING(self), *sub;
2117 Py_ssize_t sub_len;
2118 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002119
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002120 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2121 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2122 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 if (PyString_Check(sub_obj)) {
2125 sub = PyString_AS_STRING(sub_obj);
2126 sub_len = PyString_GET_SIZE(sub_obj);
2127 }
Christian Heimes44720832008-05-26 13:01:01 +00002128#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002129 else if (PyUnicode_Check(sub_obj)) {
2130 Py_ssize_t count;
2131 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2132 if (count == -1)
2133 return NULL;
2134 else
2135 return PyInt_FromSsize_t(count);
2136 }
Christian Heimes44720832008-05-26 13:01:01 +00002137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002138 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2139 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002140
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002141 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002142
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002143 return PyInt_FromSsize_t(
2144 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2145 );
Christian Heimes44720832008-05-26 13:01:01 +00002146}
2147
2148PyDoc_STRVAR(swapcase__doc__,
2149"S.swapcase() -> string\n\
2150\n\
2151Return a copy of the string S with uppercase characters\n\
2152converted to lowercase and vice versa.");
2153
2154static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002155string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002156{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002157 char *s = PyString_AS_STRING(self), *s_new;
2158 Py_ssize_t i, n = PyString_GET_SIZE(self);
2159 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002161 newobj = PyString_FromStringAndSize(NULL, n);
2162 if (newobj == NULL)
2163 return NULL;
2164 s_new = PyString_AsString(newobj);
2165 for (i = 0; i < n; i++) {
2166 int c = Py_CHARMASK(*s++);
2167 if (islower(c)) {
2168 *s_new = toupper(c);
2169 }
2170 else if (isupper(c)) {
2171 *s_new = tolower(c);
2172 }
2173 else
2174 *s_new = c;
2175 s_new++;
2176 }
2177 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002178}
2179
2180
2181PyDoc_STRVAR(translate__doc__,
2182"S.translate(table [,deletechars]) -> string\n\
2183\n\
2184Return a copy of the string S, where all characters occurring\n\
2185in the optional argument deletechars are removed, and the\n\
2186remaining characters have been mapped through the given\n\
2187translation table, which must be a string of length 256.");
2188
2189static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002190string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002191{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002192 register char *input, *output;
2193 const char *table;
2194 register Py_ssize_t i, c, changed = 0;
2195 PyObject *input_obj = (PyObject*)self;
2196 const char *output_start, *del_table=NULL;
2197 Py_ssize_t inlen, tablen, dellen = 0;
2198 PyObject *result;
2199 int trans_table[256];
2200 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002201
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002202 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2203 &tableobj, &delobj))
2204 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002205
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002206 if (PyString_Check(tableobj)) {
2207 table = PyString_AS_STRING(tableobj);
2208 tablen = PyString_GET_SIZE(tableobj);
2209 }
2210 else if (tableobj == Py_None) {
2211 table = NULL;
2212 tablen = 256;
2213 }
Christian Heimes44720832008-05-26 13:01:01 +00002214#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002215 else if (PyUnicode_Check(tableobj)) {
2216 /* Unicode .translate() does not support the deletechars
2217 parameter; instead a mapping to None will cause characters
2218 to be deleted. */
2219 if (delobj != NULL) {
2220 PyErr_SetString(PyExc_TypeError,
2221 "deletions are implemented differently for unicode");
2222 return NULL;
2223 }
2224 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2225 }
Christian Heimes44720832008-05-26 13:01:01 +00002226#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002227 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2228 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002229
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002230 if (tablen != 256) {
2231 PyErr_SetString(PyExc_ValueError,
2232 "translation table must be 256 characters long");
2233 return NULL;
2234 }
Christian Heimes44720832008-05-26 13:01:01 +00002235
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002236 if (delobj != NULL) {
2237 if (PyString_Check(delobj)) {
2238 del_table = PyString_AS_STRING(delobj);
2239 dellen = PyString_GET_SIZE(delobj);
2240 }
Christian Heimes44720832008-05-26 13:01:01 +00002241#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002242 else if (PyUnicode_Check(delobj)) {
2243 PyErr_SetString(PyExc_TypeError,
2244 "deletions are implemented differently for unicode");
2245 return NULL;
2246 }
Christian Heimes44720832008-05-26 13:01:01 +00002247#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002248 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2249 return NULL;
2250 }
2251 else {
2252 del_table = NULL;
2253 dellen = 0;
2254 }
Christian Heimes44720832008-05-26 13:01:01 +00002255
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002256 inlen = PyString_GET_SIZE(input_obj);
2257 result = PyString_FromStringAndSize((char *)NULL, inlen);
2258 if (result == NULL)
2259 return NULL;
2260 output_start = output = PyString_AsString(result);
2261 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002262
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002263 if (dellen == 0 && table != NULL) {
2264 /* If no deletions are required, use faster code */
2265 for (i = inlen; --i >= 0; ) {
2266 c = Py_CHARMASK(*input++);
2267 if (Py_CHARMASK((*output++ = table[c])) != c)
2268 changed = 1;
2269 }
2270 if (changed || !PyString_CheckExact(input_obj))
2271 return result;
2272 Py_DECREF(result);
2273 Py_INCREF(input_obj);
2274 return input_obj;
2275 }
Christian Heimes44720832008-05-26 13:01:01 +00002276
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002277 if (table == NULL) {
2278 for (i = 0; i < 256; i++)
2279 trans_table[i] = Py_CHARMASK(i);
2280 } else {
2281 for (i = 0; i < 256; i++)
2282 trans_table[i] = Py_CHARMASK(table[i]);
2283 }
Christian Heimes44720832008-05-26 13:01:01 +00002284
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002285 for (i = 0; i < dellen; i++)
2286 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002287
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002288 for (i = inlen; --i >= 0; ) {
2289 c = Py_CHARMASK(*input++);
2290 if (trans_table[c] != -1)
2291 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2292 continue;
2293 changed = 1;
2294 }
2295 if (!changed && PyString_CheckExact(input_obj)) {
2296 Py_DECREF(result);
2297 Py_INCREF(input_obj);
2298 return input_obj;
2299 }
2300 /* Fix the size of the resulting string */
2301 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2302 return NULL;
2303 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002304}
2305
2306
Christian Heimes44720832008-05-26 13:01:01 +00002307/* find and count characters and substrings */
2308
/* Locate the first byte equal to `c` within the first `target_len`
 * bytes of `target`.  Evaluates to a (char *) pointing at the match, or
 * NULL when there is none.  Every argument is parenthesized so that
 * arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2311
2312/* String ops must return a string. */
2313/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002314Py_LOCAL(PyStringObject *)
2315return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002316{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002317 if (PyString_CheckExact(self)) {
2318 Py_INCREF(self);
2319 return self;
2320 }
2321 return (PyStringObject *)PyString_FromStringAndSize(
2322 PyString_AS_STRING(self),
2323 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002324}
2325
2326Py_LOCAL_INLINE(Py_ssize_t)
2327countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2328{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002329 Py_ssize_t count=0;
2330 const char *start=target;
2331 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002332
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002333 while ( (start=findchar(start, end-start, c)) != NULL ) {
2334 count++;
2335 if (count >= maxcount)
2336 break;
2337 start += 1;
2338 }
2339 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002340}
2341
Christian Heimes44720832008-05-26 13:01:01 +00002342
2343/* Algorithms for different cases of string replacement */
2344
2345/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002346Py_LOCAL(PyStringObject *)
2347replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002348 const char *to_s, Py_ssize_t to_len,
2349 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002350{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002351 char *self_s, *result_s;
2352 Py_ssize_t self_len, result_len;
2353 Py_ssize_t count, i, product;
2354 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002355
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002356 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002357
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002358 /* 1 at the end plus 1 after every character */
2359 count = self_len+1;
2360 if (maxcount < count)
2361 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002362
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002363 /* Check for overflow */
2364 /* result_len = count * to_len + self_len; */
2365 product = count * to_len;
2366 if (product / to_len != count) {
2367 PyErr_SetString(PyExc_OverflowError,
2368 "replace string is too long");
2369 return NULL;
2370 }
2371 result_len = product + self_len;
2372 if (result_len < 0) {
2373 PyErr_SetString(PyExc_OverflowError,
2374 "replace string is too long");
2375 return NULL;
2376 }
Christian Heimes44720832008-05-26 13:01:01 +00002377
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002378 if (! (result = (PyStringObject *)
2379 PyString_FromStringAndSize(NULL, result_len)) )
2380 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002381
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002382 self_s = PyString_AS_STRING(self);
2383 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002384
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002385 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002386
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002387 /* Lay the first one down (guaranteed this will occur) */
2388 Py_MEMCPY(result_s, to_s, to_len);
2389 result_s += to_len;
2390 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002391
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002392 for (i=0; i<count; i++) {
2393 *result_s++ = *self_s++;
2394 Py_MEMCPY(result_s, to_s, to_len);
2395 result_s += to_len;
2396 }
2397
2398 /* Copy the rest of the original string */
2399 Py_MEMCPY(result_s, self_s, self_len-i);
2400
2401 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002402}
2403
2404/* Special case for deleting a single character */
2405/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002406Py_LOCAL(PyStringObject *)
2407replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002408 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002409{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002410 char *self_s, *result_s;
2411 char *start, *next, *end;
2412 Py_ssize_t self_len, result_len;
2413 Py_ssize_t count;
2414 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002415
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002416 self_len = PyString_GET_SIZE(self);
2417 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002418
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002419 count = countchar(self_s, self_len, from_c, maxcount);
2420 if (count == 0) {
2421 return return_self(self);
2422 }
Christian Heimes44720832008-05-26 13:01:01 +00002423
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002424 result_len = self_len - count; /* from_len == 1 */
2425 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002426
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002427 if ( (result = (PyStringObject *)
2428 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2429 return NULL;
2430 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002431
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002432 start = self_s;
2433 end = self_s + self_len;
2434 while (count-- > 0) {
2435 next = findchar(start, end-start, from_c);
2436 if (next == NULL)
2437 break;
2438 Py_MEMCPY(result_s, start, next-start);
2439 result_s += (next-start);
2440 start = next+1;
2441 }
2442 Py_MEMCPY(result_s, start, end-start);
2443
2444 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002445}
2446
2447/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2448
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002449Py_LOCAL(PyStringObject *)
2450replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002451 const char *from_s, Py_ssize_t from_len,
2452 Py_ssize_t maxcount) {
2453 char *self_s, *result_s;
2454 char *start, *next, *end;
2455 Py_ssize_t self_len, result_len;
2456 Py_ssize_t count, offset;
2457 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002458
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002459 self_len = PyString_GET_SIZE(self);
2460 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002461
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002462 count = stringlib_count(self_s, self_len,
2463 from_s, from_len,
2464 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 if (count == 0) {
2467 /* no matches */
2468 return return_self(self);
2469 }
Christian Heimes44720832008-05-26 13:01:01 +00002470
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002471 result_len = self_len - (count * from_len);
2472 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002473
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002474 if ( (result = (PyStringObject *)
2475 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2476 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002479
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002480 start = self_s;
2481 end = self_s + self_len;
2482 while (count-- > 0) {
2483 offset = stringlib_find(start, end-start,
2484 from_s, from_len,
2485 0);
2486 if (offset == -1)
2487 break;
2488 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002489
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002490 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002491
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002492 result_s += (next-start);
2493 start = next+from_len;
2494 }
2495 Py_MEMCPY(result_s, start, end-start);
2496 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002497}
2498
2499/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002500Py_LOCAL(PyStringObject *)
2501replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002502 char from_c, char to_c,
2503 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002504{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002505 char *self_s, *result_s, *start, *end, *next;
2506 Py_ssize_t self_len;
2507 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002508
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 /* The result string will be the same size */
2510 self_s = PyString_AS_STRING(self);
2511 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002512
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002513 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002514
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002515 if (next == NULL) {
2516 /* No matches; return the original string */
2517 return return_self(self);
2518 }
Christian Heimes44720832008-05-26 13:01:01 +00002519
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002520 /* Need to make a new string */
2521 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2522 if (result == NULL)
2523 return NULL;
2524 result_s = PyString_AS_STRING(result);
2525 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002526
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002527 /* change everything in-place, starting with this one */
2528 start = result_s + (next-self_s);
2529 *start = to_c;
2530 start++;
2531 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002532
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002533 while (--maxcount > 0) {
2534 next = findchar(start, end-start, from_c);
2535 if (next == NULL)
2536 break;
2537 *next = to_c;
2538 start = next+1;
2539 }
Christian Heimes44720832008-05-26 13:01:01 +00002540
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002541 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002542}
2543
2544/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002545Py_LOCAL(PyStringObject *)
2546replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002547 const char *from_s, Py_ssize_t from_len,
2548 const char *to_s, Py_ssize_t to_len,
2549 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002550{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002551 char *result_s, *start, *end;
2552 char *self_s;
2553 Py_ssize_t self_len, offset;
2554 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002555
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002556 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002557
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002558 self_s = PyString_AS_STRING(self);
2559 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002560
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002561 offset = stringlib_find(self_s, self_len,
2562 from_s, from_len,
2563 0);
2564 if (offset == -1) {
2565 /* No matches; return the original string */
2566 return return_self(self);
2567 }
Christian Heimes44720832008-05-26 13:01:01 +00002568
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002569 /* Need to make a new string */
2570 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2571 if (result == NULL)
2572 return NULL;
2573 result_s = PyString_AS_STRING(result);
2574 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002576 /* change everything in-place, starting with this one */
2577 start = result_s + offset;
2578 Py_MEMCPY(start, to_s, from_len);
2579 start += from_len;
2580 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002581
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002582 while ( --maxcount > 0) {
2583 offset = stringlib_find(start, end-start,
2584 from_s, from_len,
2585 0);
2586 if (offset==-1)
2587 break;
2588 Py_MEMCPY(start+offset, to_s, from_len);
2589 start += offset+from_len;
2590 }
Christian Heimes44720832008-05-26 13:01:01 +00002591
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002592 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002593}
2594
2595/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002596Py_LOCAL(PyStringObject *)
2597replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002598 char from_c,
2599 const char *to_s, Py_ssize_t to_len,
2600 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002601{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002602 char *self_s, *result_s;
2603 char *start, *next, *end;
2604 Py_ssize_t self_len, result_len;
2605 Py_ssize_t count, product;
2606 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002607
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002608 self_s = PyString_AS_STRING(self);
2609 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002610
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002611 count = countchar(self_s, self_len, from_c, maxcount);
2612 if (count == 0) {
2613 /* no matches, return unchanged */
2614 return return_self(self);
2615 }
Christian Heimes44720832008-05-26 13:01:01 +00002616
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002617 /* use the difference between current and new, hence the "-1" */
2618 /* result_len = self_len + count * (to_len-1) */
2619 product = count * (to_len-1);
2620 if (product / (to_len-1) != count) {
2621 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2622 return NULL;
2623 }
2624 result_len = self_len + product;
2625 if (result_len < 0) {
2626 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2627 return NULL;
2628 }
Christian Heimes44720832008-05-26 13:01:01 +00002629
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002630 if ( (result = (PyStringObject *)
2631 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2632 return NULL;
2633 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002634
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002635 start = self_s;
2636 end = self_s + self_len;
2637 while (count-- > 0) {
2638 next = findchar(start, end-start, from_c);
2639 if (next == NULL)
2640 break;
Christian Heimes44720832008-05-26 13:01:01 +00002641
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002642 if (next == start) {
2643 /* replace with the 'to' */
2644 Py_MEMCPY(result_s, to_s, to_len);
2645 result_s += to_len;
2646 start += 1;
2647 } else {
2648 /* copy the unchanged old then the 'to' */
2649 Py_MEMCPY(result_s, start, next-start);
2650 result_s += (next-start);
2651 Py_MEMCPY(result_s, to_s, to_len);
2652 result_s += to_len;
2653 start = next+1;
2654 }
2655 }
2656 /* Copy the remainder of the remaining string */
2657 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002658
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002659 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002660}
2661
2662/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002663Py_LOCAL(PyStringObject *)
2664replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002665 const char *from_s, Py_ssize_t from_len,
2666 const char *to_s, Py_ssize_t to_len,
2667 Py_ssize_t maxcount) {
2668 char *self_s, *result_s;
2669 char *start, *next, *end;
2670 Py_ssize_t self_len, result_len;
2671 Py_ssize_t count, offset, product;
2672 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002673
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002674 self_s = PyString_AS_STRING(self);
2675 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002676
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002677 count = stringlib_count(self_s, self_len,
2678 from_s, from_len,
2679 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002680
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 if (count == 0) {
2682 /* no matches, return unchanged */
2683 return return_self(self);
2684 }
Christian Heimes44720832008-05-26 13:01:01 +00002685
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002686 /* Check for overflow */
2687 /* result_len = self_len + count * (to_len-from_len) */
2688 product = count * (to_len-from_len);
2689 if (product / (to_len-from_len) != count) {
2690 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2691 return NULL;
2692 }
2693 result_len = self_len + product;
2694 if (result_len < 0) {
2695 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2696 return NULL;
2697 }
Christian Heimes44720832008-05-26 13:01:01 +00002698
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002699 if ( (result = (PyStringObject *)
2700 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2701 return NULL;
2702 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002703
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002704 start = self_s;
2705 end = self_s + self_len;
2706 while (count-- > 0) {
2707 offset = stringlib_find(start, end-start,
2708 from_s, from_len,
2709 0);
2710 if (offset == -1)
2711 break;
2712 next = start+offset;
2713 if (next == start) {
2714 /* replace with the 'to' */
2715 Py_MEMCPY(result_s, to_s, to_len);
2716 result_s += to_len;
2717 start += from_len;
2718 } else {
2719 /* copy the unchanged old then the 'to' */
2720 Py_MEMCPY(result_s, start, next-start);
2721 result_s += (next-start);
2722 Py_MEMCPY(result_s, to_s, to_len);
2723 result_s += to_len;
2724 start = next+from_len;
2725 }
2726 }
2727 /* Copy the remainder of the remaining string */
2728 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002729
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002730 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002731}
2732
2733
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002734Py_LOCAL(PyStringObject *)
2735replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002736 const char *from_s, Py_ssize_t from_len,
2737 const char *to_s, Py_ssize_t to_len,
2738 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002739{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002740 if (maxcount < 0) {
2741 maxcount = PY_SSIZE_T_MAX;
2742 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2743 /* nothing to do; return the original string */
2744 return return_self(self);
2745 }
Christian Heimes44720832008-05-26 13:01:01 +00002746
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002747 if (maxcount == 0 ||
2748 (from_len == 0 && to_len == 0)) {
2749 /* nothing to do; return the original string */
2750 return return_self(self);
2751 }
Christian Heimes44720832008-05-26 13:01:01 +00002752
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002753 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002754
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002755 if (from_len == 0) {
2756 /* insert the 'to' string everywhere. */
2757 /* >>> "Python".replace("", ".") */
2758 /* '.P.y.t.h.o.n.' */
2759 return replace_interleave(self, to_s, to_len, maxcount);
2760 }
Christian Heimes44720832008-05-26 13:01:01 +00002761
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002762 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2763 /* point for an empty self string to generate a non-empty string */
2764 /* Special case so the remaining code always gets a non-empty string */
2765 if (PyString_GET_SIZE(self) == 0) {
2766 return return_self(self);
2767 }
Christian Heimes44720832008-05-26 13:01:01 +00002768
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002769 if (to_len == 0) {
2770 /* delete all occurances of 'from' string */
2771 if (from_len == 1) {
2772 return replace_delete_single_character(
2773 self, from_s[0], maxcount);
2774 } else {
2775 return replace_delete_substring(self, from_s, from_len, maxcount);
2776 }
2777 }
Christian Heimes44720832008-05-26 13:01:01 +00002778
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002779 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002780
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002781 if (from_len == to_len) {
2782 if (from_len == 1) {
2783 return replace_single_character_in_place(
2784 self,
2785 from_s[0],
2786 to_s[0],
2787 maxcount);
2788 } else {
2789 return replace_substring_in_place(
2790 self, from_s, from_len, to_s, to_len, maxcount);
2791 }
2792 }
Christian Heimes44720832008-05-26 13:01:01 +00002793
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002794 /* Otherwise use the more generic algorithms */
2795 if (from_len == 1) {
2796 return replace_single_character(self, from_s[0],
2797 to_s, to_len, maxcount);
2798 } else {
2799 /* len('from')>=2, len('to')>=1 */
2800 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2801 }
Christian Heimes44720832008-05-26 13:01:01 +00002802}
2803
2804PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002805"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002806\n\
2807Return a copy of string S with all occurrences of substring\n\
2808old replaced by new. If the optional argument count is\n\
2809given, only the first count occurrences are replaced.");
2810
2811static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002812string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002813{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002814 Py_ssize_t count = -1;
2815 PyObject *from, *to;
2816 const char *from_s, *to_s;
2817 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002818
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002819 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2820 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002821
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002822 if (PyString_Check(from)) {
2823 from_s = PyString_AS_STRING(from);
2824 from_len = PyString_GET_SIZE(from);
2825 }
Christian Heimes44720832008-05-26 13:01:01 +00002826#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002827 if (PyUnicode_Check(from))
2828 return PyUnicode_Replace((PyObject *)self,
2829 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002830#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002831 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2832 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002833
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002834 if (PyString_Check(to)) {
2835 to_s = PyString_AS_STRING(to);
2836 to_len = PyString_GET_SIZE(to);
2837 }
Christian Heimes44720832008-05-26 13:01:01 +00002838#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002839 else if (PyUnicode_Check(to))
2840 return PyUnicode_Replace((PyObject *)self,
2841 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002842#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002843 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2844 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002845
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002846 return (PyObject *)replace((PyStringObject *) self,
2847 from_s, from_len,
2848 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002849}
2850
2851/** End DALKE **/
2852
2853/* Matches the end (direction >= 0) or start (direction < 0) of self
2854 * against substr, using the start and end arguments. Returns
2855 * -1 on error, 0 if not found and 1 if found.
2856 */
2857Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002858_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002859 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002860{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002861 Py_ssize_t len = PyString_GET_SIZE(self);
2862 Py_ssize_t slen;
2863 const char* sub;
2864 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002865
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002866 if (PyString_Check(substr)) {
2867 sub = PyString_AS_STRING(substr);
2868 slen = PyString_GET_SIZE(substr);
2869 }
Christian Heimes44720832008-05-26 13:01:01 +00002870#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002871 else if (PyUnicode_Check(substr))
2872 return PyUnicode_Tailmatch((PyObject *)self,
2873 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002874#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002875 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2876 return -1;
2877 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002878
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002879 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002880
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002881 if (direction < 0) {
2882 /* startswith */
2883 if (start+slen > len)
2884 return 0;
2885 } else {
2886 /* endswith */
2887 if (end-start < slen || start > len)
2888 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002889
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002890 if (end-slen > start)
2891 start = end - slen;
2892 }
2893 if (end-start >= slen)
2894 return ! memcmp(str+start, sub, slen);
2895 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002896}
2897
2898
2899PyDoc_STRVAR(startswith__doc__,
2900"S.startswith(prefix[, start[, end]]) -> bool\n\
2901\n\
2902Return True if S starts with the specified prefix, False otherwise.\n\
2903With optional start, test S beginning at that position.\n\
2904With optional end, stop comparing S at that position.\n\
2905prefix can also be a tuple of strings to try.");
2906
2907static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002908string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002909{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002910 Py_ssize_t start = 0;
2911 Py_ssize_t end = PY_SSIZE_T_MAX;
2912 PyObject *subobj;
2913 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002914
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002915 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2916 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2917 return NULL;
2918 if (PyTuple_Check(subobj)) {
2919 Py_ssize_t i;
2920 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2921 result = _string_tailmatch(self,
2922 PyTuple_GET_ITEM(subobj, i),
2923 start, end, -1);
2924 if (result == -1)
2925 return NULL;
2926 else if (result) {
2927 Py_RETURN_TRUE;
2928 }
2929 }
2930 Py_RETURN_FALSE;
2931 }
2932 result = _string_tailmatch(self, subobj, start, end, -1);
2933 if (result == -1)
2934 return NULL;
2935 else
2936 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002937}
2938
2939
2940PyDoc_STRVAR(endswith__doc__,
2941"S.endswith(suffix[, start[, end]]) -> bool\n\
2942\n\
2943Return True if S ends with the specified suffix, False otherwise.\n\
2944With optional start, test S beginning at that position.\n\
2945With optional end, stop comparing S at that position.\n\
2946suffix can also be a tuple of strings to try.");
2947
2948static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002949string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002950{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002951 Py_ssize_t start = 0;
2952 Py_ssize_t end = PY_SSIZE_T_MAX;
2953 PyObject *subobj;
2954 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002955
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002956 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2957 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2958 return NULL;
2959 if (PyTuple_Check(subobj)) {
2960 Py_ssize_t i;
2961 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2962 result = _string_tailmatch(self,
2963 PyTuple_GET_ITEM(subobj, i),
2964 start, end, +1);
2965 if (result == -1)
2966 return NULL;
2967 else if (result) {
2968 Py_RETURN_TRUE;
2969 }
2970 }
2971 Py_RETURN_FALSE;
2972 }
2973 result = _string_tailmatch(self, subobj, start, end, +1);
2974 if (result == -1)
2975 return NULL;
2976 else
2977 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002978}
2979
2980
2981PyDoc_STRVAR(encode__doc__,
2982"S.encode([encoding[,errors]]) -> object\n\
2983\n\
2984Encodes S using the codec registered for encoding. encoding defaults\n\
2985to the default encoding. errors may be given to set a different error\n\
2986handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2987a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2988'xmlcharrefreplace' as well as any other name registered with\n\
2989codecs.register_error that is able to handle UnicodeEncodeErrors.");
2990
2991static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002992string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002993{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002994 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002995 char *encoding = NULL;
2996 char *errors = NULL;
2997 PyObject *v;
2998
Benjamin Peterson332d7212009-09-18 21:14:55 +00002999 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003000 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003001 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003002 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003003 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003004 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003005 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003006 PyErr_Format(PyExc_TypeError,
3007 "encoder did not return a string/unicode object "
3008 "(type=%.400s)",
3009 Py_TYPE(v)->tp_name);
3010 Py_DECREF(v);
3011 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003012 }
3013 return v;
3014
3015 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003016 return NULL;
3017}
3018
Christian Heimes44720832008-05-26 13:01:01 +00003019
3020PyDoc_STRVAR(decode__doc__,
3021"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003022\n\
Christian Heimes44720832008-05-26 13:01:01 +00003023Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003024to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003025handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3026a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003027as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003028able to handle UnicodeDecodeErrors.");
3029
3030static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003031string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003032{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003033 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003034 char *encoding = NULL;
3035 char *errors = NULL;
3036 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003037
Benjamin Peterson332d7212009-09-18 21:14:55 +00003038 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003039 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003040 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003041 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003042 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003043 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003044 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003045 PyErr_Format(PyExc_TypeError,
3046 "decoder did not return a string/unicode object "
3047 "(type=%.400s)",
3048 Py_TYPE(v)->tp_name);
3049 Py_DECREF(v);
3050 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003051 }
3052 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003053
Christian Heimes44720832008-05-26 13:01:01 +00003054 onError:
3055 return NULL;
3056}
3057
3058
3059PyDoc_STRVAR(expandtabs__doc__,
3060"S.expandtabs([tabsize]) -> string\n\
3061\n\
3062Return a copy of S where all tab characters are expanded using spaces.\n\
3063If tabsize is not given, a tab size of 8 characters is assumed.");
3064
3065static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003066string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003067{
3068 const char *e, *p, *qe;
3069 char *q;
3070 Py_ssize_t i, j, incr;
3071 PyObject *u;
3072 int tabsize = 8;
3073
3074 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003075 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003076
3077 /* First pass: determine size of output string */
3078 i = 0; /* chars up to and including most recent \n or \r */
3079 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003080 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3081 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003082 if (*p == '\t') {
3083 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003084 incr = tabsize - (j % tabsize);
3085 if (j > PY_SSIZE_T_MAX - incr)
3086 goto overflow1;
3087 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003088 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003089 }
3090 else {
3091 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003092 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003093 j++;
3094 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003095 if (i > PY_SSIZE_T_MAX - j)
3096 goto overflow1;
3097 i += j;
3098 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003099 }
3100 }
Christian Heimes44720832008-05-26 13:01:01 +00003101
3102 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003103 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003104
3105 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003106 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003107 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003108 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003109
3110 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003111 q = PyString_AS_STRING(u); /* next output char */
3112 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003113
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003114 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003115 if (*p == '\t') {
3116 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003117 i = tabsize - (j % tabsize);
3118 j += i;
3119 while (i--) {
3120 if (q >= qe)
3121 goto overflow2;
3122 *q++ = ' ';
3123 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003124 }
3125 }
3126 else {
3127 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003128 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003129 *q++ = *p;
3130 j++;
3131 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003132 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003133 }
Christian Heimes44720832008-05-26 13:01:01 +00003134
3135 return u;
3136
3137 overflow2:
3138 Py_DECREF(u);
3139 overflow1:
3140 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3141 return NULL;
3142}
3143
3144Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003145pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003146{
3147 PyObject *u;
3148
3149 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003150 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003151 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003152 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003153
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003154 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003155 Py_INCREF(self);
3156 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003157 }
3158
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003159 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003160 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003161 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003162 if (left)
3163 memset(PyString_AS_STRING(u), fill, left);
3164 Py_MEMCPY(PyString_AS_STRING(u) + left,
3165 PyString_AS_STRING(self),
3166 PyString_GET_SIZE(self));
3167 if (right)
3168 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3169 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003170 }
3171
3172 return u;
3173}
3174
3175PyDoc_STRVAR(ljust__doc__,
3176"S.ljust(width[, fillchar]) -> string\n"
3177"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003178"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003179"done using the specified fill character (default is a space).");
3180
3181static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003182string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003183{
3184 Py_ssize_t width;
3185 char fillchar = ' ';
3186
3187 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003188 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003189
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003190 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003191 Py_INCREF(self);
3192 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003193 }
3194
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003195 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003196}
3197
3198
3199PyDoc_STRVAR(rjust__doc__,
3200"S.rjust(width[, fillchar]) -> string\n"
3201"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003202"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003203"done using the specified fill character (default is a space)");
3204
3205static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003206string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003207{
3208 Py_ssize_t width;
3209 char fillchar = ' ';
3210
3211 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003212 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003213
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003214 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003215 Py_INCREF(self);
3216 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003217 }
3218
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003219 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003220}
3221
3222
3223PyDoc_STRVAR(center__doc__,
3224"S.center(width[, fillchar]) -> string\n"
3225"\n"
3226"Return S centered in a string of length width. Padding is\n"
3227"done using the specified fill character (default is a space)");
3228
3229static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003230string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003231{
3232 Py_ssize_t marg, left;
3233 Py_ssize_t width;
3234 char fillchar = ' ';
3235
3236 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003237 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003238
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003239 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003240 Py_INCREF(self);
3241 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003242 }
3243
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003244 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003245 left = marg / 2 + (marg & width & 1);
3246
3247 return pad(self, left, marg - left, fillchar);
3248}
3249
3250PyDoc_STRVAR(zfill__doc__,
3251"S.zfill(width) -> string\n"
3252"\n"
3253"Pad a numeric string S with zeros on the left, to fill a field\n"
3254"of the specified width. The string S is never truncated.");
3255
3256static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003257string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003258{
3259 Py_ssize_t fill;
3260 PyObject *s;
3261 char *p;
3262 Py_ssize_t width;
3263
3264 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003265 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003266
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003267 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003268 if (PyString_CheckExact(self)) {
3269 Py_INCREF(self);
3270 return (PyObject*) self;
3271 }
3272 else
3273 return PyString_FromStringAndSize(
3274 PyString_AS_STRING(self),
3275 PyString_GET_SIZE(self)
3276 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003277 }
3278
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003279 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003280
Christian Heimes44720832008-05-26 13:01:01 +00003281 s = pad(self, fill, 0, '0');
3282
3283 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003284 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003285
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003286 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003287 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003288 /* move sign to beginning of string */
3289 p[0] = p[fill];
3290 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003291 }
3292
3293 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003294}
3295
Christian Heimes44720832008-05-26 13:01:01 +00003296PyDoc_STRVAR(isspace__doc__,
3297"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003298\n\
Christian Heimes44720832008-05-26 13:01:01 +00003299Return True if all characters in S are whitespace\n\
3300and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003301
Christian Heimes44720832008-05-26 13:01:01 +00003302static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003303string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003304{
Christian Heimes44720832008-05-26 13:01:01 +00003305 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003306 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003307 register const unsigned char *e;
3308
3309 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003310 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003311 isspace(*p))
3312 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003313
3314 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003315 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003316 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003317
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003318 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003319 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003320 if (!isspace(*p))
3321 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003322 }
Christian Heimes44720832008-05-26 13:01:01 +00003323 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003324}
3325
Christian Heimes44720832008-05-26 13:01:01 +00003326
3327PyDoc_STRVAR(isalpha__doc__,
3328"S.isalpha() -> bool\n\
3329\n\
3330Return True if all characters in S are alphabetic\n\
3331and there is at least one character in S, False otherwise.");
3332
3333static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003334string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003335{
Christian Heimes44720832008-05-26 13:01:01 +00003336 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003337 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003338 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003339
Christian Heimes44720832008-05-26 13:01:01 +00003340 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003341 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003342 isalpha(*p))
3343 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003344
3345 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003346 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003347 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003348
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003349 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003350 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003351 if (!isalpha(*p))
3352 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353 }
Christian Heimes44720832008-05-26 13:01:01 +00003354 return PyBool_FromLong(1);
3355}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003356
Christian Heimes44720832008-05-26 13:01:01 +00003357
3358PyDoc_STRVAR(isalnum__doc__,
3359"S.isalnum() -> bool\n\
3360\n\
3361Return True if all characters in S are alphanumeric\n\
3362and there is at least one character in S, False otherwise.");
3363
3364static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003365string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003366{
3367 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003368 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003369 register const unsigned char *e;
3370
3371 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003373 isalnum(*p))
3374 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003375
3376 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003377 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003378 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003379
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003380 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003381 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003382 if (!isalnum(*p))
3383 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003384 }
3385 return PyBool_FromLong(1);
3386}
3387
3388
3389PyDoc_STRVAR(isdigit__doc__,
3390"S.isdigit() -> bool\n\
3391\n\
3392Return True if all characters in S are digits\n\
3393and there is at least one character in S, False otherwise.");
3394
3395static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003396string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003397{
3398 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003399 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003400 register const unsigned char *e;
3401
3402 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003403 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003404 isdigit(*p))
3405 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003406
3407 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003408 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003409 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003410
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003411 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003412 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003413 if (!isdigit(*p))
3414 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003415 }
3416 return PyBool_FromLong(1);
3417}
3418
3419
3420PyDoc_STRVAR(islower__doc__,
3421"S.islower() -> bool\n\
3422\n\
3423Return True if all cased characters in S are lowercase and there is\n\
3424at least one cased character in S, False otherwise.");
3425
3426static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003427string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003428{
3429 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003430 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003431 register const unsigned char *e;
3432 int cased;
3433
3434 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003435 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003436 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003437
3438 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003439 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003440 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003441
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003442 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003443 cased = 0;
3444 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003445 if (isupper(*p))
3446 return PyBool_FromLong(0);
3447 else if (!cased && islower(*p))
3448 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003449 }
3450 return PyBool_FromLong(cased);
3451}
3452
3453
3454PyDoc_STRVAR(isupper__doc__,
3455"S.isupper() -> bool\n\
3456\n\
3457Return True if all cased characters in S are uppercase and there is\n\
3458at least one cased character in S, False otherwise.");
3459
3460static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003461string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003462{
3463 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003464 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003465 register const unsigned char *e;
3466 int cased;
3467
3468 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003469 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003470 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003471
3472 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003473 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003474 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003475
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003476 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003477 cased = 0;
3478 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003479 if (islower(*p))
3480 return PyBool_FromLong(0);
3481 else if (!cased && isupper(*p))
3482 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003483 }
3484 return PyBool_FromLong(cased);
3485}
3486
3487
3488PyDoc_STRVAR(istitle__doc__,
3489"S.istitle() -> bool\n\
3490\n\
3491Return True if S is a titlecased string and there is at least one\n\
3492character in S, i.e. uppercase characters may only follow uncased\n\
3493characters and lowercase characters only cased ones. Return False\n\
3494otherwise.");
3495
3496static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003497string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003498{
3499 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003500 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003501 register const unsigned char *e;
3502 int cased, previous_is_cased;
3503
3504 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003505 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003506 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003507
3508 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003509 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003510 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003511
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003512 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003513 cased = 0;
3514 previous_is_cased = 0;
3515 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003516 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003517
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003518 if (isupper(ch)) {
3519 if (previous_is_cased)
3520 return PyBool_FromLong(0);
3521 previous_is_cased = 1;
3522 cased = 1;
3523 }
3524 else if (islower(ch)) {
3525 if (!previous_is_cased)
3526 return PyBool_FromLong(0);
3527 previous_is_cased = 1;
3528 cased = 1;
3529 }
3530 else
3531 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003532 }
3533 return PyBool_FromLong(cased);
3534}
3535
3536
3537PyDoc_STRVAR(splitlines__doc__,
3538"S.splitlines([keepends]) -> list of strings\n\
3539\n\
3540Return a list of the lines in S, breaking at line boundaries.\n\
3541Line breaks are not included in the resulting list unless keepends\n\
3542is given and true.");
3543
3544static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003545string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003546{
Christian Heimes44720832008-05-26 13:01:01 +00003547 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003548
3549 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003550 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003551
Antoine Pitrou64672132010-01-13 07:55:48 +00003552 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003553 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3554 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003555 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003556}
3557
Robert Schuppenies51df0642008-06-01 16:16:17 +00003558PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003559"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003560
3561static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003562string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003563{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003564 Py_ssize_t res;
3565 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3566 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567}
3568
Christian Heimes1a6387e2008-03-26 12:49:49 +00003569static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003570string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003571{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003572 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003573}
3574
Christian Heimes1a6387e2008-03-26 12:49:49 +00003575
Christian Heimes44720832008-05-26 13:01:01 +00003576#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003577
Christian Heimes44720832008-05-26 13:01:01 +00003578PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003579"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003580\n\
Eric Smith6c840852010-11-06 19:43:44 +00003581Return a formatted version of S, using substitutions from args and kwargs.\n\
3582The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003583
Eric Smithdc13b792008-05-30 18:10:04 +00003584static PyObject *
3585string__format__(PyObject* self, PyObject* args)
3586{
3587 PyObject *format_spec;
3588 PyObject *result = NULL;
3589 PyObject *tmp = NULL;
3590
3591 /* If 2.x, convert format_spec to the same type as value */
3592 /* This is to allow things like u''.format('') */
3593 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003594 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003595 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003596 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3597 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3598 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003599 }
3600 tmp = PyObject_Str(format_spec);
3601 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003602 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003603 format_spec = tmp;
3604
3605 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003606 PyString_AS_STRING(format_spec),
3607 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003608done:
3609 Py_XDECREF(tmp);
3610 return result;
3611}
3612
Christian Heimes44720832008-05-26 13:01:01 +00003613PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003614"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003615\n\
Eric Smith6c840852010-11-06 19:43:44 +00003616Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003617
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003618
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003620string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003621 /* Counterparts of the obsolete stropmodule functions; except
3622 string.maketrans(). */
3623 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3624 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3625 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3626 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3627 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3628 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3629 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3630 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3631 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3632 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3633 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3634 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3635 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3636 capitalize__doc__},
3637 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3638 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3639 endswith__doc__},
3640 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3641 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3642 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3643 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3644 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3645 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3646 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3647 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3648 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3649 rpartition__doc__},
3650 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3651 startswith__doc__},
3652 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3653 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3654 swapcase__doc__},
3655 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3656 translate__doc__},
3657 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3658 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3659 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3660 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3661 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3662 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3663 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3664 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3665 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3666 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3667 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3668 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3669 expandtabs__doc__},
3670 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3671 splitlines__doc__},
3672 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3673 sizeof__doc__},
3674 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3675 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003676};
3677
3678static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003679str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003680
Christian Heimes44720832008-05-26 13:01:01 +00003681static PyObject *
3682string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3683{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003684 PyObject *x = NULL;
3685 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003686
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003687 if (type != &PyString_Type)
3688 return str_subtype_new(type, args, kwds);
3689 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3690 return NULL;
3691 if (x == NULL)
3692 return PyString_FromString("");
3693 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003694}
3695
3696static PyObject *
3697str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3698{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003699 PyObject *tmp, *pnew;
3700 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003701
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003702 assert(PyType_IsSubtype(type, &PyString_Type));
3703 tmp = string_new(&PyString_Type, args, kwds);
3704 if (tmp == NULL)
3705 return NULL;
3706 assert(PyString_CheckExact(tmp));
3707 n = PyString_GET_SIZE(tmp);
3708 pnew = type->tp_alloc(type, n);
3709 if (pnew != NULL) {
3710 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3711 ((PyStringObject *)pnew)->ob_shash =
3712 ((PyStringObject *)tmp)->ob_shash;
3713 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3714 }
3715 Py_DECREF(tmp);
3716 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003717}
3718
3719static PyObject *
3720basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3721{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003722 PyErr_SetString(PyExc_TypeError,
3723 "The basestring type cannot be instantiated");
3724 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003725}
3726
3727static PyObject *
3728string_mod(PyObject *v, PyObject *w)
3729{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003730 if (!PyString_Check(v)) {
3731 Py_INCREF(Py_NotImplemented);
3732 return Py_NotImplemented;
3733 }
3734 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003735}
3736
3737PyDoc_STRVAR(basestring_doc,
3738"Type basestring cannot be instantiated; it is the base for str and unicode.");
3739
3740static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003741 0, /*nb_add*/
3742 0, /*nb_subtract*/
3743 0, /*nb_multiply*/
3744 0, /*nb_divide*/
3745 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003746};
3747
3748
3749PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003750 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3751 "basestring",
3752 0,
3753 0,
3754 0, /* tp_dealloc */
3755 0, /* tp_print */
3756 0, /* tp_getattr */
3757 0, /* tp_setattr */
3758 0, /* tp_compare */
3759 0, /* tp_repr */
3760 0, /* tp_as_number */
3761 0, /* tp_as_sequence */
3762 0, /* tp_as_mapping */
3763 0, /* tp_hash */
3764 0, /* tp_call */
3765 0, /* tp_str */
3766 0, /* tp_getattro */
3767 0, /* tp_setattro */
3768 0, /* tp_as_buffer */
3769 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3770 basestring_doc, /* tp_doc */
3771 0, /* tp_traverse */
3772 0, /* tp_clear */
3773 0, /* tp_richcompare */
3774 0, /* tp_weaklistoffset */
3775 0, /* tp_iter */
3776 0, /* tp_iternext */
3777 0, /* tp_methods */
3778 0, /* tp_members */
3779 0, /* tp_getset */
3780 &PyBaseObject_Type, /* tp_base */
3781 0, /* tp_dict */
3782 0, /* tp_descr_get */
3783 0, /* tp_descr_set */
3784 0, /* tp_dictoffset */
3785 0, /* tp_init */
3786 0, /* tp_alloc */
3787 basestring_new, /* tp_new */
3788 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003789};
3790
3791PyDoc_STRVAR(string_doc,
3792"str(object) -> string\n\
3793\n\
3794Return a nice string representation of the object.\n\
3795If the argument is a string, the return value is the same object.");
3796
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003797PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003798 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3799 "str",
3800 PyStringObject_SIZE,
3801 sizeof(char),
3802 string_dealloc, /* tp_dealloc */
3803 (printfunc)string_print, /* tp_print */
3804 0, /* tp_getattr */
3805 0, /* tp_setattr */
3806 0, /* tp_compare */
3807 string_repr, /* tp_repr */
3808 &string_as_number, /* tp_as_number */
3809 &string_as_sequence, /* tp_as_sequence */
3810 &string_as_mapping, /* tp_as_mapping */
3811 (hashfunc)string_hash, /* tp_hash */
3812 0, /* tp_call */
3813 string_str, /* tp_str */
3814 PyObject_GenericGetAttr, /* tp_getattro */
3815 0, /* tp_setattro */
3816 &string_as_buffer, /* tp_as_buffer */
3817 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3818 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3819 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3820 string_doc, /* tp_doc */
3821 0, /* tp_traverse */
3822 0, /* tp_clear */
3823 (richcmpfunc)string_richcompare, /* tp_richcompare */
3824 0, /* tp_weaklistoffset */
3825 0, /* tp_iter */
3826 0, /* tp_iternext */
3827 string_methods, /* tp_methods */
3828 0, /* tp_members */
3829 0, /* tp_getset */
3830 &PyBaseString_Type, /* tp_base */
3831 0, /* tp_dict */
3832 0, /* tp_descr_get */
3833 0, /* tp_descr_set */
3834 0, /* tp_dictoffset */
3835 0, /* tp_init */
3836 0, /* tp_alloc */
3837 string_new, /* tp_new */
3838 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003839};
3840
3841void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003842PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003843{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003844 register PyObject *v;
3845 if (*pv == NULL)
3846 return;
3847 if (w == NULL || !PyString_Check(*pv)) {
3848 Py_DECREF(*pv);
3849 *pv = NULL;
3850 return;
3851 }
3852 v = string_concat((PyStringObject *) *pv, w);
3853 Py_DECREF(*pv);
3854 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003855}
3856
3857void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003858PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003859{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003860 PyString_Concat(pv, w);
3861 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003862}
3863
3864
3865/* The following function breaks the notion that strings are immutable:
3866 it changes the size of a string. We get away with this only if there
3867 is only one module referencing the object. You can also think of it
3868 as creating a new string object and destroying the old one, only
3869 more efficiently. In any case, don't use this if the string may
3870 already be known to some other part of the code...
3871 Note that if there's not enough memory to resize the string, the original
3872 string object at *pv is deallocated, *pv is set to NULL, an "out of
3873 memory" exception is set, and -1 is returned. Else (on success) 0 is
3874 returned, and the value in *pv may or may not be the same as on input.
3875 As always, an extra byte is allocated for a trailing \0 byte (newsize
3876 does *not* include that), and a trailing \0 byte is stored.
3877*/
3878
3879int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003880_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003881{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003882 register PyObject *v;
3883 register PyStringObject *sv;
3884 v = *pv;
3885 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3886 PyString_CHECK_INTERNED(v)) {
3887 *pv = 0;
3888 Py_DECREF(v);
3889 PyErr_BadInternalCall();
3890 return -1;
3891 }
3892 /* XXX UNREF/NEWREF interface should be more symmetrical */
3893 _Py_DEC_REFTOTAL;
3894 _Py_ForgetReference(v);
3895 *pv = (PyObject *)
3896 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3897 if (*pv == NULL) {
3898 PyObject_Del(v);
3899 PyErr_NoMemory();
3900 return -1;
3901 }
3902 _Py_NewReference(*pv);
3903 sv = (PyStringObject *) *pv;
3904 Py_SIZE(sv) = newsize;
3905 sv->ob_sval[newsize] = '\0';
3906 sv->ob_shash = -1; /* invalidate cached hash value */
3907 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003908}
3909
3910/* Helpers for formatstring */
3911
3912Py_LOCAL_INLINE(PyObject *)
3913getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3914{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003915 Py_ssize_t argidx = *p_argidx;
3916 if (argidx < arglen) {
3917 (*p_argidx)++;
3918 if (arglen < 0)
3919 return args;
3920 else
3921 return PyTuple_GetItem(args, argidx);
3922 }
3923 PyErr_SetString(PyExc_TypeError,
3924 "not enough arguments for format string");
3925 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003926}
3927
/* Format flag bits, one per flag character of a conversion specifier. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00003940
Mark Dickinson18cfada2009-11-23 18:46:41 +00003941/* Returns a new reference to a PyString object, or NULL on failure. */
3942
3943static PyObject *
3944formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003945{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003946 char *p;
3947 PyObject *result;
3948 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003949
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003950 x = PyFloat_AsDouble(v);
3951 if (x == -1.0 && PyErr_Occurred()) {
3952 PyErr_Format(PyExc_TypeError, "float argument required, "
3953 "not %.200s", Py_TYPE(v)->tp_name);
3954 return NULL;
3955 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003956
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003957 if (prec < 0)
3958 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003960 p = PyOS_double_to_string(x, type, prec,
3961 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003962
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003963 if (p == NULL)
3964 return NULL;
3965 result = PyString_FromStringAndSize(p, strlen(p));
3966 PyMem_Free(p);
3967 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003968}
3969
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003970/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003971 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3972 * Python's regular ints.
3973 * Return value: a new PyString*, or NULL if error.
3974 * . *pbuf is set to point into it,
3975 * *plen set to the # of chars following that.
3976 * Caller must decref it when done using pbuf.
3977 * The string starting at *pbuf is of the form
3978 * "-"? ("0x" | "0X")? digit+
3979 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3980 * set in flags. The case of hex digits will be correct,
3981 * There will be at least prec digits, zero-filled on the left if
3982 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003983 * val object to be converted
3984 * flags bitmask of format flags; only F_ALT is looked at
3985 * prec minimum number of digits; 0-fill on left if needed
3986 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003987 *
3988 * CAUTION: o, x and X conversions on regular ints can never
3989 * produce a '-' sign, but can for Python's unbounded ints.
3990 */
3991PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003992_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003993 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003994{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003995 PyObject *result = NULL;
3996 char *buf;
3997 Py_ssize_t i;
3998 int sign; /* 1 if '-', else 0 */
3999 int len; /* number of characters */
4000 Py_ssize_t llen;
4001 int numdigits; /* len == numnondigits + numdigits */
4002 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004003
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004004 switch (type) {
4005 case 'd':
4006 case 'u':
4007 result = Py_TYPE(val)->tp_str(val);
4008 break;
4009 case 'o':
4010 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4011 break;
4012 case 'x':
4013 case 'X':
4014 numnondigits = 2;
4015 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4016 break;
4017 default:
4018 assert(!"'type' not in [duoxX]");
4019 }
4020 if (!result)
4021 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004023 buf = PyString_AsString(result);
4024 if (!buf) {
4025 Py_DECREF(result);
4026 return NULL;
4027 }
Christian Heimes44720832008-05-26 13:01:01 +00004028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004029 /* To modify the string in-place, there can only be one reference. */
4030 if (Py_REFCNT(result) != 1) {
4031 PyErr_BadInternalCall();
4032 return NULL;
4033 }
4034 llen = PyString_Size(result);
4035 if (llen > INT_MAX) {
4036 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4037 return NULL;
4038 }
4039 len = (int)llen;
4040 if (buf[len-1] == 'L') {
4041 --len;
4042 buf[len] = '\0';
4043 }
4044 sign = buf[0] == '-';
4045 numnondigits += sign;
4046 numdigits = len - numnondigits;
4047 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004048
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004049 /* Get rid of base marker unless F_ALT */
4050 if ((flags & F_ALT) == 0) {
4051 /* Need to skip 0x, 0X or 0. */
4052 int skipped = 0;
4053 switch (type) {
4054 case 'o':
4055 assert(buf[sign] == '0');
4056 /* If 0 is only digit, leave it alone. */
4057 if (numdigits > 1) {
4058 skipped = 1;
4059 --numdigits;
4060 }
4061 break;
4062 case 'x':
4063 case 'X':
4064 assert(buf[sign] == '0');
4065 assert(buf[sign + 1] == 'x');
4066 skipped = 2;
4067 numnondigits -= 2;
4068 break;
4069 }
4070 if (skipped) {
4071 buf += skipped;
4072 len -= skipped;
4073 if (sign)
4074 buf[0] = '-';
4075 }
4076 assert(len == numnondigits + numdigits);
4077 assert(numdigits > 0);
4078 }
Christian Heimes44720832008-05-26 13:01:01 +00004079
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004080 /* Fill with leading zeroes to meet minimum width. */
4081 if (prec > numdigits) {
4082 PyObject *r1 = PyString_FromStringAndSize(NULL,
4083 numnondigits + prec);
4084 char *b1;
4085 if (!r1) {
4086 Py_DECREF(result);
4087 return NULL;
4088 }
4089 b1 = PyString_AS_STRING(r1);
4090 for (i = 0; i < numnondigits; ++i)
4091 *b1++ = *buf++;
4092 for (i = 0; i < prec - numdigits; i++)
4093 *b1++ = '0';
4094 for (i = 0; i < numdigits; i++)
4095 *b1++ = *buf++;
4096 *b1 = '\0';
4097 Py_DECREF(result);
4098 result = r1;
4099 buf = PyString_AS_STRING(result);
4100 len = numnondigits + prec;
4101 }
Christian Heimes44720832008-05-26 13:01:01 +00004102
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004103 /* Fix up case for hex conversions. */
4104 if (type == 'X') {
4105 /* Need to convert all lower case letters to upper case.
4106 and need to convert 0x to 0X (and -0x to -0X). */
4107 for (i = 0; i < len; i++)
4108 if (buf[i] >= 'a' && buf[i] <= 'x')
4109 buf[i] -= 'a'-'A';
4110 }
4111 *pbuf = buf;
4112 *plen = len;
4113 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004114}
4115
4116Py_LOCAL_INLINE(int)
4117formatint(char *buf, size_t buflen, int flags,
4118 int prec, int type, PyObject *v)
4119{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004120 /* fmt = '%#.' + `prec` + 'l' + `type`
4121 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4122 + 1 + 1 = 24 */
4123 char fmt[64]; /* plenty big enough! */
4124 char *sign;
4125 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004126
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004127 x = PyInt_AsLong(v);
4128 if (x == -1 && PyErr_Occurred()) {
4129 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4130 Py_TYPE(v)->tp_name);
4131 return -1;
4132 }
4133 if (x < 0 && type == 'u') {
4134 type = 'd';
4135 }
4136 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4137 sign = "-";
4138 else
4139 sign = "";
4140 if (prec < 0)
4141 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004142
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004143 if ((flags & F_ALT) &&
4144 (type == 'x' || type == 'X')) {
4145 /* When converting under %#x or %#X, there are a number
4146 * of issues that cause pain:
4147 * - when 0 is being converted, the C standard leaves off
4148 * the '0x' or '0X', which is inconsistent with other
4149 * %#x/%#X conversions and inconsistent with Python's
4150 * hex() function
4151 * - there are platforms that violate the standard and
4152 * convert 0 with the '0x' or '0X'
4153 * (Metrowerks, Compaq Tru64)
4154 * - there are platforms that give '0x' when converting
4155 * under %#X, but convert 0 in accordance with the
4156 * standard (OS/2 EMX)
4157 *
4158 * We can achieve the desired consistency by inserting our
4159 * own '0x' or '0X' prefix, and substituting %x/%X in place
4160 * of %#x/%#X.
4161 *
4162 * Note that this is the same approach as used in
4163 * formatint() in unicodeobject.c
4164 */
4165 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4166 sign, type, prec, type);
4167 }
4168 else {
4169 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4170 sign, (flags&F_ALT) ? "#" : "",
4171 prec, type);
4172 }
Christian Heimes44720832008-05-26 13:01:01 +00004173
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004174 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4175 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4176 */
4177 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4178 PyErr_SetString(PyExc_OverflowError,
4179 "formatted integer is too long (precision too large?)");
4180 return -1;
4181 }
4182 if (sign[0])
4183 PyOS_snprintf(buf, buflen, fmt, -x);
4184 else
4185 PyOS_snprintf(buf, buflen, fmt, x);
4186 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004187}
4188
4189Py_LOCAL_INLINE(int)
4190formatchar(char *buf, size_t buflen, PyObject *v)
4191{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004192 /* presume that the buffer is at least 2 characters long */
4193 if (PyString_Check(v)) {
4194 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4195 return -1;
4196 }
4197 else {
4198 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4199 return -1;
4200 }
4201 buf[1] = '\0';
4202 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004203}
4204
4205/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4206
Mark Dickinson18cfada2009-11-23 18:46:41 +00004207 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004208 chars are formatted. XXX This is a magic number. Each formatting
4209 routine does bounds checking to ensure no overflow, but a better
4210 solution may be to malloc a buffer of appropriate size for each
4211 format. For now, the current solution is sufficient.
4212*/
4213#define FORMATBUFLEN (size_t)120
4214
4215PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004216PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004217{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004218 char *fmt, *res;
4219 Py_ssize_t arglen, argidx;
4220 Py_ssize_t reslen, rescnt, fmtcnt;
4221 int args_owned = 0;
4222 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004223#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004224 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004225#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004226 PyObject *dict = NULL;
4227 if (format == NULL || !PyString_Check(format) || args == NULL) {
4228 PyErr_BadInternalCall();
4229 return NULL;
4230 }
4231 orig_args = args;
4232 fmt = PyString_AS_STRING(format);
4233 fmtcnt = PyString_GET_SIZE(format);
4234 reslen = rescnt = fmtcnt + 100;
4235 result = PyString_FromStringAndSize((char *)NULL, reslen);
4236 if (result == NULL)
4237 return NULL;
4238 res = PyString_AsString(result);
4239 if (PyTuple_Check(args)) {
4240 arglen = PyTuple_GET_SIZE(args);
4241 argidx = 0;
4242 }
4243 else {
4244 arglen = -1;
4245 argidx = -2;
4246 }
4247 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4248 !PyObject_TypeCheck(args, &PyBaseString_Type))
4249 dict = args;
4250 while (--fmtcnt >= 0) {
4251 if (*fmt != '%') {
4252 if (--rescnt < 0) {
4253 rescnt = fmtcnt + 100;
4254 reslen += rescnt;
4255 if (_PyString_Resize(&result, reslen))
4256 return NULL;
4257 res = PyString_AS_STRING(result)
4258 + reslen - rescnt;
4259 --rescnt;
4260 }
4261 *res++ = *fmt++;
4262 }
4263 else {
4264 /* Got a format specifier */
4265 int flags = 0;
4266 Py_ssize_t width = -1;
4267 int prec = -1;
4268 int c = '\0';
4269 int fill;
4270 int isnumok;
4271 PyObject *v = NULL;
4272 PyObject *temp = NULL;
4273 char *pbuf;
4274 int sign;
4275 Py_ssize_t len;
4276 char formatbuf[FORMATBUFLEN];
4277 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004278#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004279 char *fmt_start = fmt;
4280 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004281#endif
4282
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004283 fmt++;
4284 if (*fmt == '(') {
4285 char *keystart;
4286 Py_ssize_t keylen;
4287 PyObject *key;
4288 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004289
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004290 if (dict == NULL) {
4291 PyErr_SetString(PyExc_TypeError,
4292 "format requires a mapping");
4293 goto error;
4294 }
4295 ++fmt;
4296 --fmtcnt;
4297 keystart = fmt;
4298 /* Skip over balanced parentheses */
4299 while (pcount > 0 && --fmtcnt >= 0) {
4300 if (*fmt == ')')
4301 --pcount;
4302 else if (*fmt == '(')
4303 ++pcount;
4304 fmt++;
4305 }
4306 keylen = fmt - keystart - 1;
4307 if (fmtcnt < 0 || pcount > 0) {
4308 PyErr_SetString(PyExc_ValueError,
4309 "incomplete format key");
4310 goto error;
4311 }
4312 key = PyString_FromStringAndSize(keystart,
4313 keylen);
4314 if (key == NULL)
4315 goto error;
4316 if (args_owned) {
4317 Py_DECREF(args);
4318 args_owned = 0;
4319 }
4320 args = PyObject_GetItem(dict, key);
4321 Py_DECREF(key);
4322 if (args == NULL) {
4323 goto error;
4324 }
4325 args_owned = 1;
4326 arglen = -1;
4327 argidx = -2;
4328 }
4329 while (--fmtcnt >= 0) {
4330 switch (c = *fmt++) {
4331 case '-': flags |= F_LJUST; continue;
4332 case '+': flags |= F_SIGN; continue;
4333 case ' ': flags |= F_BLANK; continue;
4334 case '#': flags |= F_ALT; continue;
4335 case '0': flags |= F_ZERO; continue;
4336 }
4337 break;
4338 }
4339 if (c == '*') {
4340 v = getnextarg(args, arglen, &argidx);
4341 if (v == NULL)
4342 goto error;
4343 if (!PyInt_Check(v)) {
4344 PyErr_SetString(PyExc_TypeError,
4345 "* wants int");
4346 goto error;
4347 }
4348 width = PyInt_AsLong(v);
4349 if (width < 0) {
4350 flags |= F_LJUST;
4351 width = -width;
4352 }
4353 if (--fmtcnt >= 0)
4354 c = *fmt++;
4355 }
4356 else if (c >= 0 && isdigit(c)) {
4357 width = c - '0';
4358 while (--fmtcnt >= 0) {
4359 c = Py_CHARMASK(*fmt++);
4360 if (!isdigit(c))
4361 break;
4362 if ((width*10) / 10 != width) {
4363 PyErr_SetString(
4364 PyExc_ValueError,
4365 "width too big");
4366 goto error;
4367 }
4368 width = width*10 + (c - '0');
4369 }
4370 }
4371 if (c == '.') {
4372 prec = 0;
4373 if (--fmtcnt >= 0)
4374 c = *fmt++;
4375 if (c == '*') {
4376 v = getnextarg(args, arglen, &argidx);
4377 if (v == NULL)
4378 goto error;
4379 if (!PyInt_Check(v)) {
4380 PyErr_SetString(
4381 PyExc_TypeError,
4382 "* wants int");
4383 goto error;
4384 }
4385 prec = PyInt_AsLong(v);
4386 if (prec < 0)
4387 prec = 0;
4388 if (--fmtcnt >= 0)
4389 c = *fmt++;
4390 }
4391 else if (c >= 0 && isdigit(c)) {
4392 prec = c - '0';
4393 while (--fmtcnt >= 0) {
4394 c = Py_CHARMASK(*fmt++);
4395 if (!isdigit(c))
4396 break;
4397 if ((prec*10) / 10 != prec) {
4398 PyErr_SetString(
4399 PyExc_ValueError,
4400 "prec too big");
4401 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004402 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004403 prec = prec*10 + (c - '0');
4404 }
4405 }
4406 } /* prec */
4407 if (fmtcnt >= 0) {
4408 if (c == 'h' || c == 'l' || c == 'L') {
4409 if (--fmtcnt >= 0)
4410 c = *fmt++;
4411 }
4412 }
4413 if (fmtcnt < 0) {
4414 PyErr_SetString(PyExc_ValueError,
4415 "incomplete format");
4416 goto error;
4417 }
4418 if (c != '%') {
4419 v = getnextarg(args, arglen, &argidx);
4420 if (v == NULL)
4421 goto error;
4422 }
4423 sign = 0;
4424 fill = ' ';
4425 switch (c) {
4426 case '%':
4427 pbuf = "%";
4428 len = 1;
4429 break;
4430 case 's':
4431#ifdef Py_USING_UNICODE
4432 if (PyUnicode_Check(v)) {
4433 fmt = fmt_start;
4434 argidx = argidx_start;
4435 goto unicode;
4436 }
4437#endif
4438 temp = _PyObject_Str(v);
4439#ifdef Py_USING_UNICODE
4440 if (temp != NULL && PyUnicode_Check(temp)) {
4441 Py_DECREF(temp);
4442 fmt = fmt_start;
4443 argidx = argidx_start;
4444 goto unicode;
4445 }
4446#endif
4447 /* Fall through */
4448 case 'r':
4449 if (c == 'r')
4450 temp = PyObject_Repr(v);
4451 if (temp == NULL)
4452 goto error;
4453 if (!PyString_Check(temp)) {
4454 PyErr_SetString(PyExc_TypeError,
4455 "%s argument has non-string str()");
4456 Py_DECREF(temp);
4457 goto error;
4458 }
4459 pbuf = PyString_AS_STRING(temp);
4460 len = PyString_GET_SIZE(temp);
4461 if (prec >= 0 && len > prec)
4462 len = prec;
4463 break;
4464 case 'i':
4465 case 'd':
4466 case 'u':
4467 case 'o':
4468 case 'x':
4469 case 'X':
4470 if (c == 'i')
4471 c = 'd';
4472 isnumok = 0;
4473 if (PyNumber_Check(v)) {
4474 PyObject *iobj=NULL;
4475
4476 if (PyInt_Check(v) || (PyLong_Check(v))) {
4477 iobj = v;
4478 Py_INCREF(iobj);
4479 }
4480 else {
4481 iobj = PyNumber_Int(v);
4482 if (iobj==NULL) iobj = PyNumber_Long(v);
4483 }
4484 if (iobj!=NULL) {
4485 if (PyInt_Check(iobj)) {
4486 isnumok = 1;
4487 pbuf = formatbuf;
4488 len = formatint(pbuf,
4489 sizeof(formatbuf),
4490 flags, prec, c, iobj);
4491 Py_DECREF(iobj);
4492 if (len < 0)
4493 goto error;
4494 sign = 1;
4495 }
4496 else if (PyLong_Check(iobj)) {
4497 int ilen;
4498
4499 isnumok = 1;
4500 temp = _PyString_FormatLong(iobj, flags,
4501 prec, c, &pbuf, &ilen);
4502 Py_DECREF(iobj);
4503 len = ilen;
4504 if (!temp)
4505 goto error;
4506 sign = 1;
4507 }
4508 else {
4509 Py_DECREF(iobj);
4510 }
4511 }
4512 }
4513 if (!isnumok) {
4514 PyErr_Format(PyExc_TypeError,
4515 "%%%c format: a number is required, "
4516 "not %.200s", c, Py_TYPE(v)->tp_name);
4517 goto error;
4518 }
4519 if (flags & F_ZERO)
4520 fill = '0';
4521 break;
4522 case 'e':
4523 case 'E':
4524 case 'f':
4525 case 'F':
4526 case 'g':
4527 case 'G':
4528 temp = formatfloat(v, flags, prec, c);
4529 if (temp == NULL)
4530 goto error;
4531 pbuf = PyString_AS_STRING(temp);
4532 len = PyString_GET_SIZE(temp);
4533 sign = 1;
4534 if (flags & F_ZERO)
4535 fill = '0';
4536 break;
4537 case 'c':
4538#ifdef Py_USING_UNICODE
4539 if (PyUnicode_Check(v)) {
4540 fmt = fmt_start;
4541 argidx = argidx_start;
4542 goto unicode;
4543 }
4544#endif
4545 pbuf = formatbuf;
4546 len = formatchar(pbuf, sizeof(formatbuf), v);
4547 if (len < 0)
4548 goto error;
4549 break;
4550 default:
4551 PyErr_Format(PyExc_ValueError,
4552 "unsupported format character '%c' (0x%x) "
4553 "at index %zd",
4554 c, c,
4555 (Py_ssize_t)(fmt - 1 -
4556 PyString_AsString(format)));
4557 goto error;
4558 }
4559 if (sign) {
4560 if (*pbuf == '-' || *pbuf == '+') {
4561 sign = *pbuf++;
4562 len--;
4563 }
4564 else if (flags & F_SIGN)
4565 sign = '+';
4566 else if (flags & F_BLANK)
4567 sign = ' ';
4568 else
4569 sign = 0;
4570 }
4571 if (width < len)
4572 width = len;
4573 if (rescnt - (sign != 0) < width) {
4574 reslen -= rescnt;
4575 rescnt = width + fmtcnt + 100;
4576 reslen += rescnt;
4577 if (reslen < 0) {
4578 Py_DECREF(result);
4579 Py_XDECREF(temp);
4580 return PyErr_NoMemory();
4581 }
4582 if (_PyString_Resize(&result, reslen)) {
4583 Py_XDECREF(temp);
4584 return NULL;
4585 }
4586 res = PyString_AS_STRING(result)
4587 + reslen - rescnt;
4588 }
4589 if (sign) {
4590 if (fill != ' ')
4591 *res++ = sign;
4592 rescnt--;
4593 if (width > len)
4594 width--;
4595 }
4596 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4597 assert(pbuf[0] == '0');
4598 assert(pbuf[1] == c);
4599 if (fill != ' ') {
4600 *res++ = *pbuf++;
4601 *res++ = *pbuf++;
4602 }
4603 rescnt -= 2;
4604 width -= 2;
4605 if (width < 0)
4606 width = 0;
4607 len -= 2;
4608 }
4609 if (width > len && !(flags & F_LJUST)) {
4610 do {
4611 --rescnt;
4612 *res++ = fill;
4613 } while (--width > len);
4614 }
4615 if (fill == ' ') {
4616 if (sign)
4617 *res++ = sign;
4618 if ((flags & F_ALT) &&
4619 (c == 'x' || c == 'X')) {
4620 assert(pbuf[0] == '0');
4621 assert(pbuf[1] == c);
4622 *res++ = *pbuf++;
4623 *res++ = *pbuf++;
4624 }
4625 }
4626 Py_MEMCPY(res, pbuf, len);
4627 res += len;
4628 rescnt -= len;
4629 while (--width >= len) {
4630 --rescnt;
4631 *res++ = ' ';
4632 }
4633 if (dict && (argidx < arglen) && c != '%') {
4634 PyErr_SetString(PyExc_TypeError,
4635 "not all arguments converted during string formatting");
4636 Py_XDECREF(temp);
4637 goto error;
4638 }
4639 Py_XDECREF(temp);
4640 } /* '%' */
4641 } /* until end */
4642 if (argidx < arglen && !dict) {
4643 PyErr_SetString(PyExc_TypeError,
4644 "not all arguments converted during string formatting");
4645 goto error;
4646 }
4647 if (args_owned) {
4648 Py_DECREF(args);
4649 }
4650 if (_PyString_Resize(&result, reslen - rescnt))
4651 return NULL;
4652 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004653
4654#ifdef Py_USING_UNICODE
4655 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004656 if (args_owned) {
4657 Py_DECREF(args);
4658 args_owned = 0;
4659 }
4660 /* Fiddle args right (remove the first argidx arguments) */
4661 if (PyTuple_Check(orig_args) && argidx > 0) {
4662 PyObject *v;
4663 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4664 v = PyTuple_New(n);
4665 if (v == NULL)
4666 goto error;
4667 while (--n >= 0) {
4668 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4669 Py_INCREF(w);
4670 PyTuple_SET_ITEM(v, n, w);
4671 }
4672 args = v;
4673 } else {
4674 Py_INCREF(orig_args);
4675 args = orig_args;
4676 }
4677 args_owned = 1;
4678 /* Take what we have of the result and let the Unicode formatting
4679 function format the rest of the input. */
4680 rescnt = res - PyString_AS_STRING(result);
4681 if (_PyString_Resize(&result, rescnt))
4682 goto error;
4683 fmtcnt = PyString_GET_SIZE(format) - \
4684 (fmt - PyString_AS_STRING(format));
4685 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4686 if (format == NULL)
4687 goto error;
4688 v = PyUnicode_Format(format, args);
4689 Py_DECREF(format);
4690 if (v == NULL)
4691 goto error;
4692 /* Paste what we have (result) to what the Unicode formatting
4693 function returned (v) and return the result (or error) */
4694 w = PyUnicode_Concat(result, v);
4695 Py_DECREF(result);
4696 Py_DECREF(v);
4697 Py_DECREF(args);
4698 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004699#endif /* Py_USING_UNICODE */
4700
4701 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004702 Py_DECREF(result);
4703 if (args_owned) {
4704 Py_DECREF(args);
4705 }
4706 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004707}
4708
4709void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004710PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004711{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004712 register PyStringObject *s = (PyStringObject *)(*p);
4713 PyObject *t;
4714 if (s == NULL || !PyString_Check(s))
4715 Py_FatalError("PyString_InternInPlace: strings only please!");
4716 /* If it's a string subclass, we don't really know what putting
4717 it in the interned dict might do. */
4718 if (!PyString_CheckExact(s))
4719 return;
4720 if (PyString_CHECK_INTERNED(s))
4721 return;
4722 if (interned == NULL) {
4723 interned = PyDict_New();
4724 if (interned == NULL) {
4725 PyErr_Clear(); /* Don't leave an exception */
4726 return;
4727 }
4728 }
4729 t = PyDict_GetItem(interned, (PyObject *)s);
4730 if (t) {
4731 Py_INCREF(t);
4732 Py_DECREF(*p);
4733 *p = t;
4734 return;
4735 }
Christian Heimes44720832008-05-26 13:01:01 +00004736
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004737 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4738 PyErr_Clear();
4739 return;
4740 }
4741 /* The two references in interned are not counted by refcnt.
4742 The string deallocator will take care of this */
4743 Py_REFCNT(s) -= 2;
4744 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004745}
4746
4747void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004748PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004749{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004750 PyString_InternInPlace(p);
4751 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4752 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4753 Py_INCREF(*p);
4754 }
Christian Heimes44720832008-05-26 13:01:01 +00004755}
4756
4757
4758PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004759PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004761 PyObject *s = PyString_FromString(cp);
4762 if (s == NULL)
4763 return NULL;
4764 PyString_InternInPlace(&s);
4765 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004766}
4767
4768void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004769PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004771 int i;
4772 for (i = 0; i < UCHAR_MAX + 1; i++) {
4773 Py_XDECREF(characters[i]);
4774 characters[i] = NULL;
4775 }
4776 Py_XDECREF(nullstring);
4777 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004778}
4779
4780void _Py_ReleaseInternedStrings(void)
4781{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004782 PyObject *keys;
4783 PyStringObject *s;
4784 Py_ssize_t i, n;
4785 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004786
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004787 if (interned == NULL || !PyDict_Check(interned))
4788 return;
4789 keys = PyDict_Keys(interned);
4790 if (keys == NULL || !PyList_Check(keys)) {
4791 PyErr_Clear();
4792 return;
4793 }
Christian Heimes44720832008-05-26 13:01:01 +00004794
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004795 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4796 detector, interned strings are not forcibly deallocated; rather, we
4797 give them their stolen references back, and then clear and DECREF
4798 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004799
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004800 n = PyList_GET_SIZE(keys);
4801 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4802 n);
4803 for (i = 0; i < n; i++) {
4804 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4805 switch (s->ob_sstate) {
4806 case SSTATE_NOT_INTERNED:
4807 /* XXX Shouldn't happen */
4808 break;
4809 case SSTATE_INTERNED_IMMORTAL:
4810 Py_REFCNT(s) += 1;
4811 immortal_size += Py_SIZE(s);
4812 break;
4813 case SSTATE_INTERNED_MORTAL:
4814 Py_REFCNT(s) += 2;
4815 mortal_size += Py_SIZE(s);
4816 break;
4817 default:
4818 Py_FatalError("Inconsistent interned string state.");
4819 }
4820 s->ob_sstate = SSTATE_NOT_INTERNED;
4821 }
4822 fprintf(stderr, "total size of all interned strings: "
4823 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4824 "mortal/immortal\n", mortal_size, immortal_size);
4825 Py_DECREF(keys);
4826 PyDict_Clear(interned);
4827 Py_DECREF(interned);
4828 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004829}