blob: d6c4f770e33093d41f45bcd7e959fdaa0cede03c [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Christian Heimes44720832008-05-26 13:01:01 +0000729#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
Christian Heimes44720832008-05-26 13:01:01 +0000739#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 default:
741 *p++ = '\\';
742 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200743 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000750 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000751 Py_DECREF(v);
752 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000753}
754
755/* -------------------------------------------------------------------- */
756/* object api */
757
Christian Heimes1a6387e2008-03-26 12:49:49 +0000758static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000759string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000766}
767
Christian Heimes44720832008-05-26 13:01:01 +0000768static /*const*/ char *
769string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
778Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000779PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784}
785
Christian Heimes44720832008-05-26 13:01:01 +0000786/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000787PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792}
793
794int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 register char **s,
797 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000803
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000805#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
Christian Heimes44720832008-05-26 13:01:01 +0000820
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000830}
831
Christian Heimes1a6387e2008-03-26 12:49:49 +0000832/* -------------------------------------------------------------------- */
833/* Methods */
834
Christian Heimes44720832008-05-26 13:01:01 +0000835#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000837
Christian Heimes1a6387e2008-03-26 12:49:49 +0000838#include "stringlib/count.h"
839#include "stringlib/find.h"
840#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000841#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000844#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000845
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
847
848static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000849string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
Christian Heimes44720832008-05-26 13:01:01 +0000880#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000881 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000882#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000884#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000885 Py_END_ALLOW_THREADS
886 return 0;
887 }
Christian Heimes44720832008-05-26 13:01:01 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000919}
920
Christian Heimes44720832008-05-26 13:01:01 +0000921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000922PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
Christian Heimes44720832008-05-26 13:01:01 +0000981}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000982
983static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000984string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987}
988
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000990string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002}
1003
Christian Heimes44720832008-05-26 13:01:01 +00001004static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001005string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001006{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001008}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001009
Christian Heimes44720832008-05-26 13:01:01 +00001010static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001011string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001012{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001016#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001019#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001027#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001049
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001066#undef b
1067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001070string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001071{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001120}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001121
Christian Heimes44720832008-05-26 13:01:01 +00001122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001125string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001127 /* j -- may be negative! */
1128{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001143}
1144
1145static int
1146string_contains(PyObject *str_obj, PyObject *sub_obj)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001149#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001152#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
Christian Heimes44720832008-05-26 13:01:01 +00001160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001162}
1163
1164static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001165string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
Christian Heimes44720832008-05-26 13:01:01 +00001178#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001179 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001180#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_INCREF(v);
1182 }
1183 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001184}
1185
1186static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001187string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001193
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001243 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001244 Py_INCREF(result);
1245 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001246}
1247
1248int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001249_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001250{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001256}
1257
1258static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001259string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001260{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001264
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 if (a->ob_shash != -1)
1266 return a->ob_shash;
1267 len = Py_SIZE(a);
1268 p = (unsigned char *) a->ob_sval;
1269 x = *p << 7;
1270 while (--len >= 0)
1271 x = (1000003*x) ^ *p++;
1272 x ^= Py_SIZE(a);
1273 if (x == -1)
1274 x = -2;
1275 a->ob_shash = x;
1276 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001277}
1278
1279static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001280string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001281{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 if (PyIndex_Check(item)) {
1283 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1284 if (i == -1 && PyErr_Occurred())
1285 return NULL;
1286 if (i < 0)
1287 i += PyString_GET_SIZE(self);
1288 return string_item(self, i);
1289 }
1290 else if (PySlice_Check(item)) {
1291 Py_ssize_t start, stop, step, slicelength, cur, i;
1292 char* source_buf;
1293 char* result_buf;
1294 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001296 if (PySlice_GetIndicesEx((PySliceObject*)item,
1297 PyString_GET_SIZE(self),
1298 &start, &stop, &step, &slicelength) < 0) {
1299 return NULL;
1300 }
Christian Heimes44720832008-05-26 13:01:01 +00001301
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001302 if (slicelength <= 0) {
1303 return PyString_FromStringAndSize("", 0);
1304 }
1305 else if (start == 0 && step == 1 &&
1306 slicelength == PyString_GET_SIZE(self) &&
1307 PyString_CheckExact(self)) {
1308 Py_INCREF(self);
1309 return (PyObject *)self;
1310 }
1311 else if (step == 1) {
1312 return PyString_FromStringAndSize(
1313 PyString_AS_STRING(self) + start,
1314 slicelength);
1315 }
1316 else {
1317 source_buf = PyString_AsString((PyObject*)self);
1318 result_buf = (char *)PyMem_Malloc(slicelength);
1319 if (result_buf == NULL)
1320 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001321
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001322 for (cur = start, i = 0; i < slicelength;
1323 cur += step, i++) {
1324 result_buf[i] = source_buf[cur];
1325 }
Christian Heimes44720832008-05-26 13:01:01 +00001326
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001327 result = PyString_FromStringAndSize(result_buf,
1328 slicelength);
1329 PyMem_Free(result_buf);
1330 return result;
1331 }
1332 }
1333 else {
1334 PyErr_Format(PyExc_TypeError,
1335 "string indices must be integers, not %.200s",
1336 Py_TYPE(item)->tp_name);
1337 return NULL;
1338 }
Christian Heimes44720832008-05-26 13:01:01 +00001339}
1340
1341static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001342string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 if ( index != 0 ) {
1345 PyErr_SetString(PyExc_SystemError,
1346 "accessing non-existent string segment");
1347 return -1;
1348 }
1349 *ptr = (void *)self->ob_sval;
1350 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001351}
1352
1353static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001354string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001355{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001356 PyErr_SetString(PyExc_TypeError,
1357 "Cannot use string as modifiable buffer");
1358 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001359}
1360
1361static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001362string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001363{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001364 if ( lenp )
1365 *lenp = Py_SIZE(self);
1366 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001367}
1368
1369static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001370string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001371{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 if ( index != 0 ) {
1373 PyErr_SetString(PyExc_SystemError,
1374 "accessing non-existent string segment");
1375 return -1;
1376 }
1377 *ptr = self->ob_sval;
1378 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001379}
1380
1381static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001382string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001383{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001384 return PyBuffer_FillInfo(view, (PyObject*)self,
1385 (void *)self->ob_sval, Py_SIZE(self),
1386 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001387}
1388
1389static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001390 (lenfunc)string_length, /*sq_length*/
1391 (binaryfunc)string_concat, /*sq_concat*/
1392 (ssizeargfunc)string_repeat, /*sq_repeat*/
1393 (ssizeargfunc)string_item, /*sq_item*/
1394 (ssizessizeargfunc)string_slice, /*sq_slice*/
1395 0, /*sq_ass_item*/
1396 0, /*sq_ass_slice*/
1397 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001398};
1399
1400static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 (lenfunc)string_length,
1402 (binaryfunc)string_subscript,
1403 0,
Christian Heimes44720832008-05-26 13:01:01 +00001404};
1405
1406static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (readbufferproc)string_buffer_getreadbuf,
1408 (writebufferproc)string_buffer_getwritebuf,
1409 (segcountproc)string_buffer_getsegcount,
1410 (charbufferproc)string_buffer_getcharbuf,
1411 (getbufferproc)string_buffer_getbuffer,
1412 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001413};
1414
1415
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001416
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1425
Christian Heimes1a6387e2008-03-26 12:49:49 +00001426PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001427"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428\n\
Christian Heimes44720832008-05-26 13:01:01 +00001429Return a list of the words in the string S, using sep as the\n\
1430delimiter string. If maxsplit is given, at most maxsplit\n\
1431splits are done. If sep is not specified or is None, any\n\
1432whitespace string is a separator and empty strings are removed\n\
1433from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001434
1435static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001436string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001437{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001438 Py_ssize_t len = PyString_GET_SIZE(self), n;
1439 Py_ssize_t maxsplit = -1;
1440 const char *s = PyString_AS_STRING(self), *sub;
1441 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001442
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001443 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1444 return NULL;
1445 if (maxsplit < 0)
1446 maxsplit = PY_SSIZE_T_MAX;
1447 if (subobj == Py_None)
1448 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1449 if (PyString_Check(subobj)) {
1450 sub = PyString_AS_STRING(subobj);
1451 n = PyString_GET_SIZE(subobj);
1452 }
Christian Heimes44720832008-05-26 13:01:01 +00001453#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001454 else if (PyUnicode_Check(subobj))
1455 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001456#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001457 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1458 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001460 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001461}
1462
1463PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001464"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001466Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001467the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001468found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469
1470static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001471string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001472{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 const char *sep;
1474 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001476 if (PyString_Check(sep_obj)) {
1477 sep = PyString_AS_STRING(sep_obj);
1478 sep_len = PyString_GET_SIZE(sep_obj);
1479 }
Christian Heimes44720832008-05-26 13:01:01 +00001480#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001481 else if (PyUnicode_Check(sep_obj))
1482 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001483#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001484 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1485 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001487 return stringlib_partition(
1488 (PyObject*) self,
1489 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1490 sep_obj, sep, sep_len
1491 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492}
1493
1494PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001495"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001497Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001498the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001499separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001500
1501static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001502string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 const char *sep;
1505 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001507 if (PyString_Check(sep_obj)) {
1508 sep = PyString_AS_STRING(sep_obj);
1509 sep_len = PyString_GET_SIZE(sep_obj);
1510 }
Christian Heimes44720832008-05-26 13:01:01 +00001511#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001512 else if (PyUnicode_Check(sep_obj))
1513 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001514#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001515 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1516 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001518 return stringlib_rpartition(
1519 (PyObject*) self,
1520 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1521 sep_obj, sep, sep_len
1522 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523}
1524
Christian Heimes1a6387e2008-03-26 12:49:49 +00001525PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001526"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001527\n\
Christian Heimes44720832008-05-26 13:01:01 +00001528Return a list of the words in the string S, using sep as the\n\
1529delimiter string, starting at the end of the string and working\n\
1530to the front. If maxsplit is given, at most maxsplit splits are\n\
1531done. If sep is not specified or is None, any whitespace string\n\
1532is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533
1534static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001535string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001536{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001537 Py_ssize_t len = PyString_GET_SIZE(self), n;
1538 Py_ssize_t maxsplit = -1;
1539 const char *s = PyString_AS_STRING(self), *sub;
1540 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001541
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001542 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1543 return NULL;
1544 if (maxsplit < 0)
1545 maxsplit = PY_SSIZE_T_MAX;
1546 if (subobj == Py_None)
1547 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1548 if (PyString_Check(subobj)) {
1549 sub = PyString_AS_STRING(subobj);
1550 n = PyString_GET_SIZE(subobj);
1551 }
Christian Heimes44720832008-05-26 13:01:01 +00001552#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 else if (PyUnicode_Check(subobj))
1554 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001555#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001559 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001560}
1561
1562
1563PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001564"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001565\n\
1566Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001567iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001568
1569static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001570string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001571{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 char *sep = PyString_AS_STRING(self);
1573 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1574 PyObject *res = NULL;
1575 char *p;
1576 Py_ssize_t seqlen = 0;
1577 size_t sz = 0;
1578 Py_ssize_t i;
1579 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001580
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001581 seq = PySequence_Fast(orig, "");
1582 if (seq == NULL) {
1583 return NULL;
1584 }
Christian Heimes44720832008-05-26 13:01:01 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 seqlen = PySequence_Size(seq);
1587 if (seqlen == 0) {
1588 Py_DECREF(seq);
1589 return PyString_FromString("");
1590 }
1591 if (seqlen == 1) {
1592 item = PySequence_Fast_GET_ITEM(seq, 0);
1593 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1594 Py_INCREF(item);
1595 Py_DECREF(seq);
1596 return item;
1597 }
1598 }
Christian Heimes44720832008-05-26 13:01:01 +00001599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001600 /* There are at least two things to join, or else we have a subclass
1601 * of the builtin types in the sequence.
1602 * Do a pre-pass to figure out the total amount of space we'll
1603 * need (sz), see whether any argument is absurd, and defer to
1604 * the Unicode join if appropriate.
1605 */
1606 for (i = 0; i < seqlen; i++) {
1607 const size_t old_sz = sz;
1608 item = PySequence_Fast_GET_ITEM(seq, i);
1609 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001610#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001611 if (PyUnicode_Check(item)) {
1612 /* Defer to Unicode join.
1613 * CAUTION: There's no gurantee that the
1614 * original sequence can be iterated over
1615 * again, so we must pass seq here.
1616 */
1617 PyObject *result;
1618 result = PyUnicode_Join((PyObject *)self, seq);
1619 Py_DECREF(seq);
1620 return result;
1621 }
Christian Heimes44720832008-05-26 13:01:01 +00001622#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001623 PyErr_Format(PyExc_TypeError,
1624 "sequence item %zd: expected string,"
1625 " %.80s found",
1626 i, Py_TYPE(item)->tp_name);
1627 Py_DECREF(seq);
1628 return NULL;
1629 }
1630 sz += PyString_GET_SIZE(item);
1631 if (i != 0)
1632 sz += seplen;
1633 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1634 PyErr_SetString(PyExc_OverflowError,
1635 "join() result is too long for a Python string");
1636 Py_DECREF(seq);
1637 return NULL;
1638 }
1639 }
Christian Heimes44720832008-05-26 13:01:01 +00001640
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001641 /* Allocate result space. */
1642 res = PyString_FromStringAndSize((char*)NULL, sz);
1643 if (res == NULL) {
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
Christian Heimes44720832008-05-26 13:01:01 +00001647
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001648 /* Catenate everything. */
1649 p = PyString_AS_STRING(res);
1650 for (i = 0; i < seqlen; ++i) {
1651 size_t n;
1652 item = PySequence_Fast_GET_ITEM(seq, i);
1653 n = PyString_GET_SIZE(item);
1654 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1655 p += n;
1656 if (i < seqlen - 1) {
1657 Py_MEMCPY(p, sep, seplen);
1658 p += seplen;
1659 }
1660 }
Christian Heimes44720832008-05-26 13:01:01 +00001661
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001662 Py_DECREF(seq);
1663 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001664}
1665
1666PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001667_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001668{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001669 assert(sep != NULL && PyString_Check(sep));
1670 assert(x != NULL);
1671 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001672}
1673
/* Helper macro to fix up start/end slice values.
 *
 * `start` and `end` must be modifiable lvalues; `len` is the length of
 * the sequence being sliced and is evaluated more than once.
 *   - end greater than len is clamped to len;
 *   - a negative end or start has len added to it and is then clamped
 *     to 0 if still negative;
 *   - a start greater than len is left untouched.
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe inside an unbraced if/else); use it with a trailing
 * semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001688
1689Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001690string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001692 PyObject *subobj;
1693 const char *sub;
1694 Py_ssize_t sub_len;
1695 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001696
Jesus Cea44e81682011-04-20 16:39:15 +02001697 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1698 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001699 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001700
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001701 if (PyString_Check(subobj)) {
1702 sub = PyString_AS_STRING(subobj);
1703 sub_len = PyString_GET_SIZE(subobj);
1704 }
Christian Heimes44720832008-05-26 13:01:01 +00001705#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001706 else if (PyUnicode_Check(subobj))
1707 return PyUnicode_Find(
1708 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001709#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001710 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1711 /* XXX - the "expected a character buffer object" is pretty
1712 confusing for a non-expert. remap to something else ? */
1713 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 if (dir > 0)
1716 return stringlib_find_slice(
1717 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1718 sub, sub_len, start, end);
1719 else
1720 return stringlib_rfind_slice(
1721 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1722 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001723}
1724
1725
1726PyDoc_STRVAR(find__doc__,
1727"S.find(sub [,start [,end]]) -> int\n\
1728\n\
1729Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001730such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001731arguments start and end are interpreted as in slice notation.\n\
1732\n\
1733Return -1 on failure.");
1734
1735static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001736string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001737{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001738 Py_ssize_t result = string_find_internal(self, args, +1);
1739 if (result == -2)
1740 return NULL;
1741 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001742}
1743
1744
1745PyDoc_STRVAR(index__doc__,
1746"S.index(sub [,start [,end]]) -> int\n\
1747\n\
1748Like S.find() but raise ValueError when the substring is not found.");
1749
1750static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001751string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001752{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001753 Py_ssize_t result = string_find_internal(self, args, +1);
1754 if (result == -2)
1755 return NULL;
1756 if (result == -1) {
1757 PyErr_SetString(PyExc_ValueError,
1758 "substring not found");
1759 return NULL;
1760 }
1761 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001762}
1763
1764
1765PyDoc_STRVAR(rfind__doc__,
1766"S.rfind(sub [,start [,end]]) -> int\n\
1767\n\
1768Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001769such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001770arguments start and end are interpreted as in slice notation.\n\
1771\n\
1772Return -1 on failure.");
1773
1774static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001775string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001776{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001777 Py_ssize_t result = string_find_internal(self, args, -1);
1778 if (result == -2)
1779 return NULL;
1780 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001781}
1782
1783
1784PyDoc_STRVAR(rindex__doc__,
1785"S.rindex(sub [,start [,end]]) -> int\n\
1786\n\
1787Like S.rfind() but raise ValueError when the substring is not found.");
1788
1789static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001790string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001791{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001792 Py_ssize_t result = string_find_internal(self, args, -1);
1793 if (result == -2)
1794 return NULL;
1795 if (result == -1) {
1796 PyErr_SetString(PyExc_ValueError,
1797 "substring not found");
1798 return NULL;
1799 }
1800 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001801}
1802
1803
1804Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001805do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001806{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001807 char *s = PyString_AS_STRING(self);
1808 Py_ssize_t len = PyString_GET_SIZE(self);
1809 char *sep = PyString_AS_STRING(sepobj);
1810 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1811 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001812
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001813 i = 0;
1814 if (striptype != RIGHTSTRIP) {
1815 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1816 i++;
1817 }
1818 }
Christian Heimes44720832008-05-26 13:01:01 +00001819
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001820 j = len;
1821 if (striptype != LEFTSTRIP) {
1822 do {
1823 j--;
1824 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1825 j++;
1826 }
Christian Heimes44720832008-05-26 13:01:01 +00001827
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001828 if (i == 0 && j == len && PyString_CheckExact(self)) {
1829 Py_INCREF(self);
1830 return (PyObject*)self;
1831 }
1832 else
1833 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001834}
1835
1836
1837Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001838do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001839{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001840 char *s = PyString_AS_STRING(self);
1841 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001842
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001843 i = 0;
1844 if (striptype != RIGHTSTRIP) {
1845 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1846 i++;
1847 }
1848 }
Christian Heimes44720832008-05-26 13:01:01 +00001849
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001850 j = len;
1851 if (striptype != LEFTSTRIP) {
1852 do {
1853 j--;
1854 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1855 j++;
1856 }
Christian Heimes44720832008-05-26 13:01:01 +00001857
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001858 if (i == 0 && j == len && PyString_CheckExact(self)) {
1859 Py_INCREF(self);
1860 return (PyObject*)self;
1861 }
1862 else
1863 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001864}
1865
1866
1867Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001868do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001869{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001870 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001871
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001872 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1873 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001874
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001875 if (sep != NULL && sep != Py_None) {
1876 if (PyString_Check(sep))
1877 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001878#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001879 else if (PyUnicode_Check(sep)) {
1880 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1881 PyObject *res;
1882 if (uniself==NULL)
1883 return NULL;
1884 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1885 striptype, sep);
1886 Py_DECREF(uniself);
1887 return res;
1888 }
Christian Heimes44720832008-05-26 13:01:01 +00001889#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001890 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001891#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001892 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001893#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001894 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001895#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001896 STRIPNAME(striptype));
1897 return NULL;
1898 }
Christian Heimes44720832008-05-26 13:01:01 +00001899
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001900 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001901}
1902
1903
1904PyDoc_STRVAR(strip__doc__,
1905"S.strip([chars]) -> string or unicode\n\
1906\n\
1907Return a copy of the string S with leading and trailing\n\
1908whitespace removed.\n\
1909If chars is given and not None, remove characters in chars instead.\n\
1910If chars is unicode, S will be converted to unicode before stripping");
1911
1912static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001913string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001914{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001915 if (PyTuple_GET_SIZE(args) == 0)
1916 return do_strip(self, BOTHSTRIP); /* Common case */
1917 else
1918 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001919}
1920
1921
1922PyDoc_STRVAR(lstrip__doc__,
1923"S.lstrip([chars]) -> string or unicode\n\
1924\n\
1925Return a copy of the string S with leading whitespace removed.\n\
1926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
1928
1929static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001930string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001931{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, LEFTSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001936}
1937
1938
1939PyDoc_STRVAR(rstrip__doc__,
1940"S.rstrip([chars]) -> string or unicode\n\
1941\n\
1942Return a copy of the string S with trailing whitespace removed.\n\
1943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
1945
1946static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001947string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001948{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, RIGHTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001953}
1954
1955
1956PyDoc_STRVAR(lower__doc__,
1957"S.lower() -> string\n\
1958\n\
1959Return a copy of the string S converted to lowercase.");
1960
1961/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1962#ifndef _tolower
1963#define _tolower tolower
1964#endif
1965
1966static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001967string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001968{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001969 char *s;
1970 Py_ssize_t i, n = PyString_GET_SIZE(self);
1971 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001972
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001973 newobj = PyString_FromStringAndSize(NULL, n);
1974 if (!newobj)
1975 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001976
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001977 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001978
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001979 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001980
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001981 for (i = 0; i < n; i++) {
1982 int c = Py_CHARMASK(s[i]);
1983 if (isupper(c))
1984 s[i] = _tolower(c);
1985 }
Christian Heimes44720832008-05-26 13:01:01 +00001986
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001987 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001988}
1989
1990PyDoc_STRVAR(upper__doc__,
1991"S.upper() -> string\n\
1992\n\
1993Return a copy of the string S converted to uppercase.");
1994
1995#ifndef _toupper
1996#define _toupper toupper
1997#endif
1998
1999static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002000string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002001{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002002 char *s;
2003 Py_ssize_t i, n = PyString_GET_SIZE(self);
2004 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002005
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002006 newobj = PyString_FromStringAndSize(NULL, n);
2007 if (!newobj)
2008 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002010 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002011
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002012 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002013
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002014 for (i = 0; i < n; i++) {
2015 int c = Py_CHARMASK(s[i]);
2016 if (islower(c))
2017 s[i] = _toupper(c);
2018 }
Christian Heimes44720832008-05-26 13:01:01 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002021}
2022
2023PyDoc_STRVAR(title__doc__,
2024"S.title() -> string\n\
2025\n\
2026Return a titlecased version of S, i.e. words start with uppercase\n\
2027characters, all remaining cased characters have lowercase.");
2028
2029static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002030string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002031{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002032 char *s = PyString_AS_STRING(self), *s_new;
2033 Py_ssize_t i, n = PyString_GET_SIZE(self);
2034 int previous_is_cased = 0;
2035 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002037 newobj = PyString_FromStringAndSize(NULL, n);
2038 if (newobj == NULL)
2039 return NULL;
2040 s_new = PyString_AsString(newobj);
2041 for (i = 0; i < n; i++) {
2042 int c = Py_CHARMASK(*s++);
2043 if (islower(c)) {
2044 if (!previous_is_cased)
2045 c = toupper(c);
2046 previous_is_cased = 1;
2047 } else if (isupper(c)) {
2048 if (previous_is_cased)
2049 c = tolower(c);
2050 previous_is_cased = 1;
2051 } else
2052 previous_is_cased = 0;
2053 *s_new++ = c;
2054 }
2055 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002056}
2057
2058PyDoc_STRVAR(capitalize__doc__,
2059"S.capitalize() -> string\n\
2060\n\
2061Return a copy of the string S with only its first character\n\
2062capitalized.");
2063
2064static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002065string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002066{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002067 char *s = PyString_AS_STRING(self), *s_new;
2068 Py_ssize_t i, n = PyString_GET_SIZE(self);
2069 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002070
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002071 newobj = PyString_FromStringAndSize(NULL, n);
2072 if (newobj == NULL)
2073 return NULL;
2074 s_new = PyString_AsString(newobj);
2075 if (0 < n) {
2076 int c = Py_CHARMASK(*s++);
2077 if (islower(c))
2078 *s_new = toupper(c);
2079 else
2080 *s_new = c;
2081 s_new++;
2082 }
2083 for (i = 1; i < n; i++) {
2084 int c = Py_CHARMASK(*s++);
2085 if (isupper(c))
2086 *s_new = tolower(c);
2087 else
2088 *s_new = c;
2089 s_new++;
2090 }
2091 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002092}
2093
2094
2095PyDoc_STRVAR(count__doc__,
2096"S.count(sub[, start[, end]]) -> int\n\
2097\n\
2098Return the number of non-overlapping occurrences of substring sub in\n\
2099string S[start:end]. Optional arguments start and end are interpreted\n\
2100as in slice notation.");
2101
2102static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002104{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002105 PyObject *sub_obj;
2106 const char *str = PyString_AS_STRING(self), *sub;
2107 Py_ssize_t sub_len;
2108 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002109
Jesus Cea44e81682011-04-20 16:39:15 +02002110 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002111 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002112
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002113 if (PyString_Check(sub_obj)) {
2114 sub = PyString_AS_STRING(sub_obj);
2115 sub_len = PyString_GET_SIZE(sub_obj);
2116 }
Christian Heimes44720832008-05-26 13:01:01 +00002117#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002118 else if (PyUnicode_Check(sub_obj)) {
2119 Py_ssize_t count;
2120 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2121 if (count == -1)
2122 return NULL;
2123 else
2124 return PyInt_FromSsize_t(count);
2125 }
Christian Heimes44720832008-05-26 13:01:01 +00002126#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002127 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2128 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002130 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002131
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002132 return PyInt_FromSsize_t(
2133 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2134 );
Christian Heimes44720832008-05-26 13:01:01 +00002135}
2136
2137PyDoc_STRVAR(swapcase__doc__,
2138"S.swapcase() -> string\n\
2139\n\
2140Return a copy of the string S with uppercase characters\n\
2141converted to lowercase and vice versa.");
2142
2143static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002144string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002145{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002146 char *s = PyString_AS_STRING(self), *s_new;
2147 Py_ssize_t i, n = PyString_GET_SIZE(self);
2148 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002150 newobj = PyString_FromStringAndSize(NULL, n);
2151 if (newobj == NULL)
2152 return NULL;
2153 s_new = PyString_AsString(newobj);
2154 for (i = 0; i < n; i++) {
2155 int c = Py_CHARMASK(*s++);
2156 if (islower(c)) {
2157 *s_new = toupper(c);
2158 }
2159 else if (isupper(c)) {
2160 *s_new = tolower(c);
2161 }
2162 else
2163 *s_new = c;
2164 s_new++;
2165 }
2166 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002167}
2168
2169
2170PyDoc_STRVAR(translate__doc__,
2171"S.translate(table [,deletechars]) -> string\n\
2172\n\
2173Return a copy of the string S, where all characters occurring\n\
2174in the optional argument deletechars are removed, and the\n\
2175remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002176translation table, which must be a string of length 256 or None.\n\
2177If the table argument is None, no translation is applied and\n\
2178the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002179
2180static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002181string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002182{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002183 register char *input, *output;
2184 const char *table;
2185 register Py_ssize_t i, c, changed = 0;
2186 PyObject *input_obj = (PyObject*)self;
2187 const char *output_start, *del_table=NULL;
2188 Py_ssize_t inlen, tablen, dellen = 0;
2189 PyObject *result;
2190 int trans_table[256];
2191 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002192
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002193 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2194 &tableobj, &delobj))
2195 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002196
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002197 if (PyString_Check(tableobj)) {
2198 table = PyString_AS_STRING(tableobj);
2199 tablen = PyString_GET_SIZE(tableobj);
2200 }
2201 else if (tableobj == Py_None) {
2202 table = NULL;
2203 tablen = 256;
2204 }
Christian Heimes44720832008-05-26 13:01:01 +00002205#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002206 else if (PyUnicode_Check(tableobj)) {
2207 /* Unicode .translate() does not support the deletechars
2208 parameter; instead a mapping to None will cause characters
2209 to be deleted. */
2210 if (delobj != NULL) {
2211 PyErr_SetString(PyExc_TypeError,
2212 "deletions are implemented differently for unicode");
2213 return NULL;
2214 }
2215 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2216 }
Christian Heimes44720832008-05-26 13:01:01 +00002217#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002218 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2219 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002220
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002221 if (tablen != 256) {
2222 PyErr_SetString(PyExc_ValueError,
2223 "translation table must be 256 characters long");
2224 return NULL;
2225 }
Christian Heimes44720832008-05-26 13:01:01 +00002226
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002227 if (delobj != NULL) {
2228 if (PyString_Check(delobj)) {
2229 del_table = PyString_AS_STRING(delobj);
2230 dellen = PyString_GET_SIZE(delobj);
2231 }
Christian Heimes44720832008-05-26 13:01:01 +00002232#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002233 else if (PyUnicode_Check(delobj)) {
2234 PyErr_SetString(PyExc_TypeError,
2235 "deletions are implemented differently for unicode");
2236 return NULL;
2237 }
Christian Heimes44720832008-05-26 13:01:01 +00002238#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002239 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2240 return NULL;
2241 }
2242 else {
2243 del_table = NULL;
2244 dellen = 0;
2245 }
Christian Heimes44720832008-05-26 13:01:01 +00002246
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002247 inlen = PyString_GET_SIZE(input_obj);
2248 result = PyString_FromStringAndSize((char *)NULL, inlen);
2249 if (result == NULL)
2250 return NULL;
2251 output_start = output = PyString_AsString(result);
2252 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002253
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002254 if (dellen == 0 && table != NULL) {
2255 /* If no deletions are required, use faster code */
2256 for (i = inlen; --i >= 0; ) {
2257 c = Py_CHARMASK(*input++);
2258 if (Py_CHARMASK((*output++ = table[c])) != c)
2259 changed = 1;
2260 }
2261 if (changed || !PyString_CheckExact(input_obj))
2262 return result;
2263 Py_DECREF(result);
2264 Py_INCREF(input_obj);
2265 return input_obj;
2266 }
Christian Heimes44720832008-05-26 13:01:01 +00002267
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002268 if (table == NULL) {
2269 for (i = 0; i < 256; i++)
2270 trans_table[i] = Py_CHARMASK(i);
2271 } else {
2272 for (i = 0; i < 256; i++)
2273 trans_table[i] = Py_CHARMASK(table[i]);
2274 }
Christian Heimes44720832008-05-26 13:01:01 +00002275
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002276 for (i = 0; i < dellen; i++)
2277 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002278
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002279 for (i = inlen; --i >= 0; ) {
2280 c = Py_CHARMASK(*input++);
2281 if (trans_table[c] != -1)
2282 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2283 continue;
2284 changed = 1;
2285 }
2286 if (!changed && PyString_CheckExact(input_obj)) {
2287 Py_DECREF(result);
2288 Py_INCREF(input_obj);
2289 return input_obj;
2290 }
2291 /* Fix the size of the resulting string */
2292 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2293 return NULL;
2294 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002295}
2296
2297
/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first byte equal to
 * `c` within the first `target_len` bytes of `target`, or NULL when
 * memchr() finds none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2302
2303/* String ops must return a string. */
2304/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002305Py_LOCAL(PyStringObject *)
2306return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002307{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002308 if (PyString_CheckExact(self)) {
2309 Py_INCREF(self);
2310 return self;
2311 }
2312 return (PyStringObject *)PyString_FromStringAndSize(
2313 PyString_AS_STRING(self),
2314 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002315}
2316
2317Py_LOCAL_INLINE(Py_ssize_t)
2318countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2319{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002320 Py_ssize_t count=0;
2321 const char *start=target;
2322 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002323
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002324 while ( (start=findchar(start, end-start, c)) != NULL ) {
2325 count++;
2326 if (count >= maxcount)
2327 break;
2328 start += 1;
2329 }
2330 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002331}
2332
Christian Heimes44720832008-05-26 13:01:01 +00002333
2334/* Algorithms for different cases of string replacement */
2335
2336/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002337Py_LOCAL(PyStringObject *)
2338replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002339 const char *to_s, Py_ssize_t to_len,
2340 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002341{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002342 char *self_s, *result_s;
2343 Py_ssize_t self_len, result_len;
2344 Py_ssize_t count, i, product;
2345 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002346
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002347 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002348
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002349 /* 1 at the end plus 1 after every character */
2350 count = self_len+1;
2351 if (maxcount < count)
2352 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002353
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002354 /* Check for overflow */
2355 /* result_len = count * to_len + self_len; */
2356 product = count * to_len;
2357 if (product / to_len != count) {
2358 PyErr_SetString(PyExc_OverflowError,
2359 "replace string is too long");
2360 return NULL;
2361 }
2362 result_len = product + self_len;
2363 if (result_len < 0) {
2364 PyErr_SetString(PyExc_OverflowError,
2365 "replace string is too long");
2366 return NULL;
2367 }
Christian Heimes44720832008-05-26 13:01:01 +00002368
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002369 if (! (result = (PyStringObject *)
2370 PyString_FromStringAndSize(NULL, result_len)) )
2371 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002372
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002373 self_s = PyString_AS_STRING(self);
2374 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002375
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002376 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002377
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002378 /* Lay the first one down (guaranteed this will occur) */
2379 Py_MEMCPY(result_s, to_s, to_len);
2380 result_s += to_len;
2381 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002382
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002383 for (i=0; i<count; i++) {
2384 *result_s++ = *self_s++;
2385 Py_MEMCPY(result_s, to_s, to_len);
2386 result_s += to_len;
2387 }
2388
2389 /* Copy the rest of the original string */
2390 Py_MEMCPY(result_s, self_s, self_len-i);
2391
2392 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002393}
2394
2395/* Special case for deleting a single character */
2396/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002397Py_LOCAL(PyStringObject *)
2398replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002399 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002400{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002401 char *self_s, *result_s;
2402 char *start, *next, *end;
2403 Py_ssize_t self_len, result_len;
2404 Py_ssize_t count;
2405 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002406
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002407 self_len = PyString_GET_SIZE(self);
2408 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002409
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002410 count = countchar(self_s, self_len, from_c, maxcount);
2411 if (count == 0) {
2412 return return_self(self);
2413 }
Christian Heimes44720832008-05-26 13:01:01 +00002414
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002415 result_len = self_len - count; /* from_len == 1 */
2416 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002417
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002418 if ( (result = (PyStringObject *)
2419 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2420 return NULL;
2421 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 start = self_s;
2424 end = self_s + self_len;
2425 while (count-- > 0) {
2426 next = findchar(start, end-start, from_c);
2427 if (next == NULL)
2428 break;
2429 Py_MEMCPY(result_s, start, next-start);
2430 result_s += (next-start);
2431 start = next+1;
2432 }
2433 Py_MEMCPY(result_s, start, end-start);
2434
2435 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002436}
2437
2438/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2439
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002440Py_LOCAL(PyStringObject *)
2441replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002442 const char *from_s, Py_ssize_t from_len,
2443 Py_ssize_t maxcount) {
2444 char *self_s, *result_s;
2445 char *start, *next, *end;
2446 Py_ssize_t self_len, result_len;
2447 Py_ssize_t count, offset;
2448 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002449
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002450 self_len = PyString_GET_SIZE(self);
2451 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002452
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002453 count = stringlib_count(self_s, self_len,
2454 from_s, from_len,
2455 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002456
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002457 if (count == 0) {
2458 /* no matches */
2459 return return_self(self);
2460 }
Christian Heimes44720832008-05-26 13:01:01 +00002461
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002462 result_len = self_len - (count * from_len);
2463 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002464
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002465 if ( (result = (PyStringObject *)
2466 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2467 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002468
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002469 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002470
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002471 start = self_s;
2472 end = self_s + self_len;
2473 while (count-- > 0) {
2474 offset = stringlib_find(start, end-start,
2475 from_s, from_len,
2476 0);
2477 if (offset == -1)
2478 break;
2479 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002481 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002482
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002483 result_s += (next-start);
2484 start = next+from_len;
2485 }
2486 Py_MEMCPY(result_s, start, end-start);
2487 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002488}
2489
2490/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002491Py_LOCAL(PyStringObject *)
2492replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002493 char from_c, char to_c,
2494 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002495{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002496 char *self_s, *result_s, *start, *end, *next;
2497 Py_ssize_t self_len;
2498 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002499
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002500 /* The result string will be the same size */
2501 self_s = PyString_AS_STRING(self);
2502 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002503
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002504 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002505
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002506 if (next == NULL) {
2507 /* No matches; return the original string */
2508 return return_self(self);
2509 }
Christian Heimes44720832008-05-26 13:01:01 +00002510
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002511 /* Need to make a new string */
2512 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2513 if (result == NULL)
2514 return NULL;
2515 result_s = PyString_AS_STRING(result);
2516 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002517
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002518 /* change everything in-place, starting with this one */
2519 start = result_s + (next-self_s);
2520 *start = to_c;
2521 start++;
2522 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002524 while (--maxcount > 0) {
2525 next = findchar(start, end-start, from_c);
2526 if (next == NULL)
2527 break;
2528 *next = to_c;
2529 start = next+1;
2530 }
Christian Heimes44720832008-05-26 13:01:01 +00002531
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002532 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002533}
2534
2535/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002536Py_LOCAL(PyStringObject *)
2537replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002538 const char *from_s, Py_ssize_t from_len,
2539 const char *to_s, Py_ssize_t to_len,
2540 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002541{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002542 char *result_s, *start, *end;
2543 char *self_s;
2544 Py_ssize_t self_len, offset;
2545 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002546
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002547 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002548
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002549 self_s = PyString_AS_STRING(self);
2550 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002551
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002552 offset = stringlib_find(self_s, self_len,
2553 from_s, from_len,
2554 0);
2555 if (offset == -1) {
2556 /* No matches; return the original string */
2557 return return_self(self);
2558 }
Christian Heimes44720832008-05-26 13:01:01 +00002559
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 /* Need to make a new string */
2561 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2562 if (result == NULL)
2563 return NULL;
2564 result_s = PyString_AS_STRING(result);
2565 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002566
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002567 /* change everything in-place, starting with this one */
2568 start = result_s + offset;
2569 Py_MEMCPY(start, to_s, from_len);
2570 start += from_len;
2571 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002572
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002573 while ( --maxcount > 0) {
2574 offset = stringlib_find(start, end-start,
2575 from_s, from_len,
2576 0);
2577 if (offset==-1)
2578 break;
2579 Py_MEMCPY(start+offset, to_s, from_len);
2580 start += offset+from_len;
2581 }
Christian Heimes44720832008-05-26 13:01:01 +00002582
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002583 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002584}
2585
2586/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002587Py_LOCAL(PyStringObject *)
2588replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002589 char from_c,
2590 const char *to_s, Py_ssize_t to_len,
2591 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002592{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002593 char *self_s, *result_s;
2594 char *start, *next, *end;
2595 Py_ssize_t self_len, result_len;
2596 Py_ssize_t count, product;
2597 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002598
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002599 self_s = PyString_AS_STRING(self);
2600 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002601
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002602 count = countchar(self_s, self_len, from_c, maxcount);
2603 if (count == 0) {
2604 /* no matches, return unchanged */
2605 return return_self(self);
2606 }
Christian Heimes44720832008-05-26 13:01:01 +00002607
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002608 /* use the difference between current and new, hence the "-1" */
2609 /* result_len = self_len + count * (to_len-1) */
2610 product = count * (to_len-1);
2611 if (product / (to_len-1) != count) {
2612 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2613 return NULL;
2614 }
2615 result_len = self_len + product;
2616 if (result_len < 0) {
2617 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2618 return NULL;
2619 }
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002621 if ( (result = (PyStringObject *)
2622 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2623 return NULL;
2624 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002625
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002626 start = self_s;
2627 end = self_s + self_len;
2628 while (count-- > 0) {
2629 next = findchar(start, end-start, from_c);
2630 if (next == NULL)
2631 break;
Christian Heimes44720832008-05-26 13:01:01 +00002632
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002633 if (next == start) {
2634 /* replace with the 'to' */
2635 Py_MEMCPY(result_s, to_s, to_len);
2636 result_s += to_len;
2637 start += 1;
2638 } else {
2639 /* copy the unchanged old then the 'to' */
2640 Py_MEMCPY(result_s, start, next-start);
2641 result_s += (next-start);
2642 Py_MEMCPY(result_s, to_s, to_len);
2643 result_s += to_len;
2644 start = next+1;
2645 }
2646 }
2647 /* Copy the remainder of the remaining string */
2648 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002649
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002650 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002651}
2652
2653/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002654Py_LOCAL(PyStringObject *)
2655replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002656 const char *from_s, Py_ssize_t from_len,
2657 const char *to_s, Py_ssize_t to_len,
2658 Py_ssize_t maxcount) {
2659 char *self_s, *result_s;
2660 char *start, *next, *end;
2661 Py_ssize_t self_len, result_len;
2662 Py_ssize_t count, offset, product;
2663 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002664
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002665 self_s = PyString_AS_STRING(self);
2666 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002667
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002668 count = stringlib_count(self_s, self_len,
2669 from_s, from_len,
2670 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002671
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002672 if (count == 0) {
2673 /* no matches, return unchanged */
2674 return return_self(self);
2675 }
Christian Heimes44720832008-05-26 13:01:01 +00002676
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002677 /* Check for overflow */
2678 /* result_len = self_len + count * (to_len-from_len) */
2679 product = count * (to_len-from_len);
2680 if (product / (to_len-from_len) != count) {
2681 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2682 return NULL;
2683 }
2684 result_len = self_len + product;
2685 if (result_len < 0) {
2686 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2687 return NULL;
2688 }
Christian Heimes44720832008-05-26 13:01:01 +00002689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002690 if ( (result = (PyStringObject *)
2691 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2692 return NULL;
2693 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002694
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002695 start = self_s;
2696 end = self_s + self_len;
2697 while (count-- > 0) {
2698 offset = stringlib_find(start, end-start,
2699 from_s, from_len,
2700 0);
2701 if (offset == -1)
2702 break;
2703 next = start+offset;
2704 if (next == start) {
2705 /* replace with the 'to' */
2706 Py_MEMCPY(result_s, to_s, to_len);
2707 result_s += to_len;
2708 start += from_len;
2709 } else {
2710 /* copy the unchanged old then the 'to' */
2711 Py_MEMCPY(result_s, start, next-start);
2712 result_s += (next-start);
2713 Py_MEMCPY(result_s, to_s, to_len);
2714 result_s += to_len;
2715 start = next+from_len;
2716 }
2717 }
2718 /* Copy the remainder of the remaining string */
2719 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002720
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002721 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002722}
2723
2724
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725Py_LOCAL(PyStringObject *)
2726replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002727 const char *from_s, Py_ssize_t from_len,
2728 const char *to_s, Py_ssize_t to_len,
2729 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002730{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002731 if (maxcount < 0) {
2732 maxcount = PY_SSIZE_T_MAX;
2733 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2734 /* nothing to do; return the original string */
2735 return return_self(self);
2736 }
Christian Heimes44720832008-05-26 13:01:01 +00002737
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002738 if (maxcount == 0 ||
2739 (from_len == 0 && to_len == 0)) {
2740 /* nothing to do; return the original string */
2741 return return_self(self);
2742 }
Christian Heimes44720832008-05-26 13:01:01 +00002743
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002744 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002745
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002746 if (from_len == 0) {
2747 /* insert the 'to' string everywhere. */
2748 /* >>> "Python".replace("", ".") */
2749 /* '.P.y.t.h.o.n.' */
2750 return replace_interleave(self, to_s, to_len, maxcount);
2751 }
Christian Heimes44720832008-05-26 13:01:01 +00002752
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002753 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2754 /* point for an empty self string to generate a non-empty string */
2755 /* Special case so the remaining code always gets a non-empty string */
2756 if (PyString_GET_SIZE(self) == 0) {
2757 return return_self(self);
2758 }
Christian Heimes44720832008-05-26 13:01:01 +00002759
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002760 if (to_len == 0) {
2761 /* delete all occurances of 'from' string */
2762 if (from_len == 1) {
2763 return replace_delete_single_character(
2764 self, from_s[0], maxcount);
2765 } else {
2766 return replace_delete_substring(self, from_s, from_len, maxcount);
2767 }
2768 }
Christian Heimes44720832008-05-26 13:01:01 +00002769
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002770 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002771
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002772 if (from_len == to_len) {
2773 if (from_len == 1) {
2774 return replace_single_character_in_place(
2775 self,
2776 from_s[0],
2777 to_s[0],
2778 maxcount);
2779 } else {
2780 return replace_substring_in_place(
2781 self, from_s, from_len, to_s, to_len, maxcount);
2782 }
2783 }
Christian Heimes44720832008-05-26 13:01:01 +00002784
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002785 /* Otherwise use the more generic algorithms */
2786 if (from_len == 1) {
2787 return replace_single_character(self, from_s[0],
2788 to_s, to_len, maxcount);
2789 } else {
2790 /* len('from')>=2, len('to')>=1 */
2791 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2792 }
Christian Heimes44720832008-05-26 13:01:01 +00002793}
2794
2795PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002796"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002797\n\
2798Return a copy of string S with all occurrences of substring\n\
2799old replaced by new. If the optional argument count is\n\
2800given, only the first count occurrences are replaced.");
2801
2802static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002803string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002804{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002805 Py_ssize_t count = -1;
2806 PyObject *from, *to;
2807 const char *from_s, *to_s;
2808 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002809
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002810 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2811 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002812
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002813 if (PyString_Check(from)) {
2814 from_s = PyString_AS_STRING(from);
2815 from_len = PyString_GET_SIZE(from);
2816 }
Christian Heimes44720832008-05-26 13:01:01 +00002817#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002818 if (PyUnicode_Check(from))
2819 return PyUnicode_Replace((PyObject *)self,
2820 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002821#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002822 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2823 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002824
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002825 if (PyString_Check(to)) {
2826 to_s = PyString_AS_STRING(to);
2827 to_len = PyString_GET_SIZE(to);
2828 }
Christian Heimes44720832008-05-26 13:01:01 +00002829#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002830 else if (PyUnicode_Check(to))
2831 return PyUnicode_Replace((PyObject *)self,
2832 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002833#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002834 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2835 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002836
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002837 return (PyObject *)replace((PyStringObject *) self,
2838 from_s, from_len,
2839 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002840}
2841
2842/** End DALKE **/
2843
2844/* Matches the end (direction >= 0) or start (direction < 0) of self
2845 * against substr, using the start and end arguments. Returns
2846 * -1 on error, 0 if not found and 1 if found.
2847 */
2848Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002849_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002850 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002851{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002852 Py_ssize_t len = PyString_GET_SIZE(self);
2853 Py_ssize_t slen;
2854 const char* sub;
2855 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002856
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002857 if (PyString_Check(substr)) {
2858 sub = PyString_AS_STRING(substr);
2859 slen = PyString_GET_SIZE(substr);
2860 }
Christian Heimes44720832008-05-26 13:01:01 +00002861#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002862 else if (PyUnicode_Check(substr))
2863 return PyUnicode_Tailmatch((PyObject *)self,
2864 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002865#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002866 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2867 return -1;
2868 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002869
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002871
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002872 if (direction < 0) {
2873 /* startswith */
2874 if (start+slen > len)
2875 return 0;
2876 } else {
2877 /* endswith */
2878 if (end-start < slen || start > len)
2879 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002880
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002881 if (end-slen > start)
2882 start = end - slen;
2883 }
2884 if (end-start >= slen)
2885 return ! memcmp(str+start, sub, slen);
2886 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002887}
2888
2889
2890PyDoc_STRVAR(startswith__doc__,
2891"S.startswith(prefix[, start[, end]]) -> bool\n\
2892\n\
2893Return True if S starts with the specified prefix, False otherwise.\n\
2894With optional start, test S beginning at that position.\n\
2895With optional end, stop comparing S at that position.\n\
2896prefix can also be a tuple of strings to try.");
2897
2898static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002899string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002900{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002901 Py_ssize_t start = 0;
2902 Py_ssize_t end = PY_SSIZE_T_MAX;
2903 PyObject *subobj;
2904 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002905
Jesus Cea44e81682011-04-20 16:39:15 +02002906 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002907 return NULL;
2908 if (PyTuple_Check(subobj)) {
2909 Py_ssize_t i;
2910 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2911 result = _string_tailmatch(self,
2912 PyTuple_GET_ITEM(subobj, i),
2913 start, end, -1);
2914 if (result == -1)
2915 return NULL;
2916 else if (result) {
2917 Py_RETURN_TRUE;
2918 }
2919 }
2920 Py_RETURN_FALSE;
2921 }
2922 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002923 if (result == -1) {
2924 if (PyErr_ExceptionMatches(PyExc_TypeError))
2925 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2926 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002927 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002928 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002929 else
2930 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002931}
2932
2933
2934PyDoc_STRVAR(endswith__doc__,
2935"S.endswith(suffix[, start[, end]]) -> bool\n\
2936\n\
2937Return True if S ends with the specified suffix, False otherwise.\n\
2938With optional start, test S beginning at that position.\n\
2939With optional end, stop comparing S at that position.\n\
2940suffix can also be a tuple of strings to try.");
2941
2942static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002943string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002944{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002945 Py_ssize_t start = 0;
2946 Py_ssize_t end = PY_SSIZE_T_MAX;
2947 PyObject *subobj;
2948 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002949
Jesus Cea44e81682011-04-20 16:39:15 +02002950 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002951 return NULL;
2952 if (PyTuple_Check(subobj)) {
2953 Py_ssize_t i;
2954 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2955 result = _string_tailmatch(self,
2956 PyTuple_GET_ITEM(subobj, i),
2957 start, end, +1);
2958 if (result == -1)
2959 return NULL;
2960 else if (result) {
2961 Py_RETURN_TRUE;
2962 }
2963 }
2964 Py_RETURN_FALSE;
2965 }
2966 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002967 if (result == -1) {
2968 if (PyErr_ExceptionMatches(PyExc_TypeError))
2969 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2970 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002971 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002972 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002973 else
2974 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002975}
2976
2977
2978PyDoc_STRVAR(encode__doc__,
2979"S.encode([encoding[,errors]]) -> object\n\
2980\n\
2981Encodes S using the codec registered for encoding. encoding defaults\n\
2982to the default encoding. errors may be given to set a different error\n\
2983handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2984a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2985'xmlcharrefreplace' as well as any other name registered with\n\
2986codecs.register_error that is able to handle UnicodeEncodeErrors.");
2987
2988static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002989string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002990{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002991 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00002992 char *encoding = NULL;
2993 char *errors = NULL;
2994 PyObject *v;
2995
Benjamin Peterson332d7212009-09-18 21:14:55 +00002996 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002997 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00002998 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002999 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003000 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003001 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003002 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003003 PyErr_Format(PyExc_TypeError,
3004 "encoder did not return a string/unicode object "
3005 "(type=%.400s)",
3006 Py_TYPE(v)->tp_name);
3007 Py_DECREF(v);
3008 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003009 }
3010 return v;
3011
3012 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003013 return NULL;
3014}
3015
Christian Heimes44720832008-05-26 13:01:01 +00003016
3017PyDoc_STRVAR(decode__doc__,
3018"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003019\n\
Christian Heimes44720832008-05-26 13:01:01 +00003020Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003021to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003022handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3023a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003024as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003025able to handle UnicodeDecodeErrors.");
3026
3027static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003028string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003029{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003030 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003031 char *encoding = NULL;
3032 char *errors = NULL;
3033 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003034
Benjamin Peterson332d7212009-09-18 21:14:55 +00003035 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003036 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003037 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003038 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003039 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003040 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003041 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003042 PyErr_Format(PyExc_TypeError,
3043 "decoder did not return a string/unicode object "
3044 "(type=%.400s)",
3045 Py_TYPE(v)->tp_name);
3046 Py_DECREF(v);
3047 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003048 }
3049 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003050
Christian Heimes44720832008-05-26 13:01:01 +00003051 onError:
3052 return NULL;
3053}
3054
3055
3056PyDoc_STRVAR(expandtabs__doc__,
3057"S.expandtabs([tabsize]) -> string\n\
3058\n\
3059Return a copy of S where all tab characters are expanded using spaces.\n\
3060If tabsize is not given, a tab size of 8 characters is assumed.");
3061
3062static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003063string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003064{
3065 const char *e, *p, *qe;
3066 char *q;
3067 Py_ssize_t i, j, incr;
3068 PyObject *u;
3069 int tabsize = 8;
3070
3071 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003072 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003073
3074 /* First pass: determine size of output string */
3075 i = 0; /* chars up to and including most recent \n or \r */
3076 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003077 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3078 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003079 if (*p == '\t') {
3080 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003081 incr = tabsize - (j % tabsize);
3082 if (j > PY_SSIZE_T_MAX - incr)
3083 goto overflow1;
3084 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003085 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003086 }
3087 else {
3088 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003089 goto overflow1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003090 j++;
3091 if (*p == '\n' || *p == '\r') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003092 if (i > PY_SSIZE_T_MAX - j)
3093 goto overflow1;
3094 i += j;
3095 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003096 }
3097 }
Christian Heimes44720832008-05-26 13:01:01 +00003098
3099 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003100 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003101
3102 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003103 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003104 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003105 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003106
3107 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003108 q = PyString_AS_STRING(u); /* next output char */
3109 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003110
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003111 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003112 if (*p == '\t') {
3113 if (tabsize > 0) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003114 i = tabsize - (j % tabsize);
3115 j += i;
3116 while (i--) {
3117 if (q >= qe)
3118 goto overflow2;
3119 *q++ = ' ';
3120 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003121 }
3122 }
3123 else {
3124 if (q >= qe)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003125 goto overflow2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003126 *q++ = *p;
3127 j++;
3128 if (*p == '\n' || *p == '\r')
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003129 j = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003130 }
Christian Heimes44720832008-05-26 13:01:01 +00003131
3132 return u;
3133
3134 overflow2:
3135 Py_DECREF(u);
3136 overflow1:
3137 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3138 return NULL;
3139}
3140
3141Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003142pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003143{
3144 PyObject *u;
3145
3146 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003147 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003148 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003149 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003150
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003151 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003152 Py_INCREF(self);
3153 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003154 }
3155
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003156 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003157 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003158 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003159 if (left)
3160 memset(PyString_AS_STRING(u), fill, left);
3161 Py_MEMCPY(PyString_AS_STRING(u) + left,
3162 PyString_AS_STRING(self),
3163 PyString_GET_SIZE(self));
3164 if (right)
3165 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3166 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003167 }
3168
3169 return u;
3170}
3171
3172PyDoc_STRVAR(ljust__doc__,
3173"S.ljust(width[, fillchar]) -> string\n"
3174"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003175"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003176"done using the specified fill character (default is a space).");
3177
3178static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003179string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003180{
3181 Py_ssize_t width;
3182 char fillchar = ' ';
3183
3184 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003185 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003186
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003187 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003188 Py_INCREF(self);
3189 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003190 }
3191
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003192 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003193}
3194
3195
3196PyDoc_STRVAR(rjust__doc__,
3197"S.rjust(width[, fillchar]) -> string\n"
3198"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003199"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003200"done using the specified fill character (default is a space)");
3201
3202static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003204{
3205 Py_ssize_t width;
3206 char fillchar = ' ';
3207
3208 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003209 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003210
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003211 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003212 Py_INCREF(self);
3213 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003214 }
3215
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003216 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003217}
3218
3219
3220PyDoc_STRVAR(center__doc__,
3221"S.center(width[, fillchar]) -> string\n"
3222"\n"
3223"Return S centered in a string of length width. Padding is\n"
3224"done using the specified fill character (default is a space)");
3225
3226static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003228{
3229 Py_ssize_t marg, left;
3230 Py_ssize_t width;
3231 char fillchar = ' ';
3232
3233 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003234 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003235
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003236 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003237 Py_INCREF(self);
3238 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003239 }
3240
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003241 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003242 left = marg / 2 + (marg & width & 1);
3243
3244 return pad(self, left, marg - left, fillchar);
3245}
3246
3247PyDoc_STRVAR(zfill__doc__,
3248"S.zfill(width) -> string\n"
3249"\n"
3250"Pad a numeric string S with zeros on the left, to fill a field\n"
3251"of the specified width. The string S is never truncated.");
3252
3253static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003254string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003255{
3256 Py_ssize_t fill;
3257 PyObject *s;
3258 char *p;
3259 Py_ssize_t width;
3260
3261 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003262 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003263
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003264 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003265 if (PyString_CheckExact(self)) {
3266 Py_INCREF(self);
3267 return (PyObject*) self;
3268 }
3269 else
3270 return PyString_FromStringAndSize(
3271 PyString_AS_STRING(self),
3272 PyString_GET_SIZE(self)
3273 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003274 }
3275
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003276 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003277
Christian Heimes44720832008-05-26 13:01:01 +00003278 s = pad(self, fill, 0, '0');
3279
3280 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003281 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003282
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003283 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003284 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003285 /* move sign to beginning of string */
3286 p[0] = p[fill];
3287 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003288 }
3289
3290 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003291}
3292
Christian Heimes44720832008-05-26 13:01:01 +00003293PyDoc_STRVAR(isspace__doc__,
3294"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003295\n\
Christian Heimes44720832008-05-26 13:01:01 +00003296Return True if all characters in S are whitespace\n\
3297and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003298
Christian Heimes44720832008-05-26 13:01:01 +00003299static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003300string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003301{
Christian Heimes44720832008-05-26 13:01:01 +00003302 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003303 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003304 register const unsigned char *e;
3305
3306 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003307 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003308 isspace(*p))
3309 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003310
3311 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003312 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003313 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003314
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003315 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003316 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003317 if (!isspace(*p))
3318 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003319 }
Christian Heimes44720832008-05-26 13:01:01 +00003320 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003321}
3322
Christian Heimes44720832008-05-26 13:01:01 +00003323
3324PyDoc_STRVAR(isalpha__doc__,
3325"S.isalpha() -> bool\n\
3326\n\
3327Return True if all characters in S are alphabetic\n\
3328and there is at least one character in S, False otherwise.");
3329
3330static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003331string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003332{
Christian Heimes44720832008-05-26 13:01:01 +00003333 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003334 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003335 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003336
Christian Heimes44720832008-05-26 13:01:01 +00003337 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003339 isalpha(*p))
3340 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003341
3342 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003343 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003344 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003345
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003346 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003347 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003348 if (!isalpha(*p))
3349 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003350 }
Christian Heimes44720832008-05-26 13:01:01 +00003351 return PyBool_FromLong(1);
3352}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003353
Christian Heimes44720832008-05-26 13:01:01 +00003354
3355PyDoc_STRVAR(isalnum__doc__,
3356"S.isalnum() -> bool\n\
3357\n\
3358Return True if all characters in S are alphanumeric\n\
3359and there is at least one character in S, False otherwise.");
3360
3361static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003362string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003363{
3364 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003365 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003366 register const unsigned char *e;
3367
3368 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003369 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003370 isalnum(*p))
3371 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003372
3373 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003374 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003375 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003376
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003377 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003378 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003379 if (!isalnum(*p))
3380 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003381 }
3382 return PyBool_FromLong(1);
3383}
3384
3385
3386PyDoc_STRVAR(isdigit__doc__,
3387"S.isdigit() -> bool\n\
3388\n\
3389Return True if all characters in S are digits\n\
3390and there is at least one character in S, False otherwise.");
3391
3392static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003393string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003394{
3395 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003396 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003397 register const unsigned char *e;
3398
3399 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003400 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003401 isdigit(*p))
3402 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003403
3404 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003405 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003406 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003407
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003408 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003409 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003410 if (!isdigit(*p))
3411 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003412 }
3413 return PyBool_FromLong(1);
3414}
3415
3416
3417PyDoc_STRVAR(islower__doc__,
3418"S.islower() -> bool\n\
3419\n\
3420Return True if all cased characters in S are lowercase and there is\n\
3421at least one cased character in S, False otherwise.");
3422
3423static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003424string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003425{
3426 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003427 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003428 register const unsigned char *e;
3429 int cased;
3430
3431 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003432 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003433 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003434
3435 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003436 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003437 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003438
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003439 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003440 cased = 0;
3441 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003442 if (isupper(*p))
3443 return PyBool_FromLong(0);
3444 else if (!cased && islower(*p))
3445 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003446 }
3447 return PyBool_FromLong(cased);
3448}
3449
3450
3451PyDoc_STRVAR(isupper__doc__,
3452"S.isupper() -> bool\n\
3453\n\
3454Return True if all cased characters in S are uppercase and there is\n\
3455at least one cased character in S, False otherwise.");
3456
3457static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003458string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003459{
3460 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003461 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003462 register const unsigned char *e;
3463 int cased;
3464
3465 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003466 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003467 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003468
3469 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003470 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003471 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003472
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003473 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003474 cased = 0;
3475 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003476 if (islower(*p))
3477 return PyBool_FromLong(0);
3478 else if (!cased && isupper(*p))
3479 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003480 }
3481 return PyBool_FromLong(cased);
3482}
3483
3484
3485PyDoc_STRVAR(istitle__doc__,
3486"S.istitle() -> bool\n\
3487\n\
3488Return True if S is a titlecased string and there is at least one\n\
3489character in S, i.e. uppercase characters may only follow uncased\n\
3490characters and lowercase characters only cased ones. Return False\n\
3491otherwise.");
3492
3493static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003494string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003495{
3496 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003497 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003498 register const unsigned char *e;
3499 int cased, previous_is_cased;
3500
3501 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003502 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003503 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003504
3505 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003506 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003507 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003508
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003509 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003510 cased = 0;
3511 previous_is_cased = 0;
3512 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003513 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003514
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003515 if (isupper(ch)) {
3516 if (previous_is_cased)
3517 return PyBool_FromLong(0);
3518 previous_is_cased = 1;
3519 cased = 1;
3520 }
3521 else if (islower(ch)) {
3522 if (!previous_is_cased)
3523 return PyBool_FromLong(0);
3524 previous_is_cased = 1;
3525 cased = 1;
3526 }
3527 else
3528 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003529 }
3530 return PyBool_FromLong(cased);
3531}
3532
3533
3534PyDoc_STRVAR(splitlines__doc__,
3535"S.splitlines([keepends]) -> list of strings\n\
3536\n\
3537Return a list of the lines in S, breaking at line boundaries.\n\
3538Line breaks are not included in the resulting list unless keepends\n\
3539is given and true.");
3540
3541static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003542string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003543{
Christian Heimes44720832008-05-26 13:01:01 +00003544 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003545
3546 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003547 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003548
Antoine Pitrou64672132010-01-13 07:55:48 +00003549 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003550 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3551 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003552 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003553}
3554
Robert Schuppenies51df0642008-06-01 16:16:17 +00003555PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003556"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003557
3558static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003559string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003560{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003561 Py_ssize_t res;
3562 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3563 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003564}
3565
Christian Heimes1a6387e2008-03-26 12:49:49 +00003566static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003567string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003568{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003569 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003570}
3571
Christian Heimes1a6387e2008-03-26 12:49:49 +00003572
Christian Heimes44720832008-05-26 13:01:01 +00003573#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003574
Christian Heimes44720832008-05-26 13:01:01 +00003575PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003576"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003577\n\
Eric Smith6c840852010-11-06 19:43:44 +00003578Return a formatted version of S, using substitutions from args and kwargs.\n\
3579The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003580
Eric Smithdc13b792008-05-30 18:10:04 +00003581static PyObject *
3582string__format__(PyObject* self, PyObject* args)
3583{
3584 PyObject *format_spec;
3585 PyObject *result = NULL;
3586 PyObject *tmp = NULL;
3587
3588 /* If 2.x, convert format_spec to the same type as value */
3589 /* This is to allow things like u''.format('') */
3590 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003591 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003592 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003593 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3594 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3595 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003596 }
3597 tmp = PyObject_Str(format_spec);
3598 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003599 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003600 format_spec = tmp;
3601
3602 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003603 PyString_AS_STRING(format_spec),
3604 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003605done:
3606 Py_XDECREF(tmp);
3607 return result;
3608}
3609
Christian Heimes44720832008-05-26 13:01:01 +00003610PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003611"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003612\n\
Eric Smith6c840852010-11-06 19:43:44 +00003613Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003614
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003615
Christian Heimes1a6387e2008-03-26 12:49:49 +00003616static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003617string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003618 /* Counterparts of the obsolete stropmodule functions; except
3619 string.maketrans(). */
3620 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3621 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3622 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3623 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3624 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3625 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3626 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3627 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3628 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3629 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3630 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3631 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3632 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3633 capitalize__doc__},
3634 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3635 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3636 endswith__doc__},
3637 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3638 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3639 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3640 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3641 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3642 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3643 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3644 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3645 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3646 rpartition__doc__},
3647 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3648 startswith__doc__},
3649 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3650 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3651 swapcase__doc__},
3652 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3653 translate__doc__},
3654 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3655 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3656 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3657 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3658 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3659 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3660 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3661 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3662 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3663 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3664 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3665 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3666 expandtabs__doc__},
3667 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3668 splitlines__doc__},
3669 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3670 sizeof__doc__},
3671 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3672 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003673};
3674
3675static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003676str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003677
Christian Heimes44720832008-05-26 13:01:01 +00003678static PyObject *
3679string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3680{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003681 PyObject *x = NULL;
3682 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003683
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003684 if (type != &PyString_Type)
3685 return str_subtype_new(type, args, kwds);
3686 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3687 return NULL;
3688 if (x == NULL)
3689 return PyString_FromString("");
3690 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003691}
3692
3693static PyObject *
3694str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3695{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003696 PyObject *tmp, *pnew;
3697 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003698
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003699 assert(PyType_IsSubtype(type, &PyString_Type));
3700 tmp = string_new(&PyString_Type, args, kwds);
3701 if (tmp == NULL)
3702 return NULL;
3703 assert(PyString_CheckExact(tmp));
3704 n = PyString_GET_SIZE(tmp);
3705 pnew = type->tp_alloc(type, n);
3706 if (pnew != NULL) {
3707 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3708 ((PyStringObject *)pnew)->ob_shash =
3709 ((PyStringObject *)tmp)->ob_shash;
3710 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3711 }
3712 Py_DECREF(tmp);
3713 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003714}
3715
3716static PyObject *
3717basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3718{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003719 PyErr_SetString(PyExc_TypeError,
3720 "The basestring type cannot be instantiated");
3721 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003722}
3723
3724static PyObject *
3725string_mod(PyObject *v, PyObject *w)
3726{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003727 if (!PyString_Check(v)) {
3728 Py_INCREF(Py_NotImplemented);
3729 return Py_NotImplemented;
3730 }
3731 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003732}
3733
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3736
/* Number-protocol table for str.  Only nb_remainder is populated
   (string_mod, i.e. the `%' formatting operator).  The initializer
   stops after that slot, so every later member is zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3744
3745
/* Type object for basestring.  Almost every slot is empty: the type
   derives from object (tp_base), carries only a docstring, and its
   tp_new is basestring_new, which always raises TypeError.  The
   initializer is positional, so the order of entries must not change. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3787
/* Docstring installed as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3793
/* Type object for str.  Instances are variable-sized: the basic size is
   PyStringObject_SIZE and each item is one char.  The type derives from
   basestring (tp_base), is created through string_new and freed with
   PyObject_Del.  The initializer is positional, so the order of entries
   must not change. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3837
3838void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003839PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003840{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003841 register PyObject *v;
3842 if (*pv == NULL)
3843 return;
3844 if (w == NULL || !PyString_Check(*pv)) {
3845 Py_DECREF(*pv);
3846 *pv = NULL;
3847 return;
3848 }
3849 v = string_concat((PyStringObject *) *pv, w);
3850 Py_DECREF(*pv);
3851 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00003852}
3853
3854void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003857 PyString_Concat(pv, w);
3858 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003859}
3860
3861
3862/* The following function breaks the notion that strings are immutable:
3863 it changes the size of a string. We get away with this only if there
3864 is only one module referencing the object. You can also think of it
3865 as creating a new string object and destroying the old one, only
3866 more efficiently. In any case, don't use this if the string may
3867 already be known to some other part of the code...
3868 Note that if there's not enough memory to resize the string, the original
3869 string object at *pv is deallocated, *pv is set to NULL, an "out of
3870 memory" exception is set, and -1 is returned. Else (on success) 0 is
3871 returned, and the value in *pv may or may not be the same as on input.
3872 As always, an extra byte is allocated for a trailing \0 byte (newsize
3873 does *not* include that), and a trailing \0 byte is stored.
3874*/
3875
3876int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003877_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003878{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003879 register PyObject *v;
3880 register PyStringObject *sv;
3881 v = *pv;
3882 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3883 PyString_CHECK_INTERNED(v)) {
3884 *pv = 0;
3885 Py_DECREF(v);
3886 PyErr_BadInternalCall();
3887 return -1;
3888 }
3889 /* XXX UNREF/NEWREF interface should be more symmetrical */
3890 _Py_DEC_REFTOTAL;
3891 _Py_ForgetReference(v);
3892 *pv = (PyObject *)
3893 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3894 if (*pv == NULL) {
3895 PyObject_Del(v);
3896 PyErr_NoMemory();
3897 return -1;
3898 }
3899 _Py_NewReference(*pv);
3900 sv = (PyStringObject *) *pv;
3901 Py_SIZE(sv) = newsize;
3902 sv->ob_sval[newsize] = '\0';
3903 sv->ob_shash = -1; /* invalidate cached hash value */
3904 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003905}
3906
3907/* Helpers for formatstring */
3908
3909Py_LOCAL_INLINE(PyObject *)
3910getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3911{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003912 Py_ssize_t argidx = *p_argidx;
3913 if (argidx < arglen) {
3914 (*p_argidx)++;
3915 if (arglen < 0)
3916 return args;
3917 else
3918 return PyTuple_GetItem(args, argidx);
3919 }
3920 PyErr_SetString(PyExc_TypeError,
3921 "not enough arguments for format string");
3922 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003923}
3924
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Each constant is a distinct bit, so the flags seen in one conversion
 * specifier can be OR'ed together into a single int bitmask.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00003937
Mark Dickinson18cfada2009-11-23 18:46:41 +00003938/* Returns a new reference to a PyString object, or NULL on failure. */
3939
3940static PyObject *
3941formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003942{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003943 char *p;
3944 PyObject *result;
3945 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003946
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003947 x = PyFloat_AsDouble(v);
3948 if (x == -1.0 && PyErr_Occurred()) {
3949 PyErr_Format(PyExc_TypeError, "float argument required, "
3950 "not %.200s", Py_TYPE(v)->tp_name);
3951 return NULL;
3952 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003953
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003954 if (prec < 0)
3955 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003956
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003957 p = PyOS_double_to_string(x, type, prec,
3958 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003959
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003960 if (p == NULL)
3961 return NULL;
3962 result = PyString_FromStringAndSize(p, strlen(p));
3963 PyMem_Free(p);
3964 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003965}
3966
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003967/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003968 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3969 * Python's regular ints.
3970 * Return value: a new PyString*, or NULL if error.
3971 * . *pbuf is set to point into it,
3972 * *plen set to the # of chars following that.
3973 * Caller must decref it when done using pbuf.
3974 * The string starting at *pbuf is of the form
3975 * "-"? ("0x" | "0X")? digit+
3976 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3977 * set in flags. The case of hex digits will be correct,
3978 * There will be at least prec digits, zero-filled on the left if
3979 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003980 * val object to be converted
3981 * flags bitmask of format flags; only F_ALT is looked at
3982 * prec minimum number of digits; 0-fill on left if needed
3983 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003984 *
3985 * CAUTION: o, x and X conversions on regular ints can never
3986 * produce a '-' sign, but can for Python's unbounded ints.
3987 */
3988PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003989_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003990 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003992 PyObject *result = NULL;
3993 char *buf;
3994 Py_ssize_t i;
3995 int sign; /* 1 if '-', else 0 */
3996 int len; /* number of characters */
3997 Py_ssize_t llen;
3998 int numdigits; /* len == numnondigits + numdigits */
3999 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004001 switch (type) {
4002 case 'd':
4003 case 'u':
4004 result = Py_TYPE(val)->tp_str(val);
4005 break;
4006 case 'o':
4007 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4008 break;
4009 case 'x':
4010 case 'X':
4011 numnondigits = 2;
4012 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4013 break;
4014 default:
4015 assert(!"'type' not in [duoxX]");
4016 }
4017 if (!result)
4018 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004020 buf = PyString_AsString(result);
4021 if (!buf) {
4022 Py_DECREF(result);
4023 return NULL;
4024 }
Christian Heimes44720832008-05-26 13:01:01 +00004025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004026 /* To modify the string in-place, there can only be one reference. */
4027 if (Py_REFCNT(result) != 1) {
4028 PyErr_BadInternalCall();
4029 return NULL;
4030 }
4031 llen = PyString_Size(result);
4032 if (llen > INT_MAX) {
4033 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4034 return NULL;
4035 }
4036 len = (int)llen;
4037 if (buf[len-1] == 'L') {
4038 --len;
4039 buf[len] = '\0';
4040 }
4041 sign = buf[0] == '-';
4042 numnondigits += sign;
4043 numdigits = len - numnondigits;
4044 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004045
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004046 /* Get rid of base marker unless F_ALT */
4047 if ((flags & F_ALT) == 0) {
4048 /* Need to skip 0x, 0X or 0. */
4049 int skipped = 0;
4050 switch (type) {
4051 case 'o':
4052 assert(buf[sign] == '0');
4053 /* If 0 is only digit, leave it alone. */
4054 if (numdigits > 1) {
4055 skipped = 1;
4056 --numdigits;
4057 }
4058 break;
4059 case 'x':
4060 case 'X':
4061 assert(buf[sign] == '0');
4062 assert(buf[sign + 1] == 'x');
4063 skipped = 2;
4064 numnondigits -= 2;
4065 break;
4066 }
4067 if (skipped) {
4068 buf += skipped;
4069 len -= skipped;
4070 if (sign)
4071 buf[0] = '-';
4072 }
4073 assert(len == numnondigits + numdigits);
4074 assert(numdigits > 0);
4075 }
Christian Heimes44720832008-05-26 13:01:01 +00004076
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004077 /* Fill with leading zeroes to meet minimum width. */
4078 if (prec > numdigits) {
4079 PyObject *r1 = PyString_FromStringAndSize(NULL,
4080 numnondigits + prec);
4081 char *b1;
4082 if (!r1) {
4083 Py_DECREF(result);
4084 return NULL;
4085 }
4086 b1 = PyString_AS_STRING(r1);
4087 for (i = 0; i < numnondigits; ++i)
4088 *b1++ = *buf++;
4089 for (i = 0; i < prec - numdigits; i++)
4090 *b1++ = '0';
4091 for (i = 0; i < numdigits; i++)
4092 *b1++ = *buf++;
4093 *b1 = '\0';
4094 Py_DECREF(result);
4095 result = r1;
4096 buf = PyString_AS_STRING(result);
4097 len = numnondigits + prec;
4098 }
Christian Heimes44720832008-05-26 13:01:01 +00004099
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004100 /* Fix up case for hex conversions. */
4101 if (type == 'X') {
4102 /* Need to convert all lower case letters to upper case.
4103 and need to convert 0x to 0X (and -0x to -0X). */
4104 for (i = 0; i < len; i++)
4105 if (buf[i] >= 'a' && buf[i] <= 'x')
4106 buf[i] -= 'a'-'A';
4107 }
4108 *pbuf = buf;
4109 *plen = len;
4110 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004111}
4112
4113Py_LOCAL_INLINE(int)
4114formatint(char *buf, size_t buflen, int flags,
4115 int prec, int type, PyObject *v)
4116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004117 /* fmt = '%#.' + `prec` + 'l' + `type`
4118 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4119 + 1 + 1 = 24 */
4120 char fmt[64]; /* plenty big enough! */
4121 char *sign;
4122 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004124 x = PyInt_AsLong(v);
4125 if (x == -1 && PyErr_Occurred()) {
4126 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4127 Py_TYPE(v)->tp_name);
4128 return -1;
4129 }
4130 if (x < 0 && type == 'u') {
4131 type = 'd';
4132 }
4133 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4134 sign = "-";
4135 else
4136 sign = "";
4137 if (prec < 0)
4138 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004139
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004140 if ((flags & F_ALT) &&
4141 (type == 'x' || type == 'X')) {
4142 /* When converting under %#x or %#X, there are a number
4143 * of issues that cause pain:
4144 * - when 0 is being converted, the C standard leaves off
4145 * the '0x' or '0X', which is inconsistent with other
4146 * %#x/%#X conversions and inconsistent with Python's
4147 * hex() function
4148 * - there are platforms that violate the standard and
4149 * convert 0 with the '0x' or '0X'
4150 * (Metrowerks, Compaq Tru64)
4151 * - there are platforms that give '0x' when converting
4152 * under %#X, but convert 0 in accordance with the
4153 * standard (OS/2 EMX)
4154 *
4155 * We can achieve the desired consistency by inserting our
4156 * own '0x' or '0X' prefix, and substituting %x/%X in place
4157 * of %#x/%#X.
4158 *
4159 * Note that this is the same approach as used in
4160 * formatint() in unicodeobject.c
4161 */
4162 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4163 sign, type, prec, type);
4164 }
4165 else {
4166 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4167 sign, (flags&F_ALT) ? "#" : "",
4168 prec, type);
4169 }
Christian Heimes44720832008-05-26 13:01:01 +00004170
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004171 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4172 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4173 */
4174 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4175 PyErr_SetString(PyExc_OverflowError,
4176 "formatted integer is too long (precision too large?)");
4177 return -1;
4178 }
4179 if (sign[0])
4180 PyOS_snprintf(buf, buflen, fmt, -x);
4181 else
4182 PyOS_snprintf(buf, buflen, fmt, x);
4183 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004184}
4185
4186Py_LOCAL_INLINE(int)
4187formatchar(char *buf, size_t buflen, PyObject *v)
4188{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004189 /* presume that the buffer is at least 2 characters long */
4190 if (PyString_Check(v)) {
4191 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4192 return -1;
4193 }
4194 else {
4195 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4196 return -1;
4197 }
4198 buf[1] = '\0';
4199 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004200}
4201
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
4211
4212PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004213PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004214{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004215 char *fmt, *res;
4216 Py_ssize_t arglen, argidx;
4217 Py_ssize_t reslen, rescnt, fmtcnt;
4218 int args_owned = 0;
4219 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004220#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004221 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004222#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004223 PyObject *dict = NULL;
4224 if (format == NULL || !PyString_Check(format) || args == NULL) {
4225 PyErr_BadInternalCall();
4226 return NULL;
4227 }
4228 orig_args = args;
4229 fmt = PyString_AS_STRING(format);
4230 fmtcnt = PyString_GET_SIZE(format);
4231 reslen = rescnt = fmtcnt + 100;
4232 result = PyString_FromStringAndSize((char *)NULL, reslen);
4233 if (result == NULL)
4234 return NULL;
4235 res = PyString_AsString(result);
4236 if (PyTuple_Check(args)) {
4237 arglen = PyTuple_GET_SIZE(args);
4238 argidx = 0;
4239 }
4240 else {
4241 arglen = -1;
4242 argidx = -2;
4243 }
4244 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4245 !PyObject_TypeCheck(args, &PyBaseString_Type))
4246 dict = args;
4247 while (--fmtcnt >= 0) {
4248 if (*fmt != '%') {
4249 if (--rescnt < 0) {
4250 rescnt = fmtcnt + 100;
4251 reslen += rescnt;
4252 if (_PyString_Resize(&result, reslen))
4253 return NULL;
4254 res = PyString_AS_STRING(result)
4255 + reslen - rescnt;
4256 --rescnt;
4257 }
4258 *res++ = *fmt++;
4259 }
4260 else {
4261 /* Got a format specifier */
4262 int flags = 0;
4263 Py_ssize_t width = -1;
4264 int prec = -1;
4265 int c = '\0';
4266 int fill;
4267 int isnumok;
4268 PyObject *v = NULL;
4269 PyObject *temp = NULL;
4270 char *pbuf;
4271 int sign;
4272 Py_ssize_t len;
4273 char formatbuf[FORMATBUFLEN];
4274 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004275#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004276 char *fmt_start = fmt;
4277 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004278#endif
4279
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004280 fmt++;
4281 if (*fmt == '(') {
4282 char *keystart;
4283 Py_ssize_t keylen;
4284 PyObject *key;
4285 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004286
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004287 if (dict == NULL) {
4288 PyErr_SetString(PyExc_TypeError,
4289 "format requires a mapping");
4290 goto error;
4291 }
4292 ++fmt;
4293 --fmtcnt;
4294 keystart = fmt;
4295 /* Skip over balanced parentheses */
4296 while (pcount > 0 && --fmtcnt >= 0) {
4297 if (*fmt == ')')
4298 --pcount;
4299 else if (*fmt == '(')
4300 ++pcount;
4301 fmt++;
4302 }
4303 keylen = fmt - keystart - 1;
4304 if (fmtcnt < 0 || pcount > 0) {
4305 PyErr_SetString(PyExc_ValueError,
4306 "incomplete format key");
4307 goto error;
4308 }
4309 key = PyString_FromStringAndSize(keystart,
4310 keylen);
4311 if (key == NULL)
4312 goto error;
4313 if (args_owned) {
4314 Py_DECREF(args);
4315 args_owned = 0;
4316 }
4317 args = PyObject_GetItem(dict, key);
4318 Py_DECREF(key);
4319 if (args == NULL) {
4320 goto error;
4321 }
4322 args_owned = 1;
4323 arglen = -1;
4324 argidx = -2;
4325 }
4326 while (--fmtcnt >= 0) {
4327 switch (c = *fmt++) {
4328 case '-': flags |= F_LJUST; continue;
4329 case '+': flags |= F_SIGN; continue;
4330 case ' ': flags |= F_BLANK; continue;
4331 case '#': flags |= F_ALT; continue;
4332 case '0': flags |= F_ZERO; continue;
4333 }
4334 break;
4335 }
4336 if (c == '*') {
4337 v = getnextarg(args, arglen, &argidx);
4338 if (v == NULL)
4339 goto error;
4340 if (!PyInt_Check(v)) {
4341 PyErr_SetString(PyExc_TypeError,
4342 "* wants int");
4343 goto error;
4344 }
4345 width = PyInt_AsLong(v);
4346 if (width < 0) {
4347 flags |= F_LJUST;
4348 width = -width;
4349 }
4350 if (--fmtcnt >= 0)
4351 c = *fmt++;
4352 }
4353 else if (c >= 0 && isdigit(c)) {
4354 width = c - '0';
4355 while (--fmtcnt >= 0) {
4356 c = Py_CHARMASK(*fmt++);
4357 if (!isdigit(c))
4358 break;
4359 if ((width*10) / 10 != width) {
4360 PyErr_SetString(
4361 PyExc_ValueError,
4362 "width too big");
4363 goto error;
4364 }
4365 width = width*10 + (c - '0');
4366 }
4367 }
4368 if (c == '.') {
4369 prec = 0;
4370 if (--fmtcnt >= 0)
4371 c = *fmt++;
4372 if (c == '*') {
4373 v = getnextarg(args, arglen, &argidx);
4374 if (v == NULL)
4375 goto error;
4376 if (!PyInt_Check(v)) {
4377 PyErr_SetString(
4378 PyExc_TypeError,
4379 "* wants int");
4380 goto error;
4381 }
4382 prec = PyInt_AsLong(v);
4383 if (prec < 0)
4384 prec = 0;
4385 if (--fmtcnt >= 0)
4386 c = *fmt++;
4387 }
4388 else if (c >= 0 && isdigit(c)) {
4389 prec = c - '0';
4390 while (--fmtcnt >= 0) {
4391 c = Py_CHARMASK(*fmt++);
4392 if (!isdigit(c))
4393 break;
4394 if ((prec*10) / 10 != prec) {
4395 PyErr_SetString(
4396 PyExc_ValueError,
4397 "prec too big");
4398 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004399 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004400 prec = prec*10 + (c - '0');
4401 }
4402 }
4403 } /* prec */
4404 if (fmtcnt >= 0) {
4405 if (c == 'h' || c == 'l' || c == 'L') {
4406 if (--fmtcnt >= 0)
4407 c = *fmt++;
4408 }
4409 }
4410 if (fmtcnt < 0) {
4411 PyErr_SetString(PyExc_ValueError,
4412 "incomplete format");
4413 goto error;
4414 }
4415 if (c != '%') {
4416 v = getnextarg(args, arglen, &argidx);
4417 if (v == NULL)
4418 goto error;
4419 }
4420 sign = 0;
4421 fill = ' ';
4422 switch (c) {
4423 case '%':
4424 pbuf = "%";
4425 len = 1;
4426 break;
4427 case 's':
4428#ifdef Py_USING_UNICODE
4429 if (PyUnicode_Check(v)) {
4430 fmt = fmt_start;
4431 argidx = argidx_start;
4432 goto unicode;
4433 }
4434#endif
4435 temp = _PyObject_Str(v);
4436#ifdef Py_USING_UNICODE
4437 if (temp != NULL && PyUnicode_Check(temp)) {
4438 Py_DECREF(temp);
4439 fmt = fmt_start;
4440 argidx = argidx_start;
4441 goto unicode;
4442 }
4443#endif
4444 /* Fall through */
4445 case 'r':
4446 if (c == 'r')
4447 temp = PyObject_Repr(v);
4448 if (temp == NULL)
4449 goto error;
4450 if (!PyString_Check(temp)) {
4451 PyErr_SetString(PyExc_TypeError,
4452 "%s argument has non-string str()");
4453 Py_DECREF(temp);
4454 goto error;
4455 }
4456 pbuf = PyString_AS_STRING(temp);
4457 len = PyString_GET_SIZE(temp);
4458 if (prec >= 0 && len > prec)
4459 len = prec;
4460 break;
4461 case 'i':
4462 case 'd':
4463 case 'u':
4464 case 'o':
4465 case 'x':
4466 case 'X':
4467 if (c == 'i')
4468 c = 'd';
4469 isnumok = 0;
4470 if (PyNumber_Check(v)) {
4471 PyObject *iobj=NULL;
4472
4473 if (PyInt_Check(v) || (PyLong_Check(v))) {
4474 iobj = v;
4475 Py_INCREF(iobj);
4476 }
4477 else {
4478 iobj = PyNumber_Int(v);
4479 if (iobj==NULL) iobj = PyNumber_Long(v);
4480 }
4481 if (iobj!=NULL) {
4482 if (PyInt_Check(iobj)) {
4483 isnumok = 1;
4484 pbuf = formatbuf;
4485 len = formatint(pbuf,
4486 sizeof(formatbuf),
4487 flags, prec, c, iobj);
4488 Py_DECREF(iobj);
4489 if (len < 0)
4490 goto error;
4491 sign = 1;
4492 }
4493 else if (PyLong_Check(iobj)) {
4494 int ilen;
4495
4496 isnumok = 1;
4497 temp = _PyString_FormatLong(iobj, flags,
4498 prec, c, &pbuf, &ilen);
4499 Py_DECREF(iobj);
4500 len = ilen;
4501 if (!temp)
4502 goto error;
4503 sign = 1;
4504 }
4505 else {
4506 Py_DECREF(iobj);
4507 }
4508 }
4509 }
4510 if (!isnumok) {
4511 PyErr_Format(PyExc_TypeError,
4512 "%%%c format: a number is required, "
4513 "not %.200s", c, Py_TYPE(v)->tp_name);
4514 goto error;
4515 }
4516 if (flags & F_ZERO)
4517 fill = '0';
4518 break;
4519 case 'e':
4520 case 'E':
4521 case 'f':
4522 case 'F':
4523 case 'g':
4524 case 'G':
4525 temp = formatfloat(v, flags, prec, c);
4526 if (temp == NULL)
4527 goto error;
4528 pbuf = PyString_AS_STRING(temp);
4529 len = PyString_GET_SIZE(temp);
4530 sign = 1;
4531 if (flags & F_ZERO)
4532 fill = '0';
4533 break;
4534 case 'c':
4535#ifdef Py_USING_UNICODE
4536 if (PyUnicode_Check(v)) {
4537 fmt = fmt_start;
4538 argidx = argidx_start;
4539 goto unicode;
4540 }
4541#endif
4542 pbuf = formatbuf;
4543 len = formatchar(pbuf, sizeof(formatbuf), v);
4544 if (len < 0)
4545 goto error;
4546 break;
4547 default:
4548 PyErr_Format(PyExc_ValueError,
4549 "unsupported format character '%c' (0x%x) "
4550 "at index %zd",
4551 c, c,
4552 (Py_ssize_t)(fmt - 1 -
4553 PyString_AsString(format)));
4554 goto error;
4555 }
4556 if (sign) {
4557 if (*pbuf == '-' || *pbuf == '+') {
4558 sign = *pbuf++;
4559 len--;
4560 }
4561 else if (flags & F_SIGN)
4562 sign = '+';
4563 else if (flags & F_BLANK)
4564 sign = ' ';
4565 else
4566 sign = 0;
4567 }
4568 if (width < len)
4569 width = len;
4570 if (rescnt - (sign != 0) < width) {
4571 reslen -= rescnt;
4572 rescnt = width + fmtcnt + 100;
4573 reslen += rescnt;
4574 if (reslen < 0) {
4575 Py_DECREF(result);
4576 Py_XDECREF(temp);
4577 return PyErr_NoMemory();
4578 }
4579 if (_PyString_Resize(&result, reslen)) {
4580 Py_XDECREF(temp);
4581 return NULL;
4582 }
4583 res = PyString_AS_STRING(result)
4584 + reslen - rescnt;
4585 }
4586 if (sign) {
4587 if (fill != ' ')
4588 *res++ = sign;
4589 rescnt--;
4590 if (width > len)
4591 width--;
4592 }
4593 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4594 assert(pbuf[0] == '0');
4595 assert(pbuf[1] == c);
4596 if (fill != ' ') {
4597 *res++ = *pbuf++;
4598 *res++ = *pbuf++;
4599 }
4600 rescnt -= 2;
4601 width -= 2;
4602 if (width < 0)
4603 width = 0;
4604 len -= 2;
4605 }
4606 if (width > len && !(flags & F_LJUST)) {
4607 do {
4608 --rescnt;
4609 *res++ = fill;
4610 } while (--width > len);
4611 }
4612 if (fill == ' ') {
4613 if (sign)
4614 *res++ = sign;
4615 if ((flags & F_ALT) &&
4616 (c == 'x' || c == 'X')) {
4617 assert(pbuf[0] == '0');
4618 assert(pbuf[1] == c);
4619 *res++ = *pbuf++;
4620 *res++ = *pbuf++;
4621 }
4622 }
4623 Py_MEMCPY(res, pbuf, len);
4624 res += len;
4625 rescnt -= len;
4626 while (--width >= len) {
4627 --rescnt;
4628 *res++ = ' ';
4629 }
4630 if (dict && (argidx < arglen) && c != '%') {
4631 PyErr_SetString(PyExc_TypeError,
4632 "not all arguments converted during string formatting");
4633 Py_XDECREF(temp);
4634 goto error;
4635 }
4636 Py_XDECREF(temp);
4637 } /* '%' */
4638 } /* until end */
4639 if (argidx < arglen && !dict) {
4640 PyErr_SetString(PyExc_TypeError,
4641 "not all arguments converted during string formatting");
4642 goto error;
4643 }
4644 if (args_owned) {
4645 Py_DECREF(args);
4646 }
4647 if (_PyString_Resize(&result, reslen - rescnt))
4648 return NULL;
4649 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004650
4651#ifdef Py_USING_UNICODE
4652 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004653 if (args_owned) {
4654 Py_DECREF(args);
4655 args_owned = 0;
4656 }
4657 /* Fiddle args right (remove the first argidx arguments) */
4658 if (PyTuple_Check(orig_args) && argidx > 0) {
4659 PyObject *v;
4660 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4661 v = PyTuple_New(n);
4662 if (v == NULL)
4663 goto error;
4664 while (--n >= 0) {
4665 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4666 Py_INCREF(w);
4667 PyTuple_SET_ITEM(v, n, w);
4668 }
4669 args = v;
4670 } else {
4671 Py_INCREF(orig_args);
4672 args = orig_args;
4673 }
4674 args_owned = 1;
4675 /* Take what we have of the result and let the Unicode formatting
4676 function format the rest of the input. */
4677 rescnt = res - PyString_AS_STRING(result);
4678 if (_PyString_Resize(&result, rescnt))
4679 goto error;
4680 fmtcnt = PyString_GET_SIZE(format) - \
4681 (fmt - PyString_AS_STRING(format));
4682 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4683 if (format == NULL)
4684 goto error;
4685 v = PyUnicode_Format(format, args);
4686 Py_DECREF(format);
4687 if (v == NULL)
4688 goto error;
4689 /* Paste what we have (result) to what the Unicode formatting
4690 function returned (v) and return the result (or error) */
4691 w = PyUnicode_Concat(result, v);
4692 Py_DECREF(result);
4693 Py_DECREF(v);
4694 Py_DECREF(args);
4695 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004696#endif /* Py_USING_UNICODE */
4697
4698 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004699 Py_DECREF(result);
4700 if (args_owned) {
4701 Py_DECREF(args);
4702 }
4703 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004704}
4705
4706void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004707PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004708{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004709 register PyStringObject *s = (PyStringObject *)(*p);
4710 PyObject *t;
4711 if (s == NULL || !PyString_Check(s))
4712 Py_FatalError("PyString_InternInPlace: strings only please!");
4713 /* If it's a string subclass, we don't really know what putting
4714 it in the interned dict might do. */
4715 if (!PyString_CheckExact(s))
4716 return;
4717 if (PyString_CHECK_INTERNED(s))
4718 return;
4719 if (interned == NULL) {
4720 interned = PyDict_New();
4721 if (interned == NULL) {
4722 PyErr_Clear(); /* Don't leave an exception */
4723 return;
4724 }
4725 }
4726 t = PyDict_GetItem(interned, (PyObject *)s);
4727 if (t) {
4728 Py_INCREF(t);
4729 Py_DECREF(*p);
4730 *p = t;
4731 return;
4732 }
Christian Heimes44720832008-05-26 13:01:01 +00004733
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004734 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4735 PyErr_Clear();
4736 return;
4737 }
4738 /* The two references in interned are not counted by refcnt.
4739 The string deallocator will take care of this */
4740 Py_REFCNT(s) -= 2;
4741 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004742}
4743
4744void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004745PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004746{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004747 PyString_InternInPlace(p);
4748 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4749 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4750 Py_INCREF(*p);
4751 }
Christian Heimes44720832008-05-26 13:01:01 +00004752}
4753
4754
4755PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004756PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004757{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004758 PyObject *s = PyString_FromString(cp);
4759 if (s == NULL)
4760 return NULL;
4761 PyString_InternInPlace(&s);
4762 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004763}
4764
4765void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004766PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004767{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004768 int i;
4769 for (i = 0; i < UCHAR_MAX + 1; i++) {
4770 Py_XDECREF(characters[i]);
4771 characters[i] = NULL;
4772 }
4773 Py_XDECREF(nullstring);
4774 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004775}
4776
4777void _Py_ReleaseInternedStrings(void)
4778{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004779 PyObject *keys;
4780 PyStringObject *s;
4781 Py_ssize_t i, n;
4782 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004783
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004784 if (interned == NULL || !PyDict_Check(interned))
4785 return;
4786 keys = PyDict_Keys(interned);
4787 if (keys == NULL || !PyList_Check(keys)) {
4788 PyErr_Clear();
4789 return;
4790 }
Christian Heimes44720832008-05-26 13:01:01 +00004791
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004792 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4793 detector, interned strings are not forcibly deallocated; rather, we
4794 give them their stolen references back, and then clear and DECREF
4795 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004796
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004797 n = PyList_GET_SIZE(keys);
4798 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4799 n);
4800 for (i = 0; i < n; i++) {
4801 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4802 switch (s->ob_sstate) {
4803 case SSTATE_NOT_INTERNED:
4804 /* XXX Shouldn't happen */
4805 break;
4806 case SSTATE_INTERNED_IMMORTAL:
4807 Py_REFCNT(s) += 1;
4808 immortal_size += Py_SIZE(s);
4809 break;
4810 case SSTATE_INTERNED_MORTAL:
4811 Py_REFCNT(s) += 2;
4812 mortal_size += Py_SIZE(s);
4813 break;
4814 default:
4815 Py_FatalError("Inconsistent interned string state.");
4816 }
4817 s->ob_sstate = SSTATE_NOT_INTERNED;
4818 }
4819 fprintf(stderr, "total size of all interned strings: "
4820 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4821 "mortal/immortal\n", mortal_size, immortal_size);
4822 Py_DECREF(keys);
4823 PyDict_Clear(interned);
4824 Py_DECREF(interned);
4825 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00004826}