blob: 4746b3c48b5ba12d11b702b118197e9bf6d172c1 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/* Basic size of a string object: the offset of the character data plus
   one byte for the terminating null.  An allocation for a string of
   length n should request PyStringObject_SIZE + n bytes.

   Compared with sizeof(PyStringObject) this saves 3 bytes per string
   allocation on a typical system.
*/
#define PyStringObject_SIZE (1 + offsetof(PyStringObject, ob_sval))
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
 For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000043 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
 is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194#endif
Christian Heimes44720832008-05-26 13:01:01 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
198
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
Mark Dickinson82864d12009-11-15 16:18:58 +0000202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
206#ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
212#endif
213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000215 ++f;
Mark Dickinson82864d12009-11-15 16:18:58 +0000216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235#endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
241
Christian Heimes44720832008-05-26 13:01:01 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000273 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000274 if (!string)
275 return NULL;
276
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286#endif
Christian Heimes44720832008-05-26 13:01:01 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
Mark Dickinson82864d12009-11-15 16:18:58 +0000301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
307#ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
313#endif
Christian Heimes44720832008-05-26 13:01:01 +0000314 }
315 /* handle the size_t flag. */
Mark Dickinson82864d12009-11-15 16:18:58 +0000316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000317 size_tflag = 1;
318 ++f;
319 }
320
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332#endif
Christian Heimes44720832008-05-26 13:01:01 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348#endif
Christian Heimes44720832008-05-26 13:01:01 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
396
397 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000398 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000399 return string;
400}
401
402PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000403PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000404{
405 PyObject* ret;
406 va_list vargs;
407
408#ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs, format);
410#else
411 va_start(vargs);
412#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000413 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000414 va_end(vargs);
415 return ret;
416}
417
418
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000419PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000420 Py_ssize_t size,
421 const char *encoding,
422 const char *errors)
423{
424 PyObject *v, *str;
425
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000427 if (str == NULL)
428 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000430 Py_DECREF(str);
431 return v;
432}
433
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000434PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000435 const char *encoding,
436 const char *errors)
437{
438 PyObject *v;
439
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000440 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000441 PyErr_BadArgument();
442 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000443 }
444
Christian Heimes44720832008-05-26 13:01:01 +0000445 if (encoding == NULL) {
446#ifdef Py_USING_UNICODE
447 encoding = PyUnicode_GetDefaultEncoding();
448#else
449 PyErr_SetString(PyExc_ValueError, "no encoding specified");
450 goto onError;
451#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000452 }
Christian Heimes44720832008-05-26 13:01:01 +0000453
454 /* Decode via the codec registry */
455 v = PyCodec_Decode(str, encoding, errors);
456 if (v == NULL)
457 goto onError;
458
459 return v;
460
461 onError:
462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000463}
464
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000465PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000466 const char *encoding,
467 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000468{
Christian Heimes44720832008-05-26 13:01:01 +0000469 PyObject *v;
470
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000471 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000472 if (v == NULL)
473 goto onError;
474
475#ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v)) {
478 PyObject *temp = v;
479 v = PyUnicode_AsEncodedString(v, NULL, NULL);
480 Py_DECREF(temp);
481 if (v == NULL)
482 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000483 }
Christian Heimes44720832008-05-26 13:01:01 +0000484#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000485 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000486 PyErr_Format(PyExc_TypeError,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v)->tp_name);
489 Py_DECREF(v);
490 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000491 }
Christian Heimes44720832008-05-26 13:01:01 +0000492
493 return v;
494
495 onError:
496 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000497}
498
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000499PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000500 Py_ssize_t size,
501 const char *encoding,
502 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000503{
Christian Heimes44720832008-05-26 13:01:01 +0000504 PyObject *v, *str;
505
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000507 if (str == NULL)
508 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000510 Py_DECREF(str);
511 return v;
512}
513
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000514PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000515 const char *encoding,
516 const char *errors)
517{
518 PyObject *v;
519
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000520 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000521 PyErr_BadArgument();
522 goto onError;
523 }
524
525 if (encoding == NULL) {
526#ifdef Py_USING_UNICODE
527 encoding = PyUnicode_GetDefaultEncoding();
528#else
529 PyErr_SetString(PyExc_ValueError, "no encoding specified");
530 goto onError;
531#endif
532 }
533
534 /* Encode via the codec registry */
535 v = PyCodec_Encode(str, encoding, errors);
536 if (v == NULL)
537 goto onError;
538
539 return v;
540
541 onError:
542 return NULL;
543}
544
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000545PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000546 const char *encoding,
547 const char *errors)
548{
549 PyObject *v;
550
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000551 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000552 if (v == NULL)
553 goto onError;
554
555#ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v)) {
558 PyObject *temp = v;
559 v = PyUnicode_AsEncodedString(v, NULL, NULL);
560 Py_DECREF(temp);
561 if (v == NULL)
562 goto onError;
563 }
564#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000565 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000566 PyErr_Format(PyExc_TypeError,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v)->tp_name);
569 Py_DECREF(v);
570 goto onError;
571 }
572
573 return v;
574
575 onError:
576 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000577}
578
579static void
Christian Heimes44720832008-05-26 13:01:01 +0000580string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000581{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000582 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000583 case SSTATE_NOT_INTERNED:
584 break;
585
586 case SSTATE_INTERNED_MORTAL:
587 /* revive dead object temporarily for DelItem */
588 Py_REFCNT(op) = 3;
589 if (PyDict_DelItem(interned, op) != 0)
590 Py_FatalError(
591 "deletion of interned string failed");
592 break;
593
594 case SSTATE_INTERNED_IMMORTAL:
595 Py_FatalError("Immortal interned string died.");
596
597 default:
598 Py_FatalError("Inconsistent interned string state.");
599 }
600 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000601}
602
Christian Heimes44720832008-05-26 13:01:01 +0000603/* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
607
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000608PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000609 Py_ssize_t len,
610 const char *errors,
611 Py_ssize_t unicode,
612 const char *recode_encoding)
613{
614 int c;
615 char *p, *buf;
616 const char *end;
617 PyObject *v;
618 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000619 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000620 if (v == NULL)
621 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000622 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000623 end = s + len;
624 while (s < end) {
625 if (*s != '\\') {
626 non_esc:
627#ifdef Py_USING_UNICODE
628 if (recode_encoding && (*s & 0x80)) {
629 PyObject *u, *w;
630 char *r;
631 const char* t;
632 Py_ssize_t rn;
633 t = s;
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t < end && (*t & 0x80)) t++;
636 u = PyUnicode_DecodeUTF8(s, t - s, errors);
637 if(!u) goto failed;
638
639 /* Recode them in target encoding. */
640 w = PyUnicode_AsEncodedString(
641 u, recode_encoding, errors);
642 Py_DECREF(u);
643 if (!w) goto failed;
644
645 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000646 assert(PyString_Check(w));
647 r = PyString_AS_STRING(w);
648 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000649 Py_MEMCPY(p, r, rn);
650 p += rn;
651 Py_DECREF(w);
652 s = t;
653 } else {
654 *p++ = *s++;
655 }
656#else
657 *p++ = *s++;
658#endif
659 continue;
660 }
661 s++;
662 if (s==end) {
663 PyErr_SetString(PyExc_ValueError,
664 "Trailing \\ in string");
665 goto failed;
666 }
667 switch (*s++) {
668 /* XXX This assumes ASCII! */
669 case '\n': break;
670 case '\\': *p++ = '\\'; break;
671 case '\'': *p++ = '\''; break;
672 case '\"': *p++ = '\"'; break;
673 case 'b': *p++ = '\b'; break;
674 case 'f': *p++ = '\014'; break; /* FF */
675 case 't': *p++ = '\t'; break;
676 case 'n': *p++ = '\n'; break;
677 case 'r': *p++ = '\r'; break;
678 case 'v': *p++ = '\013'; break; /* VT */
679 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
682 c = s[-1] - '0';
683 if (s < end && '0' <= *s && *s <= '7') {
684 c = (c<<3) + *s++ - '0';
685 if (s < end && '0' <= *s && *s <= '7')
686 c = (c<<3) + *s++ - '0';
687 }
688 *p++ = c;
689 break;
690 case 'x':
691 if (s+1 < end &&
692 isxdigit(Py_CHARMASK(s[0])) &&
693 isxdigit(Py_CHARMASK(s[1])))
694 {
695 unsigned int x = 0;
696 c = Py_CHARMASK(*s);
697 s++;
698 if (isdigit(c))
699 x = c - '0';
700 else if (islower(c))
701 x = 10 + c - 'a';
702 else
703 x = 10 + c - 'A';
704 x = x << 4;
705 c = Py_CHARMASK(*s);
706 s++;
707 if (isdigit(c))
708 x += c - '0';
709 else if (islower(c))
710 x += 10 + c - 'a';
711 else
712 x += 10 + c - 'A';
713 *p++ = x;
714 break;
715 }
716 if (!errors || strcmp(errors, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError,
718 "invalid \\x escape");
719 goto failed;
720 }
721 if (strcmp(errors, "replace") == 0) {
722 *p++ = '?';
723 } else if (strcmp(errors, "ignore") == 0)
724 /* do nothing */;
725 else {
726 PyErr_Format(PyExc_ValueError,
727 "decoding error; "
728 "unknown error handling code: %.400s",
729 errors);
730 goto failed;
731 }
732#ifndef Py_USING_UNICODE
733 case 'u':
734 case 'U':
735 case 'N':
736 if (unicode) {
737 PyErr_SetString(PyExc_ValueError,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
740 goto failed;
741 }
742#endif
743 default:
744 *p++ = '\\';
745 s--;
746 goto non_esc; /* an arbitry number of unescaped
747 UTF-8 bytes may follow. */
748 }
749 }
750 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000752 return v;
753 failed:
754 Py_DECREF(v);
755 return NULL;
756}
757
758/* -------------------------------------------------------------------- */
759/* object api */
760
Christian Heimes1a6387e2008-03-26 12:49:49 +0000761static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000762string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000763{
Christian Heimes44720832008-05-26 13:01:01 +0000764 char *s;
765 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000766 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000767 return -1;
768 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000769}
770
Christian Heimes44720832008-05-26 13:01:01 +0000771static /*const*/ char *
772string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000773{
Christian Heimes44720832008-05-26 13:01:01 +0000774 char *s;
775 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000776 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000777 return NULL;
778 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779}
780
781Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000782PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000784 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000785 return string_getsize(op);
786 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787}
788
Christian Heimes44720832008-05-26 13:01:01 +0000789/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000790PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000792 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000793 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000794 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795}
796
797int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000799 register char **s,
800 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801{
Christian Heimes44720832008-05-26 13:01:01 +0000802 if (s == NULL) {
803 PyErr_BadInternalCall();
804 return -1;
805 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000806
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000807 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000808#ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj)) {
810 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
811 if (obj == NULL)
812 return -1;
813 }
814 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000815#endif
Christian Heimes44720832008-05-26 13:01:01 +0000816 {
817 PyErr_Format(PyExc_TypeError,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj)->tp_name);
820 return -1;
821 }
822 }
823
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000824 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000825 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000826 *len = PyString_GET_SIZE(obj);
827 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000828 PyErr_SetString(PyExc_TypeError,
829 "expected string without null bytes");
830 return -1;
831 }
832 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000833}
834
Christian Heimes1a6387e2008-03-26 12:49:49 +0000835/* -------------------------------------------------------------------- */
836/* Methods */
837
Christian Heimes44720832008-05-26 13:01:01 +0000838#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000839#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000840
Christian Heimes1a6387e2008-03-26 12:49:49 +0000841#include "stringlib/count.h"
842#include "stringlib/find.h"
843#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000844
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000845#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000846#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000847
Christian Heimes1a6387e2008-03-26 12:49:49 +0000848
849
850static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000851string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000852{
Christian Heimes44720832008-05-26 13:01:01 +0000853 Py_ssize_t i, str_len;
854 char c;
855 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000856
Christian Heimes44720832008-05-26 13:01:01 +0000857 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000858 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000859 int ret;
860 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000861 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000862 if (op == NULL)
863 return -1;
864 ret = string_print(op, fp, flags);
865 Py_DECREF(op);
866 return ret;
867 }
868 if (flags & Py_PRINT_RAW) {
869 char *data = op->ob_sval;
870 Py_ssize_t size = Py_SIZE(op);
871 Py_BEGIN_ALLOW_THREADS
872 while (size > INT_MAX) {
873 /* Very long strings cannot be written atomically.
874 * But don't write exactly INT_MAX bytes at a time
875 * to avoid memory aligment issues.
876 */
877 const int chunk_size = INT_MAX & ~0x3FFF;
878 fwrite(data, 1, chunk_size, fp);
879 data += chunk_size;
880 size -= chunk_size;
881 }
882#ifdef __VMS
883 if (size) fwrite(data, (int)size, 1, fp);
884#else
885 fwrite(data, 1, (int)size, fp);
886#endif
887 Py_END_ALLOW_THREADS
888 return 0;
889 }
890
891 /* figure out which quote to use; single is preferred */
892 quote = '\'';
893 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
894 !memchr(op->ob_sval, '"', Py_SIZE(op)))
895 quote = '"';
896
897 str_len = Py_SIZE(op);
898 Py_BEGIN_ALLOW_THREADS
899 fputc(quote, fp);
900 for (i = 0; i < str_len; i++) {
901 /* Since strings are immutable and the caller should have a
902 reference, accessing the interal buffer should not be an issue
903 with the GIL released. */
904 c = op->ob_sval[i];
905 if (c == quote || c == '\\')
906 fprintf(fp, "\\%c", c);
907 else if (c == '\t')
908 fprintf(fp, "\\t");
909 else if (c == '\n')
910 fprintf(fp, "\\n");
911 else if (c == '\r')
912 fprintf(fp, "\\r");
913 else if (c < ' ' || c >= 0x7f)
914 fprintf(fp, "\\x%02x", c & 0xff);
915 else
916 fputc(c, fp);
917 }
918 fputc(quote, fp);
919 Py_END_ALLOW_THREADS
920 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000921}
922
Christian Heimes44720832008-05-26 13:01:01 +0000923PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000924PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000925{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000927 size_t newsize = 2 + 4 * Py_SIZE(op);
928 PyObject *v;
929 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
930 PyErr_SetString(PyExc_OverflowError,
931 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000932 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000933 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000934 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000935 if (v == NULL) {
936 return NULL;
937 }
938 else {
939 register Py_ssize_t i;
940 register char c;
941 register char *p;
942 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000943
Christian Heimes44720832008-05-26 13:01:01 +0000944 /* figure out which quote to use; single is preferred */
945 quote = '\'';
946 if (smartquotes &&
947 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
948 !memchr(op->ob_sval, '"', Py_SIZE(op)))
949 quote = '"';
950
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000952 *p++ = quote;
953 for (i = 0; i < Py_SIZE(op); i++) {
954 /* There's at least enough room for a hex escape
955 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000956 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000957 c = op->ob_sval[i];
958 if (c == quote || c == '\\')
959 *p++ = '\\', *p++ = c;
960 else if (c == '\t')
961 *p++ = '\\', *p++ = 't';
962 else if (c == '\n')
963 *p++ = '\\', *p++ = 'n';
964 else if (c == '\r')
965 *p++ = '\\', *p++ = 'r';
966 else if (c < ' ' || c >= 0x7f) {
967 /* For performance, we don't want to call
968 PyOS_snprintf here (extra layers of
969 function call). */
970 sprintf(p, "\\x%02x", c & 0xff);
971 p += 4;
972 }
973 else
974 *p++ = c;
975 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000976 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000977 *p++ = quote;
978 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000979 _PyString_Resize(
980 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000981 return v;
982 }
983}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000984
985static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000986string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000988 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989}
990
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000992string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000994 assert(PyString_Check(s));
995 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000996 Py_INCREF(s);
997 return s;
998 }
999 else {
1000 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001001 PyStringObject *t = (PyStringObject *) s;
1002 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +00001003 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001004}
1005
Christian Heimes44720832008-05-26 13:01:01 +00001006static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001007string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001008{
1009 return Py_SIZE(a);
1010}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001011
Christian Heimes44720832008-05-26 13:01:01 +00001012static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001013string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001014{
1015 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016 register PyStringObject *op;
1017 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001018#ifdef Py_USING_UNICODE
1019 if (PyUnicode_Check(bb))
1020 return PyUnicode_Concat((PyObject *)a, bb);
1021#endif
1022 if (PyByteArray_Check(bb))
1023 return PyByteArray_Concat((PyObject *)a, bb);
1024 PyErr_Format(PyExc_TypeError,
1025 "cannot concatenate 'str' and '%.200s' objects",
1026 Py_TYPE(bb)->tp_name);
1027 return NULL;
1028 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001029#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +00001030 /* Optimize cases with empty left or right operand */
1031 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001032 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001033 if (Py_SIZE(a) == 0) {
1034 Py_INCREF(bb);
1035 return bb;
1036 }
1037 Py_INCREF(a);
1038 return (PyObject *)a;
1039 }
1040 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +00001041 /* Check that string sizes are not negative, to prevent an
1042 overflow in cases where we are passed incorrectly-created
1043 strings with negative lengths (due to a bug in other code).
1044 */
1045 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1046 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001047 PyErr_SetString(PyExc_OverflowError,
1048 "strings are too large to concat");
1049 return NULL;
1050 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001051
Christian Heimes44720832008-05-26 13:01:01 +00001052 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001054 PyErr_SetString(PyExc_OverflowError,
1055 "strings are too large to concat");
1056 return NULL;
1057 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001058 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001059 if (op == NULL)
1060 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001061 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001062 op->ob_shash = -1;
1063 op->ob_sstate = SSTATE_NOT_INTERNED;
1064 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1065 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1066 op->ob_sval[size] = '\0';
1067 return (PyObject *) op;
1068#undef b
1069}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001070
Christian Heimes44720832008-05-26 13:01:01 +00001071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001073{
1074 register Py_ssize_t i;
1075 register Py_ssize_t j;
1076 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001077 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001078 size_t nbytes;
1079 if (n < 0)
1080 n = 0;
1081 /* watch out for overflows: the size can overflow int,
1082 * and the # of bytes needed can overflow size_t
1083 */
1084 size = Py_SIZE(a) * n;
1085 if (n && size / n != Py_SIZE(a)) {
1086 PyErr_SetString(PyExc_OverflowError,
1087 "repeated string is too long");
1088 return NULL;
1089 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001090 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001091 Py_INCREF(a);
1092 return (PyObject *)a;
1093 }
1094 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001095 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001096 PyErr_SetString(PyExc_OverflowError,
1097 "repeated string is too long");
1098 return NULL;
1099 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001100 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001101 if (op == NULL)
1102 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001103 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001104 op->ob_shash = -1;
1105 op->ob_sstate = SSTATE_NOT_INTERNED;
1106 op->ob_sval[size] = '\0';
1107 if (Py_SIZE(a) == 1 && n > 0) {
1108 memset(op->ob_sval, a->ob_sval[0] , n);
1109 return (PyObject *) op;
1110 }
1111 i = 0;
1112 if (i < size) {
1113 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1114 i = Py_SIZE(a);
1115 }
1116 while (i < size) {
1117 j = (i <= size-i) ? i : size-i;
1118 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1119 i += j;
1120 }
1121 return (PyObject *) op;
1122}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001123
Christian Heimes44720832008-05-26 13:01:01 +00001124/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125
1126static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001127string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001128 register Py_ssize_t j)
1129 /* j -- may be negative! */
1130{
1131 if (i < 0)
1132 i = 0;
1133 if (j < 0)
1134 j = 0; /* Avoid signed/unsigned bug in next line */
1135 if (j > Py_SIZE(a))
1136 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001137 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001138 /* It's the same as a */
1139 Py_INCREF(a);
1140 return (PyObject *)a;
1141 }
1142 if (j < i)
1143 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001144 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001145}
1146
1147static int
1148string_contains(PyObject *str_obj, PyObject *sub_obj)
1149{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001150 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001151#ifdef Py_USING_UNICODE
1152 if (PyUnicode_Check(sub_obj))
1153 return PyUnicode_Contains(str_obj, sub_obj);
1154#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001155 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001156 PyErr_Format(PyExc_TypeError,
1157 "'in <string>' requires string as left operand, "
1158 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1159 return -1;
1160 }
1161 }
1162
1163 return stringlib_contains_obj(str_obj, sub_obj);
1164}
1165
1166static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001167string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001168{
1169 char pchar;
1170 PyObject *v;
1171 if (i < 0 || i >= Py_SIZE(a)) {
1172 PyErr_SetString(PyExc_IndexError, "string index out of range");
1173 return NULL;
1174 }
1175 pchar = a->ob_sval[i];
1176 v = (PyObject *)characters[pchar & UCHAR_MAX];
1177 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001178 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001179 else {
1180#ifdef COUNT_ALLOCS
1181 one_strings++;
1182#endif
1183 Py_INCREF(v);
1184 }
1185 return v;
1186}
1187
1188static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001189string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001190{
1191 int c;
1192 Py_ssize_t len_a, len_b;
1193 Py_ssize_t min_len;
1194 PyObject *result;
1195
1196 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001197 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001198 result = Py_NotImplemented;
1199 goto out;
1200 }
1201 if (a == b) {
1202 switch (op) {
1203 case Py_EQ:case Py_LE:case Py_GE:
1204 result = Py_True;
1205 goto out;
1206 case Py_NE:case Py_LT:case Py_GT:
1207 result = Py_False;
1208 goto out;
1209 }
1210 }
1211 if (op == Py_EQ) {
1212 /* Supporting Py_NE here as well does not save
1213 much time, since Py_NE is rarely used. */
1214 if (Py_SIZE(a) == Py_SIZE(b)
1215 && (a->ob_sval[0] == b->ob_sval[0]
1216 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1217 result = Py_True;
1218 } else {
1219 result = Py_False;
1220 }
1221 goto out;
1222 }
1223 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1224 min_len = (len_a < len_b) ? len_a : len_b;
1225 if (min_len > 0) {
1226 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1227 if (c==0)
1228 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1229 } else
1230 c = 0;
1231 if (c == 0)
1232 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1233 switch (op) {
1234 case Py_LT: c = c < 0; break;
1235 case Py_LE: c = c <= 0; break;
1236 case Py_EQ: assert(0); break; /* unreachable */
1237 case Py_NE: c = c != 0; break;
1238 case Py_GT: c = c > 0; break;
1239 case Py_GE: c = c >= 0; break;
1240 default:
1241 result = Py_NotImplemented;
1242 goto out;
1243 }
1244 result = c ? Py_True : Py_False;
1245 out:
1246 Py_INCREF(result);
1247 return result;
1248}
1249
1250int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001251_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001252{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253 PyStringObject *a = (PyStringObject*) o1;
1254 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001255 return Py_SIZE(a) == Py_SIZE(b)
1256 && *a->ob_sval == *b->ob_sval
1257 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1258}
1259
1260static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001261string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001262{
1263 register Py_ssize_t len;
1264 register unsigned char *p;
1265 register long x;
1266
1267 if (a->ob_shash != -1)
1268 return a->ob_shash;
1269 len = Py_SIZE(a);
1270 p = (unsigned char *) a->ob_sval;
1271 x = *p << 7;
1272 while (--len >= 0)
1273 x = (1000003*x) ^ *p++;
1274 x ^= Py_SIZE(a);
1275 if (x == -1)
1276 x = -2;
1277 a->ob_shash = x;
1278 return x;
1279}
1280
1281static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001282string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001283{
1284 if (PyIndex_Check(item)) {
1285 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1286 if (i == -1 && PyErr_Occurred())
1287 return NULL;
1288 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001289 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001290 return string_item(self, i);
1291 }
1292 else if (PySlice_Check(item)) {
1293 Py_ssize_t start, stop, step, slicelength, cur, i;
1294 char* source_buf;
1295 char* result_buf;
1296 PyObject* result;
1297
1298 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001299 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001300 &start, &stop, &step, &slicelength) < 0) {
1301 return NULL;
1302 }
1303
1304 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001305 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001306 }
1307 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001308 slicelength == PyString_GET_SIZE(self) &&
1309 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001310 Py_INCREF(self);
1311 return (PyObject *)self;
1312 }
1313 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001314 return PyString_FromStringAndSize(
1315 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001316 slicelength);
1317 }
1318 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001319 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001320 result_buf = (char *)PyMem_Malloc(slicelength);
1321 if (result_buf == NULL)
1322 return PyErr_NoMemory();
1323
1324 for (cur = start, i = 0; i < slicelength;
1325 cur += step, i++) {
1326 result_buf[i] = source_buf[cur];
1327 }
1328
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001329 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001330 slicelength);
1331 PyMem_Free(result_buf);
1332 return result;
1333 }
1334 }
1335 else {
1336 PyErr_Format(PyExc_TypeError,
1337 "string indices must be integers, not %.200s",
1338 Py_TYPE(item)->tp_name);
1339 return NULL;
1340 }
1341}
1342
1343static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001344string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001345{
1346 if ( index != 0 ) {
1347 PyErr_SetString(PyExc_SystemError,
1348 "accessing non-existent string segment");
1349 return -1;
1350 }
1351 *ptr = (void *)self->ob_sval;
1352 return Py_SIZE(self);
1353}
1354
1355static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001356string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001357{
1358 PyErr_SetString(PyExc_TypeError,
1359 "Cannot use string as modifiable buffer");
1360 return -1;
1361}
1362
1363static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001364string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001365{
1366 if ( lenp )
1367 *lenp = Py_SIZE(self);
1368 return 1;
1369}
1370
1371static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001372string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001373{
1374 if ( index != 0 ) {
1375 PyErr_SetString(PyExc_SystemError,
1376 "accessing non-existent string segment");
1377 return -1;
1378 }
1379 *ptr = self->ob_sval;
1380 return Py_SIZE(self);
1381}
1382
1383static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001384string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001385{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001386 return PyBuffer_FillInfo(view, (PyObject*)self,
1387 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001388 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001389}
1390
1391static PySequenceMethods string_as_sequence = {
1392 (lenfunc)string_length, /*sq_length*/
1393 (binaryfunc)string_concat, /*sq_concat*/
1394 (ssizeargfunc)string_repeat, /*sq_repeat*/
1395 (ssizeargfunc)string_item, /*sq_item*/
1396 (ssizessizeargfunc)string_slice, /*sq_slice*/
1397 0, /*sq_ass_item*/
1398 0, /*sq_ass_slice*/
1399 (objobjproc)string_contains /*sq_contains*/
1400};
1401
1402static PyMappingMethods string_as_mapping = {
1403 (lenfunc)string_length,
1404 (binaryfunc)string_subscript,
1405 0,
1406};
1407
1408static PyBufferProcs string_as_buffer = {
1409 (readbufferproc)string_buffer_getreadbuf,
1410 (writebufferproc)string_buffer_getwritebuf,
1411 (segcountproc)string_buffer_getsegcount,
1412 (charbufferproc)string_buffer_getcharbuf,
1413 (getbufferproc)string_buffer_getbuffer,
1414 0, /* XXX */
1415};
1416
1417
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001418
/* Selector values for the strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1427
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428
/* True when the `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared directly and memcmp
   covers the bytes after the first.
   Don't call if length < 2.
   Every argument is parenthesized so that expression arguments expand
   correctly; arguments are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2) )
1434
1435
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.  The argument is parenthesized
   so that expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append data[left:right] as a new string to `list`.
   Relies on the caller's scope providing `str`, `list` and an `onError`
   label.  Expands to several statements (not a single one), so only use
   it where a statement sequence is valid. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as the next element of `list` and bump `count`.
   While count < MAX_PREALLOC the item goes straight into the preallocated
   slot; after that it is appended.  Relies on the caller's scope providing
   `str`, `list`, `count` and an `onError` label. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
1480
/* Cursor helpers for whitespace splitting.  Each one moves the index `i`
   over a run of characters in `s`: forward up to (not including) `len`
   for the SKIP_ forms, backward down to -1 for the RSKIP_ forms.
   Arguments are parenthesized so that expression arguments expand
   correctly; `i` must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485
1486Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001487split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001489 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001490 Py_ssize_t i, j, count=0;
1491 PyObject *str;
1492 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001493
Christian Heimes44720832008-05-26 13:01:01 +00001494 if (list == NULL)
1495 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496
Christian Heimes44720832008-05-26 13:01:01 +00001497 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498
Christian Heimes44720832008-05-26 13:01:01 +00001499 while (maxsplit-- > 0) {
1500 SKIP_SPACE(s, i, len);
1501 if (i==len) break;
1502 j = i; i++;
1503 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001504 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001505 /* No whitespace in self, so just use it as list[0] */
1506 Py_INCREF(self);
1507 PyList_SET_ITEM(list, 0, (PyObject *)self);
1508 count++;
1509 break;
1510 }
1511 SPLIT_ADD(s, j, i);
1512 }
1513
1514 if (i < len) {
1515 /* Only occurs when maxsplit was reached */
1516 /* Skip any remaining whitespace and copy to end of string */
1517 SKIP_SPACE(s, i, len);
1518 if (i != len)
1519 SPLIT_ADD(s, i, len);
1520 }
1521 FIX_PREALLOC_SIZE(list);
1522 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001524 Py_DECREF(list);
1525 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001526}
1527
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001529split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001531 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001532 register Py_ssize_t i, j, count=0;
1533 PyObject *str;
1534 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001535
Christian Heimes44720832008-05-26 13:01:01 +00001536 if (list == NULL)
1537 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538
Christian Heimes44720832008-05-26 13:01:01 +00001539 i = j = 0;
1540 while ((j < len) && (maxcount-- > 0)) {
1541 for(; j<len; j++) {
1542 /* I found that using memchr makes no difference */
1543 if (s[j] == ch) {
1544 SPLIT_ADD(s, i, j);
1545 i = j = j + 1;
1546 break;
1547 }
1548 }
1549 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001550 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001551 /* ch not in self, so just use self as list[0] */
1552 Py_INCREF(self);
1553 PyList_SET_ITEM(list, 0, (PyObject *)self);
1554 count++;
1555 }
1556 else if (i <= len) {
1557 SPLIT_ADD(s, i, len);
1558 }
1559 FIX_PREALLOC_SIZE(list);
1560 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001561
1562 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001563 Py_DECREF(list);
1564 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565}
1566
1567PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001568"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569\n\
Christian Heimes44720832008-05-26 13:01:01 +00001570Return a list of the words in the string S, using sep as the\n\
1571delimiter string. If maxsplit is given, at most maxsplit\n\
1572splits are done. If sep is not specified or is None, any\n\
1573whitespace string is a separator and empty strings are removed\n\
1574from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001575
1576static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001577string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001578{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001579 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001580 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001581 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001582 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001583#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001584 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001585#endif
1586
Christian Heimes44720832008-05-26 13:01:01 +00001587 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1588 return NULL;
1589 if (maxsplit < 0)
1590 maxsplit = PY_SSIZE_T_MAX;
1591 if (subobj == Py_None)
1592 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001593 if (PyString_Check(subobj)) {
1594 sub = PyString_AS_STRING(subobj);
1595 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001596 }
1597#ifdef Py_USING_UNICODE
1598 else if (PyUnicode_Check(subobj))
1599 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1600#endif
1601 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1602 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603
Christian Heimes44720832008-05-26 13:01:01 +00001604 if (n == 0) {
1605 PyErr_SetString(PyExc_ValueError, "empty separator");
1606 return NULL;
1607 }
1608 else if (n == 1)
1609 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001610
Christian Heimes44720832008-05-26 13:01:01 +00001611 list = PyList_New(PREALLOC_SIZE(maxsplit));
1612 if (list == NULL)
1613 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001614
1615#ifdef USE_FAST
Christian Heimes44720832008-05-26 13:01:01 +00001616 i = j = 0;
1617 while (maxsplit-- > 0) {
1618 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1619 if (pos < 0)
1620 break;
1621 j = i+pos;
1622 SPLIT_ADD(s, i, j);
1623 i = j + n;
1624 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001625#else
Christian Heimes44720832008-05-26 13:01:01 +00001626 i = j = 0;
1627 while ((j+n <= len) && (maxsplit-- > 0)) {
1628 for (; j+n <= len; j++) {
1629 if (Py_STRING_MATCH(s, j, sub, n)) {
1630 SPLIT_ADD(s, i, j);
1631 i = j = j + n;
1632 break;
1633 }
1634 }
1635 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001636#endif
Christian Heimes44720832008-05-26 13:01:01 +00001637 SPLIT_ADD(s, i, len);
1638 FIX_PREALLOC_SIZE(list);
1639 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001640
Christian Heimes44720832008-05-26 13:01:01 +00001641 onError:
1642 Py_DECREF(list);
1643 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644}
1645
1646PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001647"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001648\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001649Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001650the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001651found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001652
1653static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001654string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001655{
Christian Heimes44720832008-05-26 13:01:01 +00001656 const char *sep;
1657 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001658
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001659 if (PyString_Check(sep_obj)) {
1660 sep = PyString_AS_STRING(sep_obj);
1661 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001662 }
1663#ifdef Py_USING_UNICODE
1664 else if (PyUnicode_Check(sep_obj))
1665 return PyUnicode_Partition((PyObject *) self, sep_obj);
1666#endif
1667 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1668 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001669
Christian Heimes44720832008-05-26 13:01:01 +00001670 return stringlib_partition(
1671 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001672 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001673 sep_obj, sep, sep_len
1674 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001675}
1676
1677PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001678"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001679\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001680Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001681the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001682separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001683
1684static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001685string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001686{
Christian Heimes44720832008-05-26 13:01:01 +00001687 const char *sep;
1688 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001689
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001690 if (PyString_Check(sep_obj)) {
1691 sep = PyString_AS_STRING(sep_obj);
1692 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001693 }
1694#ifdef Py_USING_UNICODE
1695 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001696 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001697#endif
1698 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1699 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001700
Christian Heimes44720832008-05-26 13:01:01 +00001701 return stringlib_rpartition(
1702 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001703 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001704 sep_obj, sep, sep_len
1705 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001706}
1707
1708Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001709rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001710{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001711 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001712 Py_ssize_t i, j, count=0;
1713 PyObject *str;
1714 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Christian Heimes44720832008-05-26 13:01:01 +00001716 if (list == NULL)
1717 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001718
Christian Heimes44720832008-05-26 13:01:01 +00001719 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001720
Christian Heimes44720832008-05-26 13:01:01 +00001721 while (maxsplit-- > 0) {
1722 RSKIP_SPACE(s, i);
1723 if (i<0) break;
1724 j = i; i--;
1725 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001726 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001727 /* No whitespace in self, so just use it as list[0] */
1728 Py_INCREF(self);
1729 PyList_SET_ITEM(list, 0, (PyObject *)self);
1730 count++;
1731 break;
1732 }
1733 SPLIT_ADD(s, i + 1, j + 1);
1734 }
1735 if (i >= 0) {
1736 /* Only occurs when maxsplit was reached */
1737 /* Skip any remaining whitespace and copy to beginning of string */
1738 RSKIP_SPACE(s, i);
1739 if (i >= 0)
1740 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001741
Christian Heimes44720832008-05-26 13:01:01 +00001742 }
1743 FIX_PREALLOC_SIZE(list);
1744 if (PyList_Reverse(list) < 0)
1745 goto onError;
1746 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001747 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001748 Py_DECREF(list);
1749 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001750}
1751
1752Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001753rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001754{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001755 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001756 register Py_ssize_t i, j, count=0;
1757 PyObject *str;
1758 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001759
Christian Heimes44720832008-05-26 13:01:01 +00001760 if (list == NULL)
1761 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001762
Christian Heimes44720832008-05-26 13:01:01 +00001763 i = j = len - 1;
1764 while ((i >= 0) && (maxcount-- > 0)) {
1765 for (; i >= 0; i--) {
1766 if (s[i] == ch) {
1767 SPLIT_ADD(s, i + 1, j + 1);
1768 j = i = i - 1;
1769 break;
1770 }
1771 }
1772 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001773 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001774 /* ch not in self, so just use self as list[0] */
1775 Py_INCREF(self);
1776 PyList_SET_ITEM(list, 0, (PyObject *)self);
1777 count++;
1778 }
1779 else if (j >= -1) {
1780 SPLIT_ADD(s, 0, j + 1);
1781 }
1782 FIX_PREALLOC_SIZE(list);
1783 if (PyList_Reverse(list) < 0)
1784 goto onError;
1785 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001786
Christian Heimes44720832008-05-26 13:01:01 +00001787 onError:
1788 Py_DECREF(list);
1789 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001790}
1791
1792PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001793"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001794\n\
Christian Heimes44720832008-05-26 13:01:01 +00001795Return a list of the words in the string S, using sep as the\n\
1796delimiter string, starting at the end of the string and working\n\
1797to the front. If maxsplit is given, at most maxsplit splits are\n\
1798done. If sep is not specified or is None, any whitespace string\n\
1799is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001800
1801static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001802string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001803{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001804 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001805 Py_ssize_t maxsplit = -1, count=0;
1806 const char *s, *sub;
1807 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001808
Christian Heimes44720832008-05-26 13:01:01 +00001809 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1810 return NULL;
1811 if (maxsplit < 0)
1812 maxsplit = PY_SSIZE_T_MAX;
1813 if (subobj == Py_None)
1814 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001815 if (PyString_Check(subobj)) {
1816 sub = PyString_AS_STRING(subobj);
1817 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001818 }
1819#ifdef Py_USING_UNICODE
1820 else if (PyUnicode_Check(subobj))
1821 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1822#endif
1823 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1824 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001825
Christian Heimes44720832008-05-26 13:01:01 +00001826 if (n == 0) {
1827 PyErr_SetString(PyExc_ValueError, "empty separator");
1828 return NULL;
1829 }
1830 else if (n == 1)
1831 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001832
Christian Heimes44720832008-05-26 13:01:01 +00001833 list = PyList_New(PREALLOC_SIZE(maxsplit));
1834 if (list == NULL)
1835 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001836
Christian Heimes44720832008-05-26 13:01:01 +00001837 j = len;
1838 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001839
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001840 s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001841 while ( (i >= 0) && (maxsplit-- > 0) ) {
1842 for (; i>=0; i--) {
1843 if (Py_STRING_MATCH(s, i, sub, n)) {
1844 SPLIT_ADD(s, i + n, j);
1845 j = i;
1846 i -= n;
1847 break;
1848 }
1849 }
1850 }
1851 SPLIT_ADD(s, 0, j);
1852 FIX_PREALLOC_SIZE(list);
1853 if (PyList_Reverse(list) < 0)
1854 goto onError;
1855 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001856
1857onError:
Christian Heimes44720832008-05-26 13:01:01 +00001858 Py_DECREF(list);
1859 return NULL;
1860}
1861
1862
1863PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001864"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001865\n\
1866Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001867iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001868
1869static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001870string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001871{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001872 char *sep = PyString_AS_STRING(self);
1873 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001874 PyObject *res = NULL;
1875 char *p;
1876 Py_ssize_t seqlen = 0;
1877 size_t sz = 0;
1878 Py_ssize_t i;
1879 PyObject *seq, *item;
1880
1881 seq = PySequence_Fast(orig, "");
1882 if (seq == NULL) {
1883 return NULL;
1884 }
1885
1886 seqlen = PySequence_Size(seq);
1887 if (seqlen == 0) {
1888 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001889 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001890 }
1891 if (seqlen == 1) {
1892 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001893 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001894 Py_INCREF(item);
1895 Py_DECREF(seq);
1896 return item;
1897 }
1898 }
1899
1900 /* There are at least two things to join, or else we have a subclass
1901 * of the builtin types in the sequence.
1902 * Do a pre-pass to figure out the total amount of space we'll
1903 * need (sz), see whether any argument is absurd, and defer to
1904 * the Unicode join if appropriate.
1905 */
1906 for (i = 0; i < seqlen; i++) {
1907 const size_t old_sz = sz;
1908 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001909 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001910#ifdef Py_USING_UNICODE
1911 if (PyUnicode_Check(item)) {
1912 /* Defer to Unicode join.
1913 * CAUTION: There's no gurantee that the
1914 * original sequence can be iterated over
1915 * again, so we must pass seq here.
1916 */
1917 PyObject *result;
1918 result = PyUnicode_Join((PyObject *)self, seq);
1919 Py_DECREF(seq);
1920 return result;
1921 }
1922#endif
1923 PyErr_Format(PyExc_TypeError,
1924 "sequence item %zd: expected string,"
1925 " %.80s found",
1926 i, Py_TYPE(item)->tp_name);
1927 Py_DECREF(seq);
1928 return NULL;
1929 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001930 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001931 if (i != 0)
1932 sz += seplen;
1933 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1934 PyErr_SetString(PyExc_OverflowError,
1935 "join() result is too long for a Python string");
1936 Py_DECREF(seq);
1937 return NULL;
1938 }
1939 }
1940
1941 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001942 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001943 if (res == NULL) {
1944 Py_DECREF(seq);
1945 return NULL;
1946 }
1947
1948 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001949 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001950 for (i = 0; i < seqlen; ++i) {
1951 size_t n;
1952 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001953 n = PyString_GET_SIZE(item);
1954 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001955 p += n;
1956 if (i < seqlen - 1) {
1957 Py_MEMCPY(p, sep, seplen);
1958 p += seplen;
1959 }
1960 }
1961
1962 Py_DECREF(seq);
1963 return res;
1964}
1965
1966PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001967_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001968{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001969 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001970 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001971 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001972}
1973
1974Py_LOCAL_INLINE(void)
1975string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1976{
1977 if (*end > len)
1978 *end = len;
1979 else if (*end < 0)
1980 *end += len;
1981 if (*end < 0)
1982 *end = 0;
1983 if (*start < 0)
1984 *start += len;
1985 if (*start < 0)
1986 *start = 0;
1987}
1988
1989Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001991{
1992 PyObject *subobj;
1993 const char *sub;
1994 Py_ssize_t sub_len;
1995 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1996 PyObject *obj_start=Py_None, *obj_end=Py_None;
1997
1998 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1999 &obj_start, &obj_end))
2000 return -2;
2001 /* To support None in "start" and "end" arguments, meaning
2002 the same as if they were not passed.
2003 */
2004 if (obj_start != Py_None)
2005 if (!_PyEval_SliceIndex(obj_start, &start))
2006 return -2;
2007 if (obj_end != Py_None)
2008 if (!_PyEval_SliceIndex(obj_end, &end))
2009 return -2;
2010
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002011 if (PyString_Check(subobj)) {
2012 sub = PyString_AS_STRING(subobj);
2013 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00002014 }
2015#ifdef Py_USING_UNICODE
2016 else if (PyUnicode_Check(subobj))
2017 return PyUnicode_Find(
2018 (PyObject *)self, subobj, start, end, dir);
2019#endif
2020 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2021 /* XXX - the "expected a character buffer object" is pretty
2022 confusing for a non-expert. remap to something else ? */
2023 return -2;
2024
2025 if (dir > 0)
2026 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002027 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00002028 sub, sub_len, start, end);
2029 else
2030 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002031 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00002032 sub, sub_len, start, end);
2033}
2034
2035
2036PyDoc_STRVAR(find__doc__,
2037"S.find(sub [,start [,end]]) -> int\n\
2038\n\
2039Return the lowest index in S where substring sub is found,\n\
2040such that sub is contained within s[start:end]. Optional\n\
2041arguments start and end are interpreted as in slice notation.\n\
2042\n\
2043Return -1 on failure.");
2044
2045static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002046string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002047{
2048 Py_ssize_t result = string_find_internal(self, args, +1);
2049 if (result == -2)
2050 return NULL;
2051 return PyInt_FromSsize_t(result);
2052}
2053
2054
2055PyDoc_STRVAR(index__doc__,
2056"S.index(sub [,start [,end]]) -> int\n\
2057\n\
2058Like S.find() but raise ValueError when the substring is not found.");
2059
2060static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002061string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002062{
2063 Py_ssize_t result = string_find_internal(self, args, +1);
2064 if (result == -2)
2065 return NULL;
2066 if (result == -1) {
2067 PyErr_SetString(PyExc_ValueError,
2068 "substring not found");
2069 return NULL;
2070 }
2071 return PyInt_FromSsize_t(result);
2072}
2073
2074
2075PyDoc_STRVAR(rfind__doc__,
2076"S.rfind(sub [,start [,end]]) -> int\n\
2077\n\
2078Return the highest index in S where substring sub is found,\n\
2079such that sub is contained within s[start:end]. Optional\n\
2080arguments start and end are interpreted as in slice notation.\n\
2081\n\
2082Return -1 on failure.");
2083
2084static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002085string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002086{
2087 Py_ssize_t result = string_find_internal(self, args, -1);
2088 if (result == -2)
2089 return NULL;
2090 return PyInt_FromSsize_t(result);
2091}
2092
2093
2094PyDoc_STRVAR(rindex__doc__,
2095"S.rindex(sub [,start [,end]]) -> int\n\
2096\n\
2097Like S.rfind() but raise ValueError when the substring is not found.");
2098
2099static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002100string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002101{
2102 Py_ssize_t result = string_find_internal(self, args, -1);
2103 if (result == -2)
2104 return NULL;
2105 if (result == -1) {
2106 PyErr_SetString(PyExc_ValueError,
2107 "substring not found");
2108 return NULL;
2109 }
2110 return PyInt_FromSsize_t(result);
2111}
2112
2113
2114Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002115do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002116{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117 char *s = PyString_AS_STRING(self);
2118 Py_ssize_t len = PyString_GET_SIZE(self);
2119 char *sep = PyString_AS_STRING(sepobj);
2120 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002121 Py_ssize_t i, j;
2122
2123 i = 0;
2124 if (striptype != RIGHTSTRIP) {
2125 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2126 i++;
2127 }
2128 }
2129
2130 j = len;
2131 if (striptype != LEFTSTRIP) {
2132 do {
2133 j--;
2134 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2135 j++;
2136 }
2137
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002138 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002139 Py_INCREF(self);
2140 return (PyObject*)self;
2141 }
2142 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002143 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002144}
2145
2146
2147Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002148do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002149{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002150 char *s = PyString_AS_STRING(self);
2151 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002152
2153 i = 0;
2154 if (striptype != RIGHTSTRIP) {
2155 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2156 i++;
2157 }
2158 }
2159
2160 j = len;
2161 if (striptype != LEFTSTRIP) {
2162 do {
2163 j--;
2164 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2165 j++;
2166 }
2167
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002168 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002169 Py_INCREF(self);
2170 return (PyObject*)self;
2171 }
2172 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002173 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002174}
2175
2176
2177Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002178do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002179{
2180 PyObject *sep = NULL;
2181
2182 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2183 return NULL;
2184
2185 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002186 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002187 return do_xstrip(self, striptype, sep);
2188#ifdef Py_USING_UNICODE
2189 else if (PyUnicode_Check(sep)) {
2190 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2191 PyObject *res;
2192 if (uniself==NULL)
2193 return NULL;
2194 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2195 striptype, sep);
2196 Py_DECREF(uniself);
2197 return res;
2198 }
2199#endif
2200 PyErr_Format(PyExc_TypeError,
2201#ifdef Py_USING_UNICODE
2202 "%s arg must be None, str or unicode",
2203#else
2204 "%s arg must be None or str",
2205#endif
2206 STRIPNAME(striptype));
2207 return NULL;
2208 }
2209
2210 return do_strip(self, striptype);
2211}
2212
2213
2214PyDoc_STRVAR(strip__doc__,
2215"S.strip([chars]) -> string or unicode\n\
2216\n\
2217Return a copy of the string S with leading and trailing\n\
2218whitespace removed.\n\
2219If chars is given and not None, remove characters in chars instead.\n\
2220If chars is unicode, S will be converted to unicode before stripping");
2221
2222static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002223string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002224{
2225 if (PyTuple_GET_SIZE(args) == 0)
2226 return do_strip(self, BOTHSTRIP); /* Common case */
2227 else
2228 return do_argstrip(self, BOTHSTRIP, args);
2229}
2230
2231
2232PyDoc_STRVAR(lstrip__doc__,
2233"S.lstrip([chars]) -> string or unicode\n\
2234\n\
2235Return a copy of the string S with leading whitespace removed.\n\
2236If chars is given and not None, remove characters in chars instead.\n\
2237If chars is unicode, S will be converted to unicode before stripping");
2238
2239static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002240string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002241{
2242 if (PyTuple_GET_SIZE(args) == 0)
2243 return do_strip(self, LEFTSTRIP); /* Common case */
2244 else
2245 return do_argstrip(self, LEFTSTRIP, args);
2246}
2247
2248
2249PyDoc_STRVAR(rstrip__doc__,
2250"S.rstrip([chars]) -> string or unicode\n\
2251\n\
2252Return a copy of the string S with trailing whitespace removed.\n\
2253If chars is given and not None, remove characters in chars instead.\n\
2254If chars is unicode, S will be converted to unicode before stripping");
2255
2256static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002257string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002258{
2259 if (PyTuple_GET_SIZE(args) == 0)
2260 return do_strip(self, RIGHTSTRIP); /* Common case */
2261 else
2262 return do_argstrip(self, RIGHTSTRIP, args);
2263}
2264
2265
2266PyDoc_STRVAR(lower__doc__,
2267"S.lower() -> string\n\
2268\n\
2269Return a copy of the string S converted to lowercase.");
2270
2271/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2272#ifndef _tolower
2273#define _tolower tolower
2274#endif
2275
2276static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002277string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002278{
2279 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002280 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002281 PyObject *newobj;
2282
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002283 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002284 if (!newobj)
2285 return NULL;
2286
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002287 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002288
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002289 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002290
2291 for (i = 0; i < n; i++) {
2292 int c = Py_CHARMASK(s[i]);
2293 if (isupper(c))
2294 s[i] = _tolower(c);
2295 }
2296
2297 return newobj;
2298}
2299
2300PyDoc_STRVAR(upper__doc__,
2301"S.upper() -> string\n\
2302\n\
2303Return a copy of the string S converted to uppercase.");
2304
2305#ifndef _toupper
2306#define _toupper toupper
2307#endif
2308
2309static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002310string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002311{
2312 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002313 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002314 PyObject *newobj;
2315
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002316 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002317 if (!newobj)
2318 return NULL;
2319
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002320 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002321
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002322 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002323
2324 for (i = 0; i < n; i++) {
2325 int c = Py_CHARMASK(s[i]);
2326 if (islower(c))
2327 s[i] = _toupper(c);
2328 }
2329
2330 return newobj;
2331}
2332
2333PyDoc_STRVAR(title__doc__,
2334"S.title() -> string\n\
2335\n\
2336Return a titlecased version of S, i.e. words start with uppercase\n\
2337characters, all remaining cased characters have lowercase.");
2338
2339static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002340string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002341{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002342 char *s = PyString_AS_STRING(self), *s_new;
2343 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002344 int previous_is_cased = 0;
2345 PyObject *newobj;
2346
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002347 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002348 if (newobj == NULL)
2349 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002350 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002351 for (i = 0; i < n; i++) {
2352 int c = Py_CHARMASK(*s++);
2353 if (islower(c)) {
2354 if (!previous_is_cased)
2355 c = toupper(c);
2356 previous_is_cased = 1;
2357 } else if (isupper(c)) {
2358 if (previous_is_cased)
2359 c = tolower(c);
2360 previous_is_cased = 1;
2361 } else
2362 previous_is_cased = 0;
2363 *s_new++ = c;
2364 }
2365 return newobj;
2366}
2367
2368PyDoc_STRVAR(capitalize__doc__,
2369"S.capitalize() -> string\n\
2370\n\
2371Return a copy of the string S with only its first character\n\
2372capitalized.");
2373
2374static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002375string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002376{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002377 char *s = PyString_AS_STRING(self), *s_new;
2378 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002379 PyObject *newobj;
2380
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002381 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002382 if (newobj == NULL)
2383 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002384 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002385 if (0 < n) {
2386 int c = Py_CHARMASK(*s++);
2387 if (islower(c))
2388 *s_new = toupper(c);
2389 else
2390 *s_new = c;
2391 s_new++;
2392 }
2393 for (i = 1; i < n; i++) {
2394 int c = Py_CHARMASK(*s++);
2395 if (isupper(c))
2396 *s_new = tolower(c);
2397 else
2398 *s_new = c;
2399 s_new++;
2400 }
2401 return newobj;
2402}
2403
2404
2405PyDoc_STRVAR(count__doc__,
2406"S.count(sub[, start[, end]]) -> int\n\
2407\n\
2408Return the number of non-overlapping occurrences of substring sub in\n\
2409string S[start:end]. Optional arguments start and end are interpreted\n\
2410as in slice notation.");
2411
2412static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002413string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002414{
2415 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002416 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002417 Py_ssize_t sub_len;
2418 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2419
2420 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2421 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2422 return NULL;
2423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002424 if (PyString_Check(sub_obj)) {
2425 sub = PyString_AS_STRING(sub_obj);
2426 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002427 }
2428#ifdef Py_USING_UNICODE
2429 else if (PyUnicode_Check(sub_obj)) {
2430 Py_ssize_t count;
2431 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2432 if (count == -1)
2433 return NULL;
2434 else
2435 return PyInt_FromSsize_t(count);
2436 }
2437#endif
2438 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2439 return NULL;
2440
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002441 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002442
2443 return PyInt_FromSsize_t(
2444 stringlib_count(str + start, end - start, sub, sub_len)
2445 );
2446}
2447
2448PyDoc_STRVAR(swapcase__doc__,
2449"S.swapcase() -> string\n\
2450\n\
2451Return a copy of the string S with uppercase characters\n\
2452converted to lowercase and vice versa.");
2453
2454static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002455string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002456{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002457 char *s = PyString_AS_STRING(self), *s_new;
2458 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002459 PyObject *newobj;
2460
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002461 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002462 if (newobj == NULL)
2463 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002464 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002465 for (i = 0; i < n; i++) {
2466 int c = Py_CHARMASK(*s++);
2467 if (islower(c)) {
2468 *s_new = toupper(c);
2469 }
2470 else if (isupper(c)) {
2471 *s_new = tolower(c);
2472 }
2473 else
2474 *s_new = c;
2475 s_new++;
2476 }
2477 return newobj;
2478}
2479
2480
2481PyDoc_STRVAR(translate__doc__,
2482"S.translate(table [,deletechars]) -> string\n\
2483\n\
2484Return a copy of the string S, where all characters occurring\n\
2485in the optional argument deletechars are removed, and the\n\
2486remaining characters have been mapped through the given\n\
2487translation table, which must be a string of length 256.");
2488
2489static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002490string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002491{
2492 register char *input, *output;
2493 const char *table;
2494 register Py_ssize_t i, c, changed = 0;
2495 PyObject *input_obj = (PyObject*)self;
2496 const char *output_start, *del_table=NULL;
2497 Py_ssize_t inlen, tablen, dellen = 0;
2498 PyObject *result;
2499 int trans_table[256];
2500 PyObject *tableobj, *delobj = NULL;
2501
2502 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2503 &tableobj, &delobj))
2504 return NULL;
2505
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002506 if (PyString_Check(tableobj)) {
2507 table = PyString_AS_STRING(tableobj);
2508 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002509 }
2510 else if (tableobj == Py_None) {
2511 table = NULL;
2512 tablen = 256;
2513 }
2514#ifdef Py_USING_UNICODE
2515 else if (PyUnicode_Check(tableobj)) {
2516 /* Unicode .translate() does not support the deletechars
2517 parameter; instead a mapping to None will cause characters
2518 to be deleted. */
2519 if (delobj != NULL) {
2520 PyErr_SetString(PyExc_TypeError,
2521 "deletions are implemented differently for unicode");
2522 return NULL;
2523 }
2524 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2525 }
2526#endif
2527 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2528 return NULL;
2529
2530 if (tablen != 256) {
2531 PyErr_SetString(PyExc_ValueError,
2532 "translation table must be 256 characters long");
2533 return NULL;
2534 }
2535
2536 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002537 if (PyString_Check(delobj)) {
2538 del_table = PyString_AS_STRING(delobj);
2539 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002540 }
2541#ifdef Py_USING_UNICODE
2542 else if (PyUnicode_Check(delobj)) {
2543 PyErr_SetString(PyExc_TypeError,
2544 "deletions are implemented differently for unicode");
2545 return NULL;
2546 }
2547#endif
2548 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2549 return NULL;
2550 }
2551 else {
2552 del_table = NULL;
2553 dellen = 0;
2554 }
2555
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002556 inlen = PyString_GET_SIZE(input_obj);
2557 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002558 if (result == NULL)
2559 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002560 output_start = output = PyString_AsString(result);
2561 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002562
2563 if (dellen == 0 && table != NULL) {
2564 /* If no deletions are required, use faster code */
2565 for (i = inlen; --i >= 0; ) {
2566 c = Py_CHARMASK(*input++);
2567 if (Py_CHARMASK((*output++ = table[c])) != c)
2568 changed = 1;
2569 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002570 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002571 return result;
2572 Py_DECREF(result);
2573 Py_INCREF(input_obj);
2574 return input_obj;
2575 }
2576
2577 if (table == NULL) {
2578 for (i = 0; i < 256; i++)
2579 trans_table[i] = Py_CHARMASK(i);
2580 } else {
2581 for (i = 0; i < 256; i++)
2582 trans_table[i] = Py_CHARMASK(table[i]);
2583 }
2584
2585 for (i = 0; i < dellen; i++)
2586 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2587
2588 for (i = inlen; --i >= 0; ) {
2589 c = Py_CHARMASK(*input++);
2590 if (trans_table[c] != -1)
2591 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2592 continue;
2593 changed = 1;
2594 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002595 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002596 Py_DECREF(result);
2597 Py_INCREF(input_obj);
2598 return input_obj;
2599 }
2600 /* Fix the size of the resulting string */
2601 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002602 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002603 return result;
2604}
2605
2606
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so the negative constant cannot bind to
   neighbouring tokens when the macro is expanded inside an expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first occurrence of byte c within the first target_len
   bytes of target.  Evaluates to a char * pointing at the match, or NULL
   when the byte is not present.  Every argument is parenthesized so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2614
2615/* String ops must return a string. */
2616/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002617Py_LOCAL(PyStringObject *)
2618return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002619{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002620 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002621 Py_INCREF(self);
2622 return self;
2623 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002624 return (PyStringObject *)PyString_FromStringAndSize(
2625 PyString_AS_STRING(self),
2626 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002627}
2628
2629Py_LOCAL_INLINE(Py_ssize_t)
2630countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2631{
2632 Py_ssize_t count=0;
2633 const char *start=target;
2634 const char *end=target+target_len;
2635
2636 while ( (start=findchar(start, end-start, c)) != NULL ) {
2637 count++;
2638 if (count >= maxcount)
2639 break;
2640 start += 1;
2641 }
2642 return count;
2643}
2644
2645Py_LOCAL(Py_ssize_t)
2646findstring(const char *target, Py_ssize_t target_len,
2647 const char *pattern, Py_ssize_t pattern_len,
2648 Py_ssize_t start,
2649 Py_ssize_t end,
2650 int direction)
2651{
2652 if (start < 0) {
2653 start += target_len;
2654 if (start < 0)
2655 start = 0;
2656 }
2657 if (end > target_len) {
2658 end = target_len;
2659 } else if (end < 0) {
2660 end += target_len;
2661 if (end < 0)
2662 end = 0;
2663 }
2664
2665 /* zero-length substrings always match at the first attempt */
2666 if (pattern_len == 0)
2667 return (direction > 0) ? start : end;
2668
2669 end -= pattern_len;
2670
2671 if (direction < 0) {
2672 for (; end >= start; end--)
2673 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2674 return end;
2675 } else {
2676 for (; start <= end; start++)
2677 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2678 return start;
2679 }
2680 return -1;
2681}
2682
2683Py_LOCAL_INLINE(Py_ssize_t)
2684countstring(const char *target, Py_ssize_t target_len,
2685 const char *pattern, Py_ssize_t pattern_len,
2686 Py_ssize_t start,
2687 Py_ssize_t end,
2688 int direction, Py_ssize_t maxcount)
2689{
2690 Py_ssize_t count=0;
2691
2692 if (start < 0) {
2693 start += target_len;
2694 if (start < 0)
2695 start = 0;
2696 }
2697 if (end > target_len) {
2698 end = target_len;
2699 } else if (end < 0) {
2700 end += target_len;
2701 if (end < 0)
2702 end = 0;
2703 }
2704
2705 /* zero-length substrings match everywhere */
2706 if (pattern_len == 0 || maxcount == 0) {
2707 if (target_len+1 < maxcount)
2708 return target_len+1;
2709 return maxcount;
2710 }
2711
2712 end -= pattern_len;
2713 if (direction < 0) {
2714 for (; (end >= start); end--)
2715 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2716 count++;
2717 if (--maxcount <= 0) break;
2718 end -= pattern_len-1;
2719 }
2720 } else {
2721 for (; (start <= end); start++)
2722 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2723 count++;
2724 if (--maxcount <= 0)
2725 break;
2726 start += pattern_len-1;
2727 }
2728 }
2729 return count;
2730}
2731
2732
2733/* Algorithms for different cases of string replacement */
2734
2735/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002736Py_LOCAL(PyStringObject *)
2737replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002738 const char *to_s, Py_ssize_t to_len,
2739 Py_ssize_t maxcount)
2740{
2741 char *self_s, *result_s;
2742 Py_ssize_t self_len, result_len;
2743 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002744 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002745
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002746 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002747
2748 /* 1 at the end plus 1 after every character */
2749 count = self_len+1;
2750 if (maxcount < count)
2751 count = maxcount;
2752
2753 /* Check for overflow */
2754 /* result_len = count * to_len + self_len; */
2755 product = count * to_len;
2756 if (product / to_len != count) {
2757 PyErr_SetString(PyExc_OverflowError,
2758 "replace string is too long");
2759 return NULL;
2760 }
2761 result_len = product + self_len;
2762 if (result_len < 0) {
2763 PyErr_SetString(PyExc_OverflowError,
2764 "replace string is too long");
2765 return NULL;
2766 }
2767
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002768 if (! (result = (PyStringObject *)
2769 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002770 return NULL;
2771
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002772 self_s = PyString_AS_STRING(self);
2773 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002774
2775 /* TODO: special case single character, which doesn't need memcpy */
2776
2777 /* Lay the first one down (guaranteed this will occur) */
2778 Py_MEMCPY(result_s, to_s, to_len);
2779 result_s += to_len;
2780 count -= 1;
2781
2782 for (i=0; i<count; i++) {
2783 *result_s++ = *self_s++;
2784 Py_MEMCPY(result_s, to_s, to_len);
2785 result_s += to_len;
2786 }
2787
2788 /* Copy the rest of the original string */
2789 Py_MEMCPY(result_s, self_s, self_len-i);
2790
2791 return result;
2792}
2793
2794/* Special case for deleting a single character */
2795/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002796Py_LOCAL(PyStringObject *)
2797replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002798 char from_c, Py_ssize_t maxcount)
2799{
2800 char *self_s, *result_s;
2801 char *start, *next, *end;
2802 Py_ssize_t self_len, result_len;
2803 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002804 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002805
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002806 self_len = PyString_GET_SIZE(self);
2807 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002808
2809 count = countchar(self_s, self_len, from_c, maxcount);
2810 if (count == 0) {
2811 return return_self(self);
2812 }
2813
2814 result_len = self_len - count; /* from_len == 1 */
2815 assert(result_len>=0);
2816
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002817 if ( (result = (PyStringObject *)
2818 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002819 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002820 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002821
2822 start = self_s;
2823 end = self_s + self_len;
2824 while (count-- > 0) {
2825 next = findchar(start, end-start, from_c);
2826 if (next == NULL)
2827 break;
2828 Py_MEMCPY(result_s, start, next-start);
2829 result_s += (next-start);
2830 start = next+1;
2831 }
2832 Py_MEMCPY(result_s, start, end-start);
2833
2834 return result;
2835}
2836
2837/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2838
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002839Py_LOCAL(PyStringObject *)
2840replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002841 const char *from_s, Py_ssize_t from_len,
2842 Py_ssize_t maxcount) {
2843 char *self_s, *result_s;
2844 char *start, *next, *end;
2845 Py_ssize_t self_len, result_len;
2846 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002847 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002848
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002849 self_len = PyString_GET_SIZE(self);
2850 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002851
2852 count = countstring(self_s, self_len,
2853 from_s, from_len,
2854 0, self_len, 1,
2855 maxcount);
2856
2857 if (count == 0) {
2858 /* no matches */
2859 return return_self(self);
2860 }
2861
2862 result_len = self_len - (count * from_len);
2863 assert (result_len>=0);
2864
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002865 if ( (result = (PyStringObject *)
2866 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002867 return NULL;
2868
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002869 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002870
2871 start = self_s;
2872 end = self_s + self_len;
2873 while (count-- > 0) {
2874 offset = findstring(start, end-start,
2875 from_s, from_len,
2876 0, end-start, FORWARD);
2877 if (offset == -1)
2878 break;
2879 next = start + offset;
2880
2881 Py_MEMCPY(result_s, start, next-start);
2882
2883 result_s += (next-start);
2884 start = next+from_len;
2885 }
2886 Py_MEMCPY(result_s, start, end-start);
2887 return result;
2888}
2889
2890/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002891Py_LOCAL(PyStringObject *)
2892replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002893 char from_c, char to_c,
2894 Py_ssize_t maxcount)
2895{
2896 char *self_s, *result_s, *start, *end, *next;
2897 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002898 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002899
2900 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002901 self_s = PyString_AS_STRING(self);
2902 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002903
2904 next = findchar(self_s, self_len, from_c);
2905
2906 if (next == NULL) {
2907 /* No matches; return the original string */
2908 return return_self(self);
2909 }
2910
2911 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002912 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002913 if (result == NULL)
2914 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002915 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002916 Py_MEMCPY(result_s, self_s, self_len);
2917
2918 /* change everything in-place, starting with this one */
2919 start = result_s + (next-self_s);
2920 *start = to_c;
2921 start++;
2922 end = result_s + self_len;
2923
2924 while (--maxcount > 0) {
2925 next = findchar(start, end-start, from_c);
2926 if (next == NULL)
2927 break;
2928 *next = to_c;
2929 start = next+1;
2930 }
2931
2932 return result;
2933}
2934
2935/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002936Py_LOCAL(PyStringObject *)
2937replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002938 const char *from_s, Py_ssize_t from_len,
2939 const char *to_s, Py_ssize_t to_len,
2940 Py_ssize_t maxcount)
2941{
2942 char *result_s, *start, *end;
2943 char *self_s;
2944 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002945 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002946
2947 /* The result string will be the same size */
2948
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002949 self_s = PyString_AS_STRING(self);
2950 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002951
2952 offset = findstring(self_s, self_len,
2953 from_s, from_len,
2954 0, self_len, FORWARD);
2955 if (offset == -1) {
2956 /* No matches; return the original string */
2957 return return_self(self);
2958 }
2959
2960 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002961 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002962 if (result == NULL)
2963 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002964 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002965 Py_MEMCPY(result_s, self_s, self_len);
2966
2967 /* change everything in-place, starting with this one */
2968 start = result_s + offset;
2969 Py_MEMCPY(start, to_s, from_len);
2970 start += from_len;
2971 end = result_s + self_len;
2972
2973 while ( --maxcount > 0) {
2974 offset = findstring(start, end-start,
2975 from_s, from_len,
2976 0, end-start, FORWARD);
2977 if (offset==-1)
2978 break;
2979 Py_MEMCPY(start+offset, to_s, from_len);
2980 start += offset+from_len;
2981 }
2982
2983 return result;
2984}
2985
2986/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002987Py_LOCAL(PyStringObject *)
2988replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002989 char from_c,
2990 const char *to_s, Py_ssize_t to_len,
2991 Py_ssize_t maxcount)
2992{
2993 char *self_s, *result_s;
2994 char *start, *next, *end;
2995 Py_ssize_t self_len, result_len;
2996 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002997 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002998
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002999 self_s = PyString_AS_STRING(self);
3000 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003001
3002 count = countchar(self_s, self_len, from_c, maxcount);
3003 if (count == 0) {
3004 /* no matches, return unchanged */
3005 return return_self(self);
3006 }
3007
3008 /* use the difference between current and new, hence the "-1" */
3009 /* result_len = self_len + count * (to_len-1) */
3010 product = count * (to_len-1);
3011 if (product / (to_len-1) != count) {
3012 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3013 return NULL;
3014 }
3015 result_len = self_len + product;
3016 if (result_len < 0) {
3017 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3018 return NULL;
3019 }
3020
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003021 if ( (result = (PyStringObject *)
3022 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003023 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003024 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003025
3026 start = self_s;
3027 end = self_s + self_len;
3028 while (count-- > 0) {
3029 next = findchar(start, end-start, from_c);
3030 if (next == NULL)
3031 break;
3032
3033 if (next == start) {
3034 /* replace with the 'to' */
3035 Py_MEMCPY(result_s, to_s, to_len);
3036 result_s += to_len;
3037 start += 1;
3038 } else {
3039 /* copy the unchanged old then the 'to' */
3040 Py_MEMCPY(result_s, start, next-start);
3041 result_s += (next-start);
3042 Py_MEMCPY(result_s, to_s, to_len);
3043 result_s += to_len;
3044 start = next+1;
3045 }
3046 }
3047 /* Copy the remainder of the remaining string */
3048 Py_MEMCPY(result_s, start, end-start);
3049
3050 return result;
3051}
3052
3053/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003054Py_LOCAL(PyStringObject *)
3055replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003056 const char *from_s, Py_ssize_t from_len,
3057 const char *to_s, Py_ssize_t to_len,
3058 Py_ssize_t maxcount) {
3059 char *self_s, *result_s;
3060 char *start, *next, *end;
3061 Py_ssize_t self_len, result_len;
3062 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003063 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003064
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003065 self_s = PyString_AS_STRING(self);
3066 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003067
3068 count = countstring(self_s, self_len,
3069 from_s, from_len,
3070 0, self_len, FORWARD, maxcount);
3071 if (count == 0) {
3072 /* no matches, return unchanged */
3073 return return_self(self);
3074 }
3075
3076 /* Check for overflow */
3077 /* result_len = self_len + count * (to_len-from_len) */
3078 product = count * (to_len-from_len);
3079 if (product / (to_len-from_len) != count) {
3080 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3081 return NULL;
3082 }
3083 result_len = self_len + product;
3084 if (result_len < 0) {
3085 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3086 return NULL;
3087 }
3088
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003089 if ( (result = (PyStringObject *)
3090 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003091 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003092 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003093
3094 start = self_s;
3095 end = self_s + self_len;
3096 while (count-- > 0) {
3097 offset = findstring(start, end-start,
3098 from_s, from_len,
3099 0, end-start, FORWARD);
3100 if (offset == -1)
3101 break;
3102 next = start+offset;
3103 if (next == start) {
3104 /* replace with the 'to' */
3105 Py_MEMCPY(result_s, to_s, to_len);
3106 result_s += to_len;
3107 start += from_len;
3108 } else {
3109 /* copy the unchanged old then the 'to' */
3110 Py_MEMCPY(result_s, start, next-start);
3111 result_s += (next-start);
3112 Py_MEMCPY(result_s, to_s, to_len);
3113 result_s += to_len;
3114 start = next+from_len;
3115 }
3116 }
3117 /* Copy the remainder of the remaining string */
3118 Py_MEMCPY(result_s, start, end-start);
3119
3120 return result;
3121}
3122
3123
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003124Py_LOCAL(PyStringObject *)
3125replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003126 const char *from_s, Py_ssize_t from_len,
3127 const char *to_s, Py_ssize_t to_len,
3128 Py_ssize_t maxcount)
3129{
3130 if (maxcount < 0) {
3131 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003132 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003133 /* nothing to do; return the original string */
3134 return return_self(self);
3135 }
3136
3137 if (maxcount == 0 ||
3138 (from_len == 0 && to_len == 0)) {
3139 /* nothing to do; return the original string */
3140 return return_self(self);
3141 }
3142
3143 /* Handle zero-length special cases */
3144
3145 if (from_len == 0) {
3146 /* insert the 'to' string everywhere. */
3147 /* >>> "Python".replace("", ".") */
3148 /* '.P.y.t.h.o.n.' */
3149 return replace_interleave(self, to_s, to_len, maxcount);
3150 }
3151
3152 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3153 /* point for an empty self string to generate a non-empty string */
3154 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003155 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003156 return return_self(self);
3157 }
3158
3159 if (to_len == 0) {
3160 /* delete all occurances of 'from' string */
3161 if (from_len == 1) {
3162 return replace_delete_single_character(
3163 self, from_s[0], maxcount);
3164 } else {
3165 return replace_delete_substring(self, from_s, from_len, maxcount);
3166 }
3167 }
3168
3169 /* Handle special case where both strings have the same length */
3170
3171 if (from_len == to_len) {
3172 if (from_len == 1) {
3173 return replace_single_character_in_place(
3174 self,
3175 from_s[0],
3176 to_s[0],
3177 maxcount);
3178 } else {
3179 return replace_substring_in_place(
3180 self, from_s, from_len, to_s, to_len, maxcount);
3181 }
3182 }
3183
3184 /* Otherwise use the more generic algorithms */
3185 if (from_len == 1) {
3186 return replace_single_character(self, from_s[0],
3187 to_s, to_len, maxcount);
3188 } else {
3189 /* len('from')>=2, len('to')>=1 */
3190 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3191 }
3192}
3193
3194PyDoc_STRVAR(replace__doc__,
3195"S.replace (old, new[, count]) -> string\n\
3196\n\
3197Return a copy of string S with all occurrences of substring\n\
3198old replaced by new. If the optional argument count is\n\
3199given, only the first count occurrences are replaced.");
3200
3201static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003202string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003203{
3204 Py_ssize_t count = -1;
3205 PyObject *from, *to;
3206 const char *from_s, *to_s;
3207 Py_ssize_t from_len, to_len;
3208
3209 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3210 return NULL;
3211
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003212 if (PyString_Check(from)) {
3213 from_s = PyString_AS_STRING(from);
3214 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003215 }
3216#ifdef Py_USING_UNICODE
3217 if (PyUnicode_Check(from))
3218 return PyUnicode_Replace((PyObject *)self,
3219 from, to, count);
3220#endif
3221 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3222 return NULL;
3223
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003224 if (PyString_Check(to)) {
3225 to_s = PyString_AS_STRING(to);
3226 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003227 }
3228#ifdef Py_USING_UNICODE
3229 else if (PyUnicode_Check(to))
3230 return PyUnicode_Replace((PyObject *)self,
3231 from, to, count);
3232#endif
3233 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3234 return NULL;
3235
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003236 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003237 from_s, from_len,
3238 to_s, to_len, count);
3239}
3240
3241/** End DALKE **/
3242
3243/* Matches the end (direction >= 0) or start (direction < 0) of self
3244 * against substr, using the start and end arguments. Returns
3245 * -1 on error, 0 if not found and 1 if found.
3246 */
3247Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003248_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003249 Py_ssize_t end, int direction)
3250{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003252 Py_ssize_t slen;
3253 const char* sub;
3254 const char* str;
3255
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003256 if (PyString_Check(substr)) {
3257 sub = PyString_AS_STRING(substr);
3258 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003259 }
3260#ifdef Py_USING_UNICODE
3261 else if (PyUnicode_Check(substr))
3262 return PyUnicode_Tailmatch((PyObject *)self,
3263 substr, start, end, direction);
3264#endif
3265 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3266 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003267 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003268
3269 string_adjust_indices(&start, &end, len);
3270
3271 if (direction < 0) {
3272 /* startswith */
3273 if (start+slen > len)
3274 return 0;
3275 } else {
3276 /* endswith */
3277 if (end-start < slen || start > len)
3278 return 0;
3279
3280 if (end-slen > start)
3281 start = end - slen;
3282 }
3283 if (end-start >= slen)
3284 return ! memcmp(str+start, sub, slen);
3285 return 0;
3286}
3287
3288
3289PyDoc_STRVAR(startswith__doc__,
3290"S.startswith(prefix[, start[, end]]) -> bool\n\
3291\n\
3292Return True if S starts with the specified prefix, False otherwise.\n\
3293With optional start, test S beginning at that position.\n\
3294With optional end, stop comparing S at that position.\n\
3295prefix can also be a tuple of strings to try.");
3296
3297static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003298string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003299{
3300 Py_ssize_t start = 0;
3301 Py_ssize_t end = PY_SSIZE_T_MAX;
3302 PyObject *subobj;
3303 int result;
3304
3305 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3306 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3307 return NULL;
3308 if (PyTuple_Check(subobj)) {
3309 Py_ssize_t i;
3310 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3311 result = _string_tailmatch(self,
3312 PyTuple_GET_ITEM(subobj, i),
3313 start, end, -1);
3314 if (result == -1)
3315 return NULL;
3316 else if (result) {
3317 Py_RETURN_TRUE;
3318 }
3319 }
3320 Py_RETURN_FALSE;
3321 }
3322 result = _string_tailmatch(self, subobj, start, end, -1);
3323 if (result == -1)
3324 return NULL;
3325 else
3326 return PyBool_FromLong(result);
3327}
3328
3329
3330PyDoc_STRVAR(endswith__doc__,
3331"S.endswith(suffix[, start[, end]]) -> bool\n\
3332\n\
3333Return True if S ends with the specified suffix, False otherwise.\n\
3334With optional start, test S beginning at that position.\n\
3335With optional end, stop comparing S at that position.\n\
3336suffix can also be a tuple of strings to try.");
3337
3338static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003339string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003340{
3341 Py_ssize_t start = 0;
3342 Py_ssize_t end = PY_SSIZE_T_MAX;
3343 PyObject *subobj;
3344 int result;
3345
3346 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3347 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3348 return NULL;
3349 if (PyTuple_Check(subobj)) {
3350 Py_ssize_t i;
3351 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3352 result = _string_tailmatch(self,
3353 PyTuple_GET_ITEM(subobj, i),
3354 start, end, +1);
3355 if (result == -1)
3356 return NULL;
3357 else if (result) {
3358 Py_RETURN_TRUE;
3359 }
3360 }
3361 Py_RETURN_FALSE;
3362 }
3363 result = _string_tailmatch(self, subobj, start, end, +1);
3364 if (result == -1)
3365 return NULL;
3366 else
3367 return PyBool_FromLong(result);
3368}
3369
3370
3371PyDoc_STRVAR(encode__doc__,
3372"S.encode([encoding[,errors]]) -> object\n\
3373\n\
3374Encodes S using the codec registered for encoding. encoding defaults\n\
3375to the default encoding. errors may be given to set a different error\n\
3376handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3377a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3378'xmlcharrefreplace' as well as any other name registered with\n\
3379codecs.register_error that is able to handle UnicodeEncodeErrors.");
3380
3381static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003382string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003383{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003384 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003385 char *encoding = NULL;
3386 char *errors = NULL;
3387 PyObject *v;
3388
Benjamin Peterson332d7212009-09-18 21:14:55 +00003389 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3390 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003391 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003392 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003393 if (v == NULL)
3394 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003395 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003396 PyErr_Format(PyExc_TypeError,
3397 "encoder did not return a string/unicode object "
3398 "(type=%.400s)",
3399 Py_TYPE(v)->tp_name);
3400 Py_DECREF(v);
3401 return NULL;
3402 }
3403 return v;
3404
3405 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003406 return NULL;
3407}
3408
Christian Heimes44720832008-05-26 13:01:01 +00003409
3410PyDoc_STRVAR(decode__doc__,
3411"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003412\n\
Christian Heimes44720832008-05-26 13:01:01 +00003413Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003414to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003415handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3416a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003417as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003418able to handle UnicodeDecodeErrors.");
3419
3420static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003421string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003422{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003423 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003424 char *encoding = NULL;
3425 char *errors = NULL;
3426 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003427
Benjamin Peterson332d7212009-09-18 21:14:55 +00003428 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3429 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003430 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003431 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003432 if (v == NULL)
3433 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003434 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003435 PyErr_Format(PyExc_TypeError,
3436 "decoder did not return a string/unicode object "
3437 "(type=%.400s)",
3438 Py_TYPE(v)->tp_name);
3439 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003440 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003441 }
3442 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003443
Christian Heimes44720832008-05-26 13:01:01 +00003444 onError:
3445 return NULL;
3446}
3447
3448
3449PyDoc_STRVAR(expandtabs__doc__,
3450"S.expandtabs([tabsize]) -> string\n\
3451\n\
3452Return a copy of S where all tab characters are expanded using spaces.\n\
3453If tabsize is not given, a tab size of 8 characters is assumed.");
3454
3455static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003456string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003457{
3458 const char *e, *p, *qe;
3459 char *q;
3460 Py_ssize_t i, j, incr;
3461 PyObject *u;
3462 int tabsize = 8;
3463
3464 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3465 return NULL;
3466
3467 /* First pass: determine size of output string */
3468 i = 0; /* chars up to and including most recent \n or \r */
3469 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003470 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3471 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003472 if (*p == '\t') {
3473 if (tabsize > 0) {
3474 incr = tabsize - (j % tabsize);
3475 if (j > PY_SSIZE_T_MAX - incr)
3476 goto overflow1;
3477 j += incr;
3478 }
3479 }
3480 else {
3481 if (j > PY_SSIZE_T_MAX - 1)
3482 goto overflow1;
3483 j++;
3484 if (*p == '\n' || *p == '\r') {
3485 if (i > PY_SSIZE_T_MAX - j)
3486 goto overflow1;
3487 i += j;
3488 j = 0;
3489 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003490 }
Christian Heimes44720832008-05-26 13:01:01 +00003491
3492 if (i > PY_SSIZE_T_MAX - j)
3493 goto overflow1;
3494
3495 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003496 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003497 if (!u)
3498 return NULL;
3499
3500 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003501 q = PyString_AS_STRING(u); /* next output char */
3502 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003503
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003504 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003505 if (*p == '\t') {
3506 if (tabsize > 0) {
3507 i = tabsize - (j % tabsize);
3508 j += i;
3509 while (i--) {
3510 if (q >= qe)
3511 goto overflow2;
3512 *q++ = ' ';
3513 }
3514 }
3515 }
3516 else {
3517 if (q >= qe)
3518 goto overflow2;
3519 *q++ = *p;
3520 j++;
3521 if (*p == '\n' || *p == '\r')
3522 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003523 }
Christian Heimes44720832008-05-26 13:01:01 +00003524
3525 return u;
3526
3527 overflow2:
3528 Py_DECREF(u);
3529 overflow1:
3530 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3531 return NULL;
3532}
3533
3534Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003535pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003536{
3537 PyObject *u;
3538
3539 if (left < 0)
3540 left = 0;
3541 if (right < 0)
3542 right = 0;
3543
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003544 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003545 Py_INCREF(self);
3546 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003547 }
3548
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003549 u = PyString_FromStringAndSize(NULL,
3550 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003551 if (u) {
3552 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003553 memset(PyString_AS_STRING(u), fill, left);
3554 Py_MEMCPY(PyString_AS_STRING(u) + left,
3555 PyString_AS_STRING(self),
3556 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003557 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003558 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003559 fill, right);
3560 }
3561
3562 return u;
3563}
3564
3565PyDoc_STRVAR(ljust__doc__,
3566"S.ljust(width[, fillchar]) -> string\n"
3567"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003568"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003569"done using the specified fill character (default is a space).");
3570
3571static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003572string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003573{
3574 Py_ssize_t width;
3575 char fillchar = ' ';
3576
3577 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3578 return NULL;
3579
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003580 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003581 Py_INCREF(self);
3582 return (PyObject*) self;
3583 }
3584
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003585 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003586}
3587
3588
3589PyDoc_STRVAR(rjust__doc__,
3590"S.rjust(width[, fillchar]) -> string\n"
3591"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003592"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003593"done using the specified fill character (default is a space)");
3594
3595static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003596string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003597{
3598 Py_ssize_t width;
3599 char fillchar = ' ';
3600
3601 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3602 return NULL;
3603
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003604 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003605 Py_INCREF(self);
3606 return (PyObject*) self;
3607 }
3608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003609 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003610}
3611
3612
3613PyDoc_STRVAR(center__doc__,
3614"S.center(width[, fillchar]) -> string\n"
3615"\n"
3616"Return S centered in a string of length width. Padding is\n"
3617"done using the specified fill character (default is a space)");
3618
3619static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003620string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003621{
3622 Py_ssize_t marg, left;
3623 Py_ssize_t width;
3624 char fillchar = ' ';
3625
3626 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3627 return NULL;
3628
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003629 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003630 Py_INCREF(self);
3631 return (PyObject*) self;
3632 }
3633
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003634 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003635 left = marg / 2 + (marg & width & 1);
3636
3637 return pad(self, left, marg - left, fillchar);
3638}
3639
3640PyDoc_STRVAR(zfill__doc__,
3641"S.zfill(width) -> string\n"
3642"\n"
3643"Pad a numeric string S with zeros on the left, to fill a field\n"
3644"of the specified width. The string S is never truncated.");
3645
3646static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003647string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003648{
3649 Py_ssize_t fill;
3650 PyObject *s;
3651 char *p;
3652 Py_ssize_t width;
3653
3654 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3655 return NULL;
3656
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003657 if (PyString_GET_SIZE(self) >= width) {
3658 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003659 Py_INCREF(self);
3660 return (PyObject*) self;
3661 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003662 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003663 return PyString_FromStringAndSize(
3664 PyString_AS_STRING(self),
3665 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003666 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003667 }
3668
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003669 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003670
Christian Heimes44720832008-05-26 13:01:01 +00003671 s = pad(self, fill, 0, '0');
3672
3673 if (s == NULL)
3674 return NULL;
3675
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003676 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003677 if (p[fill] == '+' || p[fill] == '-') {
3678 /* move sign to beginning of string */
3679 p[0] = p[fill];
3680 p[fill] = '0';
3681 }
3682
3683 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003684}
3685
Christian Heimes44720832008-05-26 13:01:01 +00003686PyDoc_STRVAR(isspace__doc__,
3687"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003688\n\
Christian Heimes44720832008-05-26 13:01:01 +00003689Return True if all characters in S are whitespace\n\
3690and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003691
Christian Heimes44720832008-05-26 13:01:01 +00003692static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003693string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003694{
Christian Heimes44720832008-05-26 13:01:01 +00003695 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003696 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003697 register const unsigned char *e;
3698
3699 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003700 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003701 isspace(*p))
3702 return PyBool_FromLong(1);
3703
3704 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003705 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003706 return PyBool_FromLong(0);
3707
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003708 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003709 for (; p < e; p++) {
3710 if (!isspace(*p))
3711 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003712 }
Christian Heimes44720832008-05-26 13:01:01 +00003713 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003714}
3715
Christian Heimes44720832008-05-26 13:01:01 +00003716
3717PyDoc_STRVAR(isalpha__doc__,
3718"S.isalpha() -> bool\n\
3719\n\
3720Return True if all characters in S are alphabetic\n\
3721and there is at least one character in S, False otherwise.");
3722
3723static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003724string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003725{
Christian Heimes44720832008-05-26 13:01:01 +00003726 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003727 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003728 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003729
Christian Heimes44720832008-05-26 13:01:01 +00003730 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003731 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003732 isalpha(*p))
3733 return PyBool_FromLong(1);
3734
3735 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003736 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003737 return PyBool_FromLong(0);
3738
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003739 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003740 for (; p < e; p++) {
3741 if (!isalpha(*p))
3742 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003743 }
Christian Heimes44720832008-05-26 13:01:01 +00003744 return PyBool_FromLong(1);
3745}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003746
Christian Heimes44720832008-05-26 13:01:01 +00003747
3748PyDoc_STRVAR(isalnum__doc__,
3749"S.isalnum() -> bool\n\
3750\n\
3751Return True if all characters in S are alphanumeric\n\
3752and there is at least one character in S, False otherwise.");
3753
3754static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003755string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003756{
3757 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003758 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003759 register const unsigned char *e;
3760
3761 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003762 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003763 isalnum(*p))
3764 return PyBool_FromLong(1);
3765
3766 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003767 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003768 return PyBool_FromLong(0);
3769
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003770 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003771 for (; p < e; p++) {
3772 if (!isalnum(*p))
3773 return PyBool_FromLong(0);
3774 }
3775 return PyBool_FromLong(1);
3776}
3777
3778
3779PyDoc_STRVAR(isdigit__doc__,
3780"S.isdigit() -> bool\n\
3781\n\
3782Return True if all characters in S are digits\n\
3783and there is at least one character in S, False otherwise.");
3784
3785static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003786string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003787{
3788 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003789 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003790 register const unsigned char *e;
3791
3792 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003793 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003794 isdigit(*p))
3795 return PyBool_FromLong(1);
3796
3797 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003798 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003799 return PyBool_FromLong(0);
3800
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003801 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003802 for (; p < e; p++) {
3803 if (!isdigit(*p))
3804 return PyBool_FromLong(0);
3805 }
3806 return PyBool_FromLong(1);
3807}
3808
3809
3810PyDoc_STRVAR(islower__doc__,
3811"S.islower() -> bool\n\
3812\n\
3813Return True if all cased characters in S are lowercase and there is\n\
3814at least one cased character in S, False otherwise.");
3815
3816static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003817string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003818{
3819 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003820 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003821 register const unsigned char *e;
3822 int cased;
3823
3824 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003825 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003826 return PyBool_FromLong(islower(*p) != 0);
3827
3828 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003829 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003830 return PyBool_FromLong(0);
3831
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003832 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003833 cased = 0;
3834 for (; p < e; p++) {
3835 if (isupper(*p))
3836 return PyBool_FromLong(0);
3837 else if (!cased && islower(*p))
3838 cased = 1;
3839 }
3840 return PyBool_FromLong(cased);
3841}
3842
3843
3844PyDoc_STRVAR(isupper__doc__,
3845"S.isupper() -> bool\n\
3846\n\
3847Return True if all cased characters in S are uppercase and there is\n\
3848at least one cased character in S, False otherwise.");
3849
3850static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003851string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003852{
3853 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003854 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003855 register const unsigned char *e;
3856 int cased;
3857
3858 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003859 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003860 return PyBool_FromLong(isupper(*p) != 0);
3861
3862 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003863 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003864 return PyBool_FromLong(0);
3865
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003866 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003867 cased = 0;
3868 for (; p < e; p++) {
3869 if (islower(*p))
3870 return PyBool_FromLong(0);
3871 else if (!cased && isupper(*p))
3872 cased = 1;
3873 }
3874 return PyBool_FromLong(cased);
3875}
3876
3877
3878PyDoc_STRVAR(istitle__doc__,
3879"S.istitle() -> bool\n\
3880\n\
3881Return True if S is a titlecased string and there is at least one\n\
3882character in S, i.e. uppercase characters may only follow uncased\n\
3883characters and lowercase characters only cased ones. Return False\n\
3884otherwise.");
3885
3886static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003887string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003888{
3889 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003890 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003891 register const unsigned char *e;
3892 int cased, previous_is_cased;
3893
3894 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003895 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003896 return PyBool_FromLong(isupper(*p) != 0);
3897
3898 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003899 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003900 return PyBool_FromLong(0);
3901
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003902 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003903 cased = 0;
3904 previous_is_cased = 0;
3905 for (; p < e; p++) {
3906 register const unsigned char ch = *p;
3907
3908 if (isupper(ch)) {
3909 if (previous_is_cased)
3910 return PyBool_FromLong(0);
3911 previous_is_cased = 1;
3912 cased = 1;
3913 }
3914 else if (islower(ch)) {
3915 if (!previous_is_cased)
3916 return PyBool_FromLong(0);
3917 previous_is_cased = 1;
3918 cased = 1;
3919 }
3920 else
3921 previous_is_cased = 0;
3922 }
3923 return PyBool_FromLong(cased);
3924}
3925
3926
3927PyDoc_STRVAR(splitlines__doc__,
3928"S.splitlines([keepends]) -> list of strings\n\
3929\n\
3930Return a list of the lines in S, breaking at line boundaries.\n\
3931Line breaks are not included in the resulting list unless keepends\n\
3932is given and true.");
3933
3934static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003935string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003936{
3937 register Py_ssize_t i;
3938 register Py_ssize_t j;
3939 Py_ssize_t len;
3940 int keepends = 0;
3941 PyObject *list;
3942 PyObject *str;
3943 char *data;
3944
3945 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3946 return NULL;
3947
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003948 data = PyString_AS_STRING(self);
3949 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003950
3951 /* This does not use the preallocated list because splitlines is
3952 usually run with hundreds of newlines. The overhead of
3953 switching between PyList_SET_ITEM and append causes about a
3954 2-3% slowdown for that common case. A smarter implementation
3955 could move the if check out, so the SET_ITEMs are done first
3956 and the appends only done when the prealloc buffer is full.
3957 That's too much work for little gain.*/
3958
3959 list = PyList_New(0);
3960 if (!list)
3961 goto onError;
3962
3963 for (i = j = 0; i < len; ) {
3964 Py_ssize_t eol;
3965
3966 /* Find a line and append it */
3967 while (i < len && data[i] != '\n' && data[i] != '\r')
3968 i++;
3969
3970 /* Skip the line break reading CRLF as one line break */
3971 eol = i;
3972 if (i < len) {
3973 if (data[i] == '\r' && i + 1 < len &&
3974 data[i+1] == '\n')
3975 i += 2;
3976 else
3977 i++;
3978 if (keepends)
3979 eol = i;
3980 }
3981 SPLIT_APPEND(data, j, eol);
3982 j = i;
3983 }
3984 if (j < len) {
3985 SPLIT_APPEND(data, j, len);
3986 }
3987
3988 return list;
3989
3990 onError:
3991 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003992 return NULL;
3993}
3994
Robert Schuppenies51df0642008-06-01 16:16:17 +00003995PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003996"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003997
3998static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003999string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00004000{
4001 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00004002 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00004003 return PyInt_FromSsize_t(res);
4004}
4005
Christian Heimes44720832008-05-26 13:01:01 +00004006#undef SPLIT_APPEND
4007#undef SPLIT_ADD
4008#undef MAX_PREALLOC
4009#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00004010
4011static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004012string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00004013{
Christian Heimes44720832008-05-26 13:01:01 +00004014 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00004015}
4016
Christian Heimes1a6387e2008-03-26 12:49:49 +00004017
Christian Heimes44720832008-05-26 13:01:01 +00004018#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00004019
Christian Heimes44720832008-05-26 13:01:01 +00004020PyDoc_STRVAR(format__doc__,
4021"S.format(*args, **kwargs) -> unicode\n\
4022\n\
4023");
Christian Heimes1a6387e2008-03-26 12:49:49 +00004024
Eric Smithdc13b792008-05-30 18:10:04 +00004025static PyObject *
4026string__format__(PyObject* self, PyObject* args)
4027{
4028 PyObject *format_spec;
4029 PyObject *result = NULL;
4030 PyObject *tmp = NULL;
4031
4032 /* If 2.x, convert format_spec to the same type as value */
4033 /* This is to allow things like u''.format('') */
4034 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
4035 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004036 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00004037 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
4038 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
4039 goto done;
4040 }
4041 tmp = PyObject_Str(format_spec);
4042 if (tmp == NULL)
4043 goto done;
4044 format_spec = tmp;
4045
4046 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004047 PyString_AS_STRING(format_spec),
4048 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004049done:
4050 Py_XDECREF(tmp);
4051 return result;
4052}
4053
Christian Heimes44720832008-05-26 13:01:01 +00004054PyDoc_STRVAR(p_format__doc__,
4055"S.__format__(format_spec) -> unicode\n\
4056\n\
4057");
4058
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004059
Christian Heimes1a6387e2008-03-26 12:49:49 +00004060static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004061string_methods[] = {
4062 /* Counterparts of the obsolete stropmodule functions; except
4063 string.maketrans(). */
4064 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4065 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4066 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4067 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4068 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4069 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4070 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4071 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4072 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4073 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4074 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4075 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4076 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4077 capitalize__doc__},
4078 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4079 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4080 endswith__doc__},
4081 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4082 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4083 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4084 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4085 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4086 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4087 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4088 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4089 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4090 rpartition__doc__},
4091 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4092 startswith__doc__},
4093 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4094 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4095 swapcase__doc__},
4096 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4097 translate__doc__},
4098 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4099 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4100 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4101 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4102 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4103 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4104 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4105 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4106 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Benjamin Peterson332d7212009-09-18 21:14:55 +00004107 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
4108 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004109 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4110 expandtabs__doc__},
4111 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4112 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004113 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4114 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004115 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4116 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004117};
4118
4119static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004120str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004121
Christian Heimes44720832008-05-26 13:01:01 +00004122static PyObject *
4123string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4124{
4125 PyObject *x = NULL;
4126 static char *kwlist[] = {"object", 0};
4127
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004128 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004129 return str_subtype_new(type, args, kwds);
4130 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4131 return NULL;
4132 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004133 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004134 return PyObject_Str(x);
4135}
4136
4137static PyObject *
4138str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4139{
4140 PyObject *tmp, *pnew;
4141 Py_ssize_t n;
4142
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004143 assert(PyType_IsSubtype(type, &PyString_Type));
4144 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004145 if (tmp == NULL)
4146 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004147 assert(PyString_CheckExact(tmp));
4148 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004149 pnew = type->tp_alloc(type, n);
4150 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004151 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4152 ((PyStringObject *)pnew)->ob_shash =
4153 ((PyStringObject *)tmp)->ob_shash;
4154 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004155 }
4156 Py_DECREF(tmp);
4157 return pnew;
4158}
4159
4160static PyObject *
4161basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4162{
4163 PyErr_SetString(PyExc_TypeError,
4164 "The basestring type cannot be instantiated");
4165 return NULL;
4166}
4167
4168static PyObject *
4169string_mod(PyObject *v, PyObject *w)
4170{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004171 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004172 Py_INCREF(Py_NotImplemented);
4173 return Py_NotImplemented;
4174 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004175 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004176}
4177
4178PyDoc_STRVAR(basestring_doc,
4179"Type basestring cannot be instantiated; it is the base for str and unicode.");
4180
4181static PyNumberMethods string_as_number = {
4182 0, /*nb_add*/
4183 0, /*nb_subtract*/
4184 0, /*nb_multiply*/
4185 0, /*nb_divide*/
4186 string_mod, /*nb_remainder*/
4187};
4188
4189
4190PyTypeObject PyBaseString_Type = {
4191 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4192 "basestring",
4193 0,
4194 0,
4195 0, /* tp_dealloc */
4196 0, /* tp_print */
4197 0, /* tp_getattr */
4198 0, /* tp_setattr */
4199 0, /* tp_compare */
4200 0, /* tp_repr */
4201 0, /* tp_as_number */
4202 0, /* tp_as_sequence */
4203 0, /* tp_as_mapping */
4204 0, /* tp_hash */
4205 0, /* tp_call */
4206 0, /* tp_str */
4207 0, /* tp_getattro */
4208 0, /* tp_setattro */
4209 0, /* tp_as_buffer */
4210 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4211 basestring_doc, /* tp_doc */
4212 0, /* tp_traverse */
4213 0, /* tp_clear */
4214 0, /* tp_richcompare */
4215 0, /* tp_weaklistoffset */
4216 0, /* tp_iter */
4217 0, /* tp_iternext */
4218 0, /* tp_methods */
4219 0, /* tp_members */
4220 0, /* tp_getset */
4221 &PyBaseObject_Type, /* tp_base */
4222 0, /* tp_dict */
4223 0, /* tp_descr_get */
4224 0, /* tp_descr_set */
4225 0, /* tp_dictoffset */
4226 0, /* tp_init */
4227 0, /* tp_alloc */
4228 basestring_new, /* tp_new */
4229 0, /* tp_free */
4230};
4231
4232PyDoc_STRVAR(string_doc,
4233"str(object) -> string\n\
4234\n\
4235Return a nice string representation of the object.\n\
4236If the argument is a string, the return value is the same object.");
4237
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004238PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004239 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4240 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004241 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00004242 sizeof(char),
4243 string_dealloc, /* tp_dealloc */
4244 (printfunc)string_print, /* tp_print */
4245 0, /* tp_getattr */
4246 0, /* tp_setattr */
4247 0, /* tp_compare */
4248 string_repr, /* tp_repr */
4249 &string_as_number, /* tp_as_number */
4250 &string_as_sequence, /* tp_as_sequence */
4251 &string_as_mapping, /* tp_as_mapping */
4252 (hashfunc)string_hash, /* tp_hash */
4253 0, /* tp_call */
4254 string_str, /* tp_str */
4255 PyObject_GenericGetAttr, /* tp_getattro */
4256 0, /* tp_setattro */
4257 &string_as_buffer, /* tp_as_buffer */
4258 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4259 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4260 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4261 string_doc, /* tp_doc */
4262 0, /* tp_traverse */
4263 0, /* tp_clear */
4264 (richcmpfunc)string_richcompare, /* tp_richcompare */
4265 0, /* tp_weaklistoffset */
4266 0, /* tp_iter */
4267 0, /* tp_iternext */
4268 string_methods, /* tp_methods */
4269 0, /* tp_members */
4270 0, /* tp_getset */
4271 &PyBaseString_Type, /* tp_base */
4272 0, /* tp_dict */
4273 0, /* tp_descr_get */
4274 0, /* tp_descr_set */
4275 0, /* tp_dictoffset */
4276 0, /* tp_init */
4277 0, /* tp_alloc */
4278 string_new, /* tp_new */
4279 PyObject_Del, /* tp_free */
4280};
4281
4282void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004283PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004284{
4285 register PyObject *v;
4286 if (*pv == NULL)
4287 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004288 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004289 Py_DECREF(*pv);
4290 *pv = NULL;
4291 return;
4292 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004293 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004294 Py_DECREF(*pv);
4295 *pv = v;
4296}
4297
4298void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004299PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004300{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004301 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004302 Py_XDECREF(w);
4303}
4304
4305
4306/* The following function breaks the notion that strings are immutable:
4307 it changes the size of a string. We get away with this only if there
4308 is only one module referencing the object. You can also think of it
4309 as creating a new string object and destroying the old one, only
4310 more efficiently. In any case, don't use this if the string may
4311 already be known to some other part of the code...
4312 Note that if there's not enough memory to resize the string, the original
4313 string object at *pv is deallocated, *pv is set to NULL, an "out of
4314 memory" exception is set, and -1 is returned. Else (on success) 0 is
4315 returned, and the value in *pv may or may not be the same as on input.
4316 As always, an extra byte is allocated for a trailing \0 byte (newsize
4317 does *not* include that), and a trailing \0 byte is stored.
4318*/
4319
4320int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004321_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004322{
4323 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004324 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004325 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004326 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4327 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004328 *pv = 0;
4329 Py_DECREF(v);
4330 PyErr_BadInternalCall();
4331 return -1;
4332 }
4333 /* XXX UNREF/NEWREF interface should be more symmetrical */
4334 _Py_DEC_REFTOTAL;
4335 _Py_ForgetReference(v);
4336 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004337 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004338 if (*pv == NULL) {
4339 PyObject_Del(v);
4340 PyErr_NoMemory();
4341 return -1;
4342 }
4343 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004344 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004345 Py_SIZE(sv) = newsize;
4346 sv->ob_sval[newsize] = '\0';
4347 sv->ob_shash = -1; /* invalidate cached hash value */
4348 return 0;
4349}
4350
4351/* Helpers for formatstring */
4352
4353Py_LOCAL_INLINE(PyObject *)
4354getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4355{
4356 Py_ssize_t argidx = *p_argidx;
4357 if (argidx < arglen) {
4358 (*p_argidx)++;
4359 if (arglen < 0)
4360 return args;
4361 else
4362 return PyTuple_GetItem(args, argidx);
4363 }
4364 PyErr_SetString(PyExc_TypeError,
4365 "not enough arguments for format string");
4366 return NULL;
4367}
4368
/* Flag bits collected while parsing a format specifier; each one
 * corresponds to a single flag character in the format string.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4381
4382Py_LOCAL_INLINE(int)
4383formatfloat(char *buf, size_t buflen, int flags,
4384 int prec, int type, PyObject *v)
4385{
Eric Smithc1bdf892009-10-26 17:46:17 +00004386 char *tmp;
Christian Heimes44720832008-05-26 13:01:01 +00004387 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00004388 Py_ssize_t len;
4389
Christian Heimes44720832008-05-26 13:01:01 +00004390 x = PyFloat_AsDouble(v);
4391 if (x == -1.0 && PyErr_Occurred()) {
4392 PyErr_Format(PyExc_TypeError, "float argument required, "
4393 "not %.200s", Py_TYPE(v)->tp_name);
4394 return -1;
4395 }
4396 if (prec < 0)
4397 prec = 6;
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004398#if SIZEOF_INT > 4
Mark Dickinson174e9092009-03-29 16:17:16 +00004399 /* make sure that the decimal representation of precision really does
4400 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004401 if (prec > 0x7fffffff) {
Mark Dickinson174e9092009-03-29 16:17:16 +00004402 PyErr_SetString(PyExc_OverflowError,
4403 "outrageously large precision "
4404 "for formatted float");
4405 return -1;
4406 }
Mark Dickinson2fdd58a2009-08-28 20:46:24 +00004407#endif
Mark Dickinson174e9092009-03-29 16:17:16 +00004408
Mark Dickinson2e648ec2009-03-29 14:37:51 +00004409 if (type == 'f' && fabs(x) >= 1e50)
Eric Smithd6c393a2008-07-17 19:49:47 +00004410 type = 'g';
Christian Heimes44720832008-05-26 13:01:01 +00004411 /* Worst case length calc to ensure no buffer overrun:
4412
4413 'g' formats:
4414 fmt = %#.<prec>g
4415 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4416 for any double rep.)
4417 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4418
4419 'f' formats:
4420 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4421 len = 1 + 50 + 1 + prec = 52 + prec
4422
4423 If prec=0 the effective precision is 1 (the leading digit is
4424 always given), therefore increase the length by one.
4425
4426 */
4427 if (((type == 'g' || type == 'G') &&
4428 buflen <= (size_t)10 + (size_t)prec) ||
Eric Smithd6c393a2008-07-17 19:49:47 +00004429 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Christian Heimes44720832008-05-26 13:01:01 +00004430 PyErr_SetString(PyExc_OverflowError,
4431 "formatted float is too long (precision too large?)");
4432 return -1;
4433 }
Eric Smithc1bdf892009-10-26 17:46:17 +00004434 tmp = PyOS_double_to_string(x, type, prec,
4435 (flags&F_ALT)?Py_DTSF_ALT:0, NULL);
4436 if (!tmp)
4437 return -1;
4438 len = strlen(tmp);
4439 if (len >= buflen) {
4440 PyErr_SetString(PyExc_OverflowError,
4441 "formatted float is too long (precision too large?)");
4442 PyMem_Free(tmp);
4443 return -1;
4444 }
4445 strcpy(buf, tmp);
4446 PyMem_Free(tmp);
4447 return (int)len;
Christian Heimes44720832008-05-26 13:01:01 +00004448}
4449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004450/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004451 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4452 * Python's regular ints.
4453 * Return value: a new PyString*, or NULL if error.
4454 * . *pbuf is set to point into it,
4455 * *plen set to the # of chars following that.
4456 * Caller must decref it when done using pbuf.
4457 * The string starting at *pbuf is of the form
4458 * "-"? ("0x" | "0X")? digit+
4459 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4460 * set in flags. The case of hex digits will be correct,
4461 * There will be at least prec digits, zero-filled on the left if
4462 * necessary to get that many.
4463 * val object to be converted
4464 * flags bitmask of format flags; only F_ALT is looked at
4465 * prec minimum number of digits; 0-fill on left if needed
4466 * type a character in [duoxX]; u acts the same as d
4467 *
4468 * CAUTION: o, x and X conversions on regular ints can never
4469 * produce a '-' sign, but can for Python's unbounded ints.
4470 */
4471PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004472_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004473 char **pbuf, int *plen)
4474{
4475 PyObject *result = NULL;
4476 char *buf;
4477 Py_ssize_t i;
4478 int sign; /* 1 if '-', else 0 */
4479 int len; /* number of characters */
4480 Py_ssize_t llen;
4481 int numdigits; /* len == numnondigits + numdigits */
4482 int numnondigits = 0;
4483
4484 switch (type) {
4485 case 'd':
4486 case 'u':
4487 result = Py_TYPE(val)->tp_str(val);
4488 break;
4489 case 'o':
4490 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4491 break;
4492 case 'x':
4493 case 'X':
4494 numnondigits = 2;
4495 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4496 break;
4497 default:
4498 assert(!"'type' not in [duoxX]");
4499 }
4500 if (!result)
4501 return NULL;
4502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004503 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004504 if (!buf) {
4505 Py_DECREF(result);
4506 return NULL;
4507 }
4508
4509 /* To modify the string in-place, there can only be one reference. */
4510 if (Py_REFCNT(result) != 1) {
4511 PyErr_BadInternalCall();
4512 return NULL;
4513 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004514 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004515 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004516 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004517 return NULL;
4518 }
4519 len = (int)llen;
4520 if (buf[len-1] == 'L') {
4521 --len;
4522 buf[len] = '\0';
4523 }
4524 sign = buf[0] == '-';
4525 numnondigits += sign;
4526 numdigits = len - numnondigits;
4527 assert(numdigits > 0);
4528
4529 /* Get rid of base marker unless F_ALT */
4530 if ((flags & F_ALT) == 0) {
4531 /* Need to skip 0x, 0X or 0. */
4532 int skipped = 0;
4533 switch (type) {
4534 case 'o':
4535 assert(buf[sign] == '0');
4536 /* If 0 is only digit, leave it alone. */
4537 if (numdigits > 1) {
4538 skipped = 1;
4539 --numdigits;
4540 }
4541 break;
4542 case 'x':
4543 case 'X':
4544 assert(buf[sign] == '0');
4545 assert(buf[sign + 1] == 'x');
4546 skipped = 2;
4547 numnondigits -= 2;
4548 break;
4549 }
4550 if (skipped) {
4551 buf += skipped;
4552 len -= skipped;
4553 if (sign)
4554 buf[0] = '-';
4555 }
4556 assert(len == numnondigits + numdigits);
4557 assert(numdigits > 0);
4558 }
4559
4560 /* Fill with leading zeroes to meet minimum width. */
4561 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004562 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004563 numnondigits + prec);
4564 char *b1;
4565 if (!r1) {
4566 Py_DECREF(result);
4567 return NULL;
4568 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004569 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004570 for (i = 0; i < numnondigits; ++i)
4571 *b1++ = *buf++;
4572 for (i = 0; i < prec - numdigits; i++)
4573 *b1++ = '0';
4574 for (i = 0; i < numdigits; i++)
4575 *b1++ = *buf++;
4576 *b1 = '\0';
4577 Py_DECREF(result);
4578 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004579 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004580 len = numnondigits + prec;
4581 }
4582
4583 /* Fix up case for hex conversions. */
4584 if (type == 'X') {
4585 /* Need to convert all lower case letters to upper case.
4586 and need to convert 0x to 0X (and -0x to -0X). */
4587 for (i = 0; i < len; i++)
4588 if (buf[i] >= 'a' && buf[i] <= 'x')
4589 buf[i] -= 'a'-'A';
4590 }
4591 *pbuf = buf;
4592 *plen = len;
4593 return result;
4594}
4595
4596Py_LOCAL_INLINE(int)
4597formatint(char *buf, size_t buflen, int flags,
4598 int prec, int type, PyObject *v)
4599{
4600 /* fmt = '%#.' + `prec` + 'l' + `type`
4601 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4602 + 1 + 1 = 24 */
4603 char fmt[64]; /* plenty big enough! */
4604 char *sign;
4605 long x;
4606
4607 x = PyInt_AsLong(v);
4608 if (x == -1 && PyErr_Occurred()) {
4609 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4610 Py_TYPE(v)->tp_name);
4611 return -1;
4612 }
4613 if (x < 0 && type == 'u') {
4614 type = 'd';
4615 }
4616 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4617 sign = "-";
4618 else
4619 sign = "";
4620 if (prec < 0)
4621 prec = 1;
4622
4623 if ((flags & F_ALT) &&
4624 (type == 'x' || type == 'X')) {
4625 /* When converting under %#x or %#X, there are a number
4626 * of issues that cause pain:
4627 * - when 0 is being converted, the C standard leaves off
4628 * the '0x' or '0X', which is inconsistent with other
4629 * %#x/%#X conversions and inconsistent with Python's
4630 * hex() function
4631 * - there are platforms that violate the standard and
4632 * convert 0 with the '0x' or '0X'
4633 * (Metrowerks, Compaq Tru64)
4634 * - there are platforms that give '0x' when converting
4635 * under %#X, but convert 0 in accordance with the
4636 * standard (OS/2 EMX)
4637 *
4638 * We can achieve the desired consistency by inserting our
4639 * own '0x' or '0X' prefix, and substituting %x/%X in place
4640 * of %#x/%#X.
4641 *
4642 * Note that this is the same approach as used in
4643 * formatint() in unicodeobject.c
4644 */
4645 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4646 sign, type, prec, type);
4647 }
4648 else {
4649 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4650 sign, (flags&F_ALT) ? "#" : "",
4651 prec, type);
4652 }
4653
4654 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4655 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4656 */
4657 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4658 PyErr_SetString(PyExc_OverflowError,
4659 "formatted integer is too long (precision too large?)");
4660 return -1;
4661 }
4662 if (sign[0])
4663 PyOS_snprintf(buf, buflen, fmt, -x);
4664 else
4665 PyOS_snprintf(buf, buflen, fmt, x);
4666 return (int)strlen(buf);
4667}
4668
4669Py_LOCAL_INLINE(int)
4670formatchar(char *buf, size_t buflen, PyObject *v)
4671{
4672 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004673 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004674 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4675 return -1;
4676 }
4677 else {
4678 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4679 return -1;
4680 }
4681 buf[1] = '\0';
4682 return 1;
4683}
4684
4685/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4686
4687 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4688 chars are formatted. XXX This is a magic number. Each formatting
4689 routine does bounds checking to ensure no overflow, but a better
4690 solution may be to malloc a buffer of appropriate size for each
4691 format. For now, the current solution is sufficient.
4692*/
4693#define FORMATBUFLEN (size_t)120
4694
4695PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004696PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004697{
4698 char *fmt, *res;
4699 Py_ssize_t arglen, argidx;
4700 Py_ssize_t reslen, rescnt, fmtcnt;
4701 int args_owned = 0;
4702 PyObject *result, *orig_args;
4703#ifdef Py_USING_UNICODE
4704 PyObject *v, *w;
4705#endif
4706 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004707 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004708 PyErr_BadInternalCall();
4709 return NULL;
4710 }
4711 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004712 fmt = PyString_AS_STRING(format);
4713 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004714 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004715 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004716 if (result == NULL)
4717 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004718 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004719 if (PyTuple_Check(args)) {
4720 arglen = PyTuple_GET_SIZE(args);
4721 argidx = 0;
4722 }
4723 else {
4724 arglen = -1;
4725 argidx = -2;
4726 }
4727 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4728 !PyObject_TypeCheck(args, &PyBaseString_Type))
4729 dict = args;
4730 while (--fmtcnt >= 0) {
4731 if (*fmt != '%') {
4732 if (--rescnt < 0) {
4733 rescnt = fmtcnt + 100;
4734 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004735 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004736 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004737 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004738 + reslen - rescnt;
4739 --rescnt;
4740 }
4741 *res++ = *fmt++;
4742 }
4743 else {
4744 /* Got a format specifier */
4745 int flags = 0;
4746 Py_ssize_t width = -1;
4747 int prec = -1;
4748 int c = '\0';
4749 int fill;
4750 int isnumok;
4751 PyObject *v = NULL;
4752 PyObject *temp = NULL;
4753 char *pbuf;
4754 int sign;
4755 Py_ssize_t len;
4756 char formatbuf[FORMATBUFLEN];
4757 /* For format{float,int,char}() */
4758#ifdef Py_USING_UNICODE
4759 char *fmt_start = fmt;
4760 Py_ssize_t argidx_start = argidx;
4761#endif
4762
4763 fmt++;
4764 if (*fmt == '(') {
4765 char *keystart;
4766 Py_ssize_t keylen;
4767 PyObject *key;
4768 int pcount = 1;
4769
4770 if (dict == NULL) {
4771 PyErr_SetString(PyExc_TypeError,
4772 "format requires a mapping");
4773 goto error;
4774 }
4775 ++fmt;
4776 --fmtcnt;
4777 keystart = fmt;
4778 /* Skip over balanced parentheses */
4779 while (pcount > 0 && --fmtcnt >= 0) {
4780 if (*fmt == ')')
4781 --pcount;
4782 else if (*fmt == '(')
4783 ++pcount;
4784 fmt++;
4785 }
4786 keylen = fmt - keystart - 1;
4787 if (fmtcnt < 0 || pcount > 0) {
4788 PyErr_SetString(PyExc_ValueError,
4789 "incomplete format key");
4790 goto error;
4791 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004792 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004793 keylen);
4794 if (key == NULL)
4795 goto error;
4796 if (args_owned) {
4797 Py_DECREF(args);
4798 args_owned = 0;
4799 }
4800 args = PyObject_GetItem(dict, key);
4801 Py_DECREF(key);
4802 if (args == NULL) {
4803 goto error;
4804 }
4805 args_owned = 1;
4806 arglen = -1;
4807 argidx = -2;
4808 }
4809 while (--fmtcnt >= 0) {
4810 switch (c = *fmt++) {
4811 case '-': flags |= F_LJUST; continue;
4812 case '+': flags |= F_SIGN; continue;
4813 case ' ': flags |= F_BLANK; continue;
4814 case '#': flags |= F_ALT; continue;
4815 case '0': flags |= F_ZERO; continue;
4816 }
4817 break;
4818 }
4819 if (c == '*') {
4820 v = getnextarg(args, arglen, &argidx);
4821 if (v == NULL)
4822 goto error;
4823 if (!PyInt_Check(v)) {
4824 PyErr_SetString(PyExc_TypeError,
4825 "* wants int");
4826 goto error;
4827 }
4828 width = PyInt_AsLong(v);
4829 if (width < 0) {
4830 flags |= F_LJUST;
4831 width = -width;
4832 }
4833 if (--fmtcnt >= 0)
4834 c = *fmt++;
4835 }
4836 else if (c >= 0 && isdigit(c)) {
4837 width = c - '0';
4838 while (--fmtcnt >= 0) {
4839 c = Py_CHARMASK(*fmt++);
4840 if (!isdigit(c))
4841 break;
4842 if ((width*10) / 10 != width) {
4843 PyErr_SetString(
4844 PyExc_ValueError,
4845 "width too big");
4846 goto error;
4847 }
4848 width = width*10 + (c - '0');
4849 }
4850 }
4851 if (c == '.') {
4852 prec = 0;
4853 if (--fmtcnt >= 0)
4854 c = *fmt++;
4855 if (c == '*') {
4856 v = getnextarg(args, arglen, &argidx);
4857 if (v == NULL)
4858 goto error;
4859 if (!PyInt_Check(v)) {
4860 PyErr_SetString(
4861 PyExc_TypeError,
4862 "* wants int");
4863 goto error;
4864 }
4865 prec = PyInt_AsLong(v);
4866 if (prec < 0)
4867 prec = 0;
4868 if (--fmtcnt >= 0)
4869 c = *fmt++;
4870 }
4871 else if (c >= 0 && isdigit(c)) {
4872 prec = c - '0';
4873 while (--fmtcnt >= 0) {
4874 c = Py_CHARMASK(*fmt++);
4875 if (!isdigit(c))
4876 break;
4877 if ((prec*10) / 10 != prec) {
4878 PyErr_SetString(
4879 PyExc_ValueError,
4880 "prec too big");
4881 goto error;
4882 }
4883 prec = prec*10 + (c - '0');
4884 }
4885 }
4886 } /* prec */
4887 if (fmtcnt >= 0) {
4888 if (c == 'h' || c == 'l' || c == 'L') {
4889 if (--fmtcnt >= 0)
4890 c = *fmt++;
4891 }
4892 }
4893 if (fmtcnt < 0) {
4894 PyErr_SetString(PyExc_ValueError,
4895 "incomplete format");
4896 goto error;
4897 }
4898 if (c != '%') {
4899 v = getnextarg(args, arglen, &argidx);
4900 if (v == NULL)
4901 goto error;
4902 }
4903 sign = 0;
4904 fill = ' ';
4905 switch (c) {
4906 case '%':
4907 pbuf = "%";
4908 len = 1;
4909 break;
4910 case 's':
4911#ifdef Py_USING_UNICODE
4912 if (PyUnicode_Check(v)) {
4913 fmt = fmt_start;
4914 argidx = argidx_start;
4915 goto unicode;
4916 }
4917#endif
4918 temp = _PyObject_Str(v);
4919#ifdef Py_USING_UNICODE
4920 if (temp != NULL && PyUnicode_Check(temp)) {
4921 Py_DECREF(temp);
4922 fmt = fmt_start;
4923 argidx = argidx_start;
4924 goto unicode;
4925 }
4926#endif
4927 /* Fall through */
4928 case 'r':
4929 if (c == 'r')
4930 temp = PyObject_Repr(v);
4931 if (temp == NULL)
4932 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004933 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004934 PyErr_SetString(PyExc_TypeError,
4935 "%s argument has non-string str()");
4936 Py_DECREF(temp);
4937 goto error;
4938 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004939 pbuf = PyString_AS_STRING(temp);
4940 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004941 if (prec >= 0 && len > prec)
4942 len = prec;
4943 break;
4944 case 'i':
4945 case 'd':
4946 case 'u':
4947 case 'o':
4948 case 'x':
4949 case 'X':
4950 if (c == 'i')
4951 c = 'd';
4952 isnumok = 0;
4953 if (PyNumber_Check(v)) {
4954 PyObject *iobj=NULL;
4955
4956 if (PyInt_Check(v) || (PyLong_Check(v))) {
4957 iobj = v;
4958 Py_INCREF(iobj);
4959 }
4960 else {
4961 iobj = PyNumber_Int(v);
4962 if (iobj==NULL) iobj = PyNumber_Long(v);
4963 }
4964 if (iobj!=NULL) {
4965 if (PyInt_Check(iobj)) {
4966 isnumok = 1;
4967 pbuf = formatbuf;
4968 len = formatint(pbuf,
4969 sizeof(formatbuf),
4970 flags, prec, c, iobj);
4971 Py_DECREF(iobj);
4972 if (len < 0)
4973 goto error;
4974 sign = 1;
4975 }
4976 else if (PyLong_Check(iobj)) {
4977 int ilen;
4978
4979 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004980 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004981 prec, c, &pbuf, &ilen);
4982 Py_DECREF(iobj);
4983 len = ilen;
4984 if (!temp)
4985 goto error;
4986 sign = 1;
4987 }
4988 else {
4989 Py_DECREF(iobj);
4990 }
4991 }
4992 }
4993 if (!isnumok) {
4994 PyErr_Format(PyExc_TypeError,
4995 "%%%c format: a number is required, "
4996 "not %.200s", c, Py_TYPE(v)->tp_name);
4997 goto error;
4998 }
4999 if (flags & F_ZERO)
5000 fill = '0';
5001 break;
5002 case 'e':
5003 case 'E':
5004 case 'f':
5005 case 'F':
5006 case 'g':
5007 case 'G':
Eric Smithd6c393a2008-07-17 19:49:47 +00005008 if (c == 'F')
5009 c = 'f';
Christian Heimes44720832008-05-26 13:01:01 +00005010 pbuf = formatbuf;
5011 len = formatfloat(pbuf, sizeof(formatbuf),
5012 flags, prec, c, v);
5013 if (len < 0)
5014 goto error;
5015 sign = 1;
5016 if (flags & F_ZERO)
5017 fill = '0';
5018 break;
5019 case 'c':
5020#ifdef Py_USING_UNICODE
5021 if (PyUnicode_Check(v)) {
5022 fmt = fmt_start;
5023 argidx = argidx_start;
5024 goto unicode;
5025 }
5026#endif
5027 pbuf = formatbuf;
5028 len = formatchar(pbuf, sizeof(formatbuf), v);
5029 if (len < 0)
5030 goto error;
5031 break;
5032 default:
5033 PyErr_Format(PyExc_ValueError,
5034 "unsupported format character '%c' (0x%x) "
5035 "at index %zd",
5036 c, c,
5037 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005038 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00005039 goto error;
5040 }
5041 if (sign) {
5042 if (*pbuf == '-' || *pbuf == '+') {
5043 sign = *pbuf++;
5044 len--;
5045 }
5046 else if (flags & F_SIGN)
5047 sign = '+';
5048 else if (flags & F_BLANK)
5049 sign = ' ';
5050 else
5051 sign = 0;
5052 }
5053 if (width < len)
5054 width = len;
5055 if (rescnt - (sign != 0) < width) {
5056 reslen -= rescnt;
5057 rescnt = width + fmtcnt + 100;
5058 reslen += rescnt;
5059 if (reslen < 0) {
5060 Py_DECREF(result);
5061 Py_XDECREF(temp);
5062 return PyErr_NoMemory();
5063 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005064 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00005065 Py_XDECREF(temp);
5066 return NULL;
5067 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005068 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00005069 + reslen - rescnt;
5070 }
5071 if (sign) {
5072 if (fill != ' ')
5073 *res++ = sign;
5074 rescnt--;
5075 if (width > len)
5076 width--;
5077 }
5078 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5079 assert(pbuf[0] == '0');
5080 assert(pbuf[1] == c);
5081 if (fill != ' ') {
5082 *res++ = *pbuf++;
5083 *res++ = *pbuf++;
5084 }
5085 rescnt -= 2;
5086 width -= 2;
5087 if (width < 0)
5088 width = 0;
5089 len -= 2;
5090 }
5091 if (width > len && !(flags & F_LJUST)) {
5092 do {
5093 --rescnt;
5094 *res++ = fill;
5095 } while (--width > len);
5096 }
5097 if (fill == ' ') {
5098 if (sign)
5099 *res++ = sign;
5100 if ((flags & F_ALT) &&
5101 (c == 'x' || c == 'X')) {
5102 assert(pbuf[0] == '0');
5103 assert(pbuf[1] == c);
5104 *res++ = *pbuf++;
5105 *res++ = *pbuf++;
5106 }
5107 }
5108 Py_MEMCPY(res, pbuf, len);
5109 res += len;
5110 rescnt -= len;
5111 while (--width >= len) {
5112 --rescnt;
5113 *res++ = ' ';
5114 }
5115 if (dict && (argidx < arglen) && c != '%') {
5116 PyErr_SetString(PyExc_TypeError,
5117 "not all arguments converted during string formatting");
5118 Py_XDECREF(temp);
5119 goto error;
5120 }
5121 Py_XDECREF(temp);
5122 } /* '%' */
5123 } /* until end */
5124 if (argidx < arglen && !dict) {
5125 PyErr_SetString(PyExc_TypeError,
5126 "not all arguments converted during string formatting");
5127 goto error;
5128 }
5129 if (args_owned) {
5130 Py_DECREF(args);
5131 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005132 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005133 return result;
5134
5135#ifdef Py_USING_UNICODE
5136 unicode:
5137 if (args_owned) {
5138 Py_DECREF(args);
5139 args_owned = 0;
5140 }
5141 /* Fiddle args right (remove the first argidx arguments) */
5142 if (PyTuple_Check(orig_args) && argidx > 0) {
5143 PyObject *v;
5144 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5145 v = PyTuple_New(n);
5146 if (v == NULL)
5147 goto error;
5148 while (--n >= 0) {
5149 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5150 Py_INCREF(w);
5151 PyTuple_SET_ITEM(v, n, w);
5152 }
5153 args = v;
5154 } else {
5155 Py_INCREF(orig_args);
5156 args = orig_args;
5157 }
5158 args_owned = 1;
5159 /* Take what we have of the result and let the Unicode formatting
5160 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005161 rescnt = res - PyString_AS_STRING(result);
5162 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005163 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005164 fmtcnt = PyString_GET_SIZE(format) - \
5165 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005166 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5167 if (format == NULL)
5168 goto error;
5169 v = PyUnicode_Format(format, args);
5170 Py_DECREF(format);
5171 if (v == NULL)
5172 goto error;
5173 /* Paste what we have (result) to what the Unicode formatting
5174 function returned (v) and return the result (or error) */
5175 w = PyUnicode_Concat(result, v);
5176 Py_DECREF(result);
5177 Py_DECREF(v);
5178 Py_DECREF(args);
5179 return w;
5180#endif /* Py_USING_UNICODE */
5181
5182 error:
5183 Py_DECREF(result);
5184 if (args_owned) {
5185 Py_DECREF(args);
5186 }
5187 return NULL;
5188}
5189
5190void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005191PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005192{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005193 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005194 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005195 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005196 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005197 /* If it's a string subclass, we don't really know what putting
5198 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005199 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005200 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005201 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005202 return;
5203 if (interned == NULL) {
5204 interned = PyDict_New();
5205 if (interned == NULL) {
5206 PyErr_Clear(); /* Don't leave an exception */
5207 return;
5208 }
5209 }
5210 t = PyDict_GetItem(interned, (PyObject *)s);
5211 if (t) {
5212 Py_INCREF(t);
5213 Py_DECREF(*p);
5214 *p = t;
5215 return;
5216 }
5217
5218 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5219 PyErr_Clear();
5220 return;
5221 }
5222 /* The two references in interned are not counted by refcnt.
5223 The string deallocator will take care of this */
5224 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005225 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005226}
5227
5228void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005229PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005230{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005231 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005232 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5233 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005234 Py_INCREF(*p);
5235 }
5236}
5237
5238
5239PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005240PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005241{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005242 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005243 if (s == NULL)
5244 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005245 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005246 return s;
5247}
5248
5249void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005250PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005251{
5252 int i;
5253 for (i = 0; i < UCHAR_MAX + 1; i++) {
5254 Py_XDECREF(characters[i]);
5255 characters[i] = NULL;
5256 }
5257 Py_XDECREF(nullstring);
5258 nullstring = NULL;
5259}
5260
5261void _Py_ReleaseInternedStrings(void)
5262{
5263 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005264 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005265 Py_ssize_t i, n;
5266 Py_ssize_t immortal_size = 0, mortal_size = 0;
5267
5268 if (interned == NULL || !PyDict_Check(interned))
5269 return;
5270 keys = PyDict_Keys(interned);
5271 if (keys == NULL || !PyList_Check(keys)) {
5272 PyErr_Clear();
5273 return;
5274 }
5275
5276 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5277 detector, interned strings are not forcibly deallocated; rather, we
5278 give them their stolen references back, and then clear and DECREF
5279 the interned dict. */
5280
5281 n = PyList_GET_SIZE(keys);
5282 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5283 n);
5284 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005285 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005286 switch (s->ob_sstate) {
5287 case SSTATE_NOT_INTERNED:
5288 /* XXX Shouldn't happen */
5289 break;
5290 case SSTATE_INTERNED_IMMORTAL:
5291 Py_REFCNT(s) += 1;
5292 immortal_size += Py_SIZE(s);
5293 break;
5294 case SSTATE_INTERNED_MORTAL:
5295 Py_REFCNT(s) += 2;
5296 mortal_size += Py_SIZE(s);
5297 break;
5298 default:
5299 Py_FatalError("Inconsistent interned string state.");
5300 }
5301 s->ob_sstate = SSTATE_NOT_INTERNED;
5302 }
5303 fprintf(stderr, "total size of all interned strings: "
5304 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5305 "mortal/immortal\n", mortal_size, immortal_size);
5306 Py_DECREF(keys);
5307 PyDict_Clear(interned);
5308 Py_DECREF(interned);
5309 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005310}