blob: 0f3874ebc940dcfa0951b652cfb76855b787491f [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000036 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
38
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000039 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000040 string containing exactly `size' bytes.
41
   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000044 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000045 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000046 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000048 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
52
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000059*/
60PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000061PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000063 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +000064 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000066 "Negative size passed to PyString_FromStringAndSize");
Christian Heimes44720832008-05-26 13:01:01 +000067 return NULL;
68 }
69 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
73 Py_INCREF(op);
74 return (PyObject *)op;
75 }
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
79#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85
Mark Dickinson826f3fe2008-12-05 21:55:28 +000086 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +000087 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
89 }
90
Christian Heimes44720832008-05-26 13:01:01 +000091 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +000092 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +000093 if (op == NULL)
94 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +000096 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000104 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000105 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000110 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000111 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
114 }
115 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116}
117
Christian Heimes44720832008-05-26 13:01:01 +0000118PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000119PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000120{
Christian Heimes44720832008-05-26 13:01:01 +0000121 register size_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000122 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Christian Heimes44720832008-05-26 13:01:01 +0000127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
135 Py_INCREF(op);
136 return (PyObject *)op;
137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145
146 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +0000147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +0000148 if (op == NULL)
149 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +0000151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000157 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000158 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000163 PyString_InternInPlace(&t);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000164 op = (PyStringObject *)t;
Christian Heimes44720832008-05-26 13:01:01 +0000165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169}
170
Christian Heimes44720832008-05-26 13:01:01 +0000171PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000172PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000173{
Christian Heimes44720832008-05-26 13:01:01 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000179
Christian Heimes44720832008-05-26 13:01:01 +0000180#ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182#else
183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
186 count = vargs;
187#endif
188#endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000192#ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194#endif
Christian Heimes44720832008-05-26 13:01:01 +0000195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197 ;
198
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
201 */
Mark Dickinson82864d12009-11-15 16:18:58 +0000202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
205 }
206#ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
211 }
212#endif
213 }
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000215 ++f;
Mark Dickinson82864d12009-11-15 16:18:58 +0000216 }
Christian Heimes44720832008-05-26 13:01:01 +0000217
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000227#ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235#endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
241
Christian Heimes44720832008-05-26 13:01:01 +0000242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
253 */
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
265 }
266 } else
267 n++;
268 }
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000273 string = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +0000274 if (!string)
275 return NULL;
276
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000277 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000278
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000284#ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286#endif
Christian Heimes44720832008-05-26 13:01:01 +0000287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
298 }
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
Mark Dickinson82864d12009-11-15 16:18:58 +0000301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
306 }
307#ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
312 }
313#endif
Christian Heimes44720832008-05-26 13:01:01 +0000314 }
315 /* handle the size_t flag. */
Mark Dickinson82864d12009-11-15 16:18:58 +0000316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Christian Heimes44720832008-05-26 13:01:01 +0000317 size_tflag = 1;
318 ++f;
319 }
320
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332#endif
Christian Heimes44720832008-05-26 13:01:01 +0000333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348#endif
Christian Heimes44720832008-05-26 13:01:01 +0000349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
382 }
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
392 }
393 } else
394 *s++ = *f;
395 }
396
397 end:
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000398 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Christian Heimes44720832008-05-26 13:01:01 +0000399 return string;
400}
401
402PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000403PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000404{
405 PyObject* ret;
406 va_list vargs;
407
408#ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs, format);
410#else
411 va_start(vargs);
412#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000413 ret = PyString_FromFormatV(format, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000414 va_end(vargs);
415 return ret;
416}
417
418
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000419PyObject *PyString_Decode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000420 Py_ssize_t size,
421 const char *encoding,
422 const char *errors)
423{
424 PyObject *v, *str;
425
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000427 if (str == NULL)
428 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000430 Py_DECREF(str);
431 return v;
432}
433
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000434PyObject *PyString_AsDecodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000435 const char *encoding,
436 const char *errors)
437{
438 PyObject *v;
439
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000440 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000441 PyErr_BadArgument();
442 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000443 }
444
Christian Heimes44720832008-05-26 13:01:01 +0000445 if (encoding == NULL) {
446#ifdef Py_USING_UNICODE
447 encoding = PyUnicode_GetDefaultEncoding();
448#else
449 PyErr_SetString(PyExc_ValueError, "no encoding specified");
450 goto onError;
451#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000452 }
Christian Heimes44720832008-05-26 13:01:01 +0000453
454 /* Decode via the codec registry */
455 v = PyCodec_Decode(str, encoding, errors);
456 if (v == NULL)
457 goto onError;
458
459 return v;
460
461 onError:
462 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000463}
464
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000465PyObject *PyString_AsDecodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000466 const char *encoding,
467 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000468{
Christian Heimes44720832008-05-26 13:01:01 +0000469 PyObject *v;
470
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000471 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000472 if (v == NULL)
473 goto onError;
474
475#ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v)) {
478 PyObject *temp = v;
479 v = PyUnicode_AsEncodedString(v, NULL, NULL);
480 Py_DECREF(temp);
481 if (v == NULL)
482 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000483 }
Christian Heimes44720832008-05-26 13:01:01 +0000484#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000485 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000486 PyErr_Format(PyExc_TypeError,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v)->tp_name);
489 Py_DECREF(v);
490 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000491 }
Christian Heimes44720832008-05-26 13:01:01 +0000492
493 return v;
494
495 onError:
496 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000497}
498
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000499PyObject *PyString_Encode(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000500 Py_ssize_t size,
501 const char *encoding,
502 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000503{
Christian Heimes44720832008-05-26 13:01:01 +0000504 PyObject *v, *str;
505
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000507 if (str == NULL)
508 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000510 Py_DECREF(str);
511 return v;
512}
513
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000514PyObject *PyString_AsEncodedObject(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000515 const char *encoding,
516 const char *errors)
517{
518 PyObject *v;
519
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000520 if (!PyString_Check(str)) {
Christian Heimes44720832008-05-26 13:01:01 +0000521 PyErr_BadArgument();
522 goto onError;
523 }
524
525 if (encoding == NULL) {
526#ifdef Py_USING_UNICODE
527 encoding = PyUnicode_GetDefaultEncoding();
528#else
529 PyErr_SetString(PyExc_ValueError, "no encoding specified");
530 goto onError;
531#endif
532 }
533
534 /* Encode via the codec registry */
535 v = PyCodec_Encode(str, encoding, errors);
536 if (v == NULL)
537 goto onError;
538
539 return v;
540
541 onError:
542 return NULL;
543}
544
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000545PyObject *PyString_AsEncodedString(PyObject *str,
Christian Heimes44720832008-05-26 13:01:01 +0000546 const char *encoding,
547 const char *errors)
548{
549 PyObject *v;
550
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000551 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000552 if (v == NULL)
553 goto onError;
554
555#ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v)) {
558 PyObject *temp = v;
559 v = PyUnicode_AsEncodedString(v, NULL, NULL);
560 Py_DECREF(temp);
561 if (v == NULL)
562 goto onError;
563 }
564#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000565 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +0000566 PyErr_Format(PyExc_TypeError,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v)->tp_name);
569 Py_DECREF(v);
570 goto onError;
571 }
572
573 return v;
574
575 onError:
576 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000577}
578
579static void
Christian Heimes44720832008-05-26 13:01:01 +0000580string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000581{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000582 switch (PyString_CHECK_INTERNED(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000583 case SSTATE_NOT_INTERNED:
584 break;
585
586 case SSTATE_INTERNED_MORTAL:
587 /* revive dead object temporarily for DelItem */
588 Py_REFCNT(op) = 3;
589 if (PyDict_DelItem(interned, op) != 0)
590 Py_FatalError(
591 "deletion of interned string failed");
592 break;
593
594 case SSTATE_INTERNED_IMMORTAL:
595 Py_FatalError("Immortal interned string died.");
596
597 default:
598 Py_FatalError("Inconsistent interned string state.");
599 }
600 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000601}
602
Christian Heimes44720832008-05-26 13:01:01 +0000603/* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
607
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000608PyObject *PyString_DecodeEscape(const char *s,
Christian Heimes44720832008-05-26 13:01:01 +0000609 Py_ssize_t len,
610 const char *errors,
611 Py_ssize_t unicode,
612 const char *recode_encoding)
613{
614 int c;
615 char *p, *buf;
616 const char *end;
617 PyObject *v;
618 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000619 v = PyString_FromStringAndSize((char *)NULL, newlen);
Christian Heimes44720832008-05-26 13:01:01 +0000620 if (v == NULL)
621 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000622 p = buf = PyString_AsString(v);
Christian Heimes44720832008-05-26 13:01:01 +0000623 end = s + len;
624 while (s < end) {
625 if (*s != '\\') {
626 non_esc:
627#ifdef Py_USING_UNICODE
628 if (recode_encoding && (*s & 0x80)) {
629 PyObject *u, *w;
630 char *r;
631 const char* t;
632 Py_ssize_t rn;
633 t = s;
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t < end && (*t & 0x80)) t++;
636 u = PyUnicode_DecodeUTF8(s, t - s, errors);
637 if(!u) goto failed;
638
639 /* Recode them in target encoding. */
640 w = PyUnicode_AsEncodedString(
641 u, recode_encoding, errors);
642 Py_DECREF(u);
643 if (!w) goto failed;
644
645 /* Append bytes to output buffer. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000646 assert(PyString_Check(w));
647 r = PyString_AS_STRING(w);
648 rn = PyString_GET_SIZE(w);
Christian Heimes44720832008-05-26 13:01:01 +0000649 Py_MEMCPY(p, r, rn);
650 p += rn;
651 Py_DECREF(w);
652 s = t;
653 } else {
654 *p++ = *s++;
655 }
656#else
657 *p++ = *s++;
658#endif
659 continue;
660 }
661 s++;
662 if (s==end) {
663 PyErr_SetString(PyExc_ValueError,
664 "Trailing \\ in string");
665 goto failed;
666 }
667 switch (*s++) {
668 /* XXX This assumes ASCII! */
669 case '\n': break;
670 case '\\': *p++ = '\\'; break;
671 case '\'': *p++ = '\''; break;
672 case '\"': *p++ = '\"'; break;
673 case 'b': *p++ = '\b'; break;
674 case 'f': *p++ = '\014'; break; /* FF */
675 case 't': *p++ = '\t'; break;
676 case 'n': *p++ = '\n'; break;
677 case 'r': *p++ = '\r'; break;
678 case 'v': *p++ = '\013'; break; /* VT */
679 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
682 c = s[-1] - '0';
683 if (s < end && '0' <= *s && *s <= '7') {
684 c = (c<<3) + *s++ - '0';
685 if (s < end && '0' <= *s && *s <= '7')
686 c = (c<<3) + *s++ - '0';
687 }
688 *p++ = c;
689 break;
690 case 'x':
691 if (s+1 < end &&
692 isxdigit(Py_CHARMASK(s[0])) &&
693 isxdigit(Py_CHARMASK(s[1])))
694 {
695 unsigned int x = 0;
696 c = Py_CHARMASK(*s);
697 s++;
698 if (isdigit(c))
699 x = c - '0';
700 else if (islower(c))
701 x = 10 + c - 'a';
702 else
703 x = 10 + c - 'A';
704 x = x << 4;
705 c = Py_CHARMASK(*s);
706 s++;
707 if (isdigit(c))
708 x += c - '0';
709 else if (islower(c))
710 x += 10 + c - 'a';
711 else
712 x += 10 + c - 'A';
713 *p++ = x;
714 break;
715 }
716 if (!errors || strcmp(errors, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError,
718 "invalid \\x escape");
719 goto failed;
720 }
721 if (strcmp(errors, "replace") == 0) {
722 *p++ = '?';
723 } else if (strcmp(errors, "ignore") == 0)
724 /* do nothing */;
725 else {
726 PyErr_Format(PyExc_ValueError,
727 "decoding error; "
728 "unknown error handling code: %.400s",
729 errors);
730 goto failed;
731 }
732#ifndef Py_USING_UNICODE
733 case 'u':
734 case 'U':
735 case 'N':
736 if (unicode) {
737 PyErr_SetString(PyExc_ValueError,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
740 goto failed;
741 }
742#endif
743 default:
744 *p++ = '\\';
745 s--;
746 goto non_esc; /* an arbitry number of unescaped
747 UTF-8 bytes may follow. */
748 }
749 }
750 if (p-buf < newlen)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000751 _PyString_Resize(&v, p - buf);
Christian Heimes44720832008-05-26 13:01:01 +0000752 return v;
753 failed:
754 Py_DECREF(v);
755 return NULL;
756}
757
758/* -------------------------------------------------------------------- */
759/* object api */
760
Christian Heimes1a6387e2008-03-26 12:49:49 +0000761static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000762string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000763{
Christian Heimes44720832008-05-26 13:01:01 +0000764 char *s;
765 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000766 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000767 return -1;
768 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000769}
770
Christian Heimes44720832008-05-26 13:01:01 +0000771static /*const*/ char *
772string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000773{
Christian Heimes44720832008-05-26 13:01:01 +0000774 char *s;
775 Py_ssize_t len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000776 if (PyString_AsStringAndSize(op, &s, &len))
Christian Heimes44720832008-05-26 13:01:01 +0000777 return NULL;
778 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779}
780
781Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000782PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000784 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000785 return string_getsize(op);
786 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000787}
788
Christian Heimes44720832008-05-26 13:01:01 +0000789/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000790PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000792 if (!PyString_Check(op))
Christian Heimes44720832008-05-26 13:01:01 +0000793 return string_getbuffer(op);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000794 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000795}
796
797int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000798PyString_AsStringAndSize(register PyObject *obj,
Christian Heimes44720832008-05-26 13:01:01 +0000799 register char **s,
800 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000801{
Christian Heimes44720832008-05-26 13:01:01 +0000802 if (s == NULL) {
803 PyErr_BadInternalCall();
804 return -1;
805 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000806
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000807 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000808#ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj)) {
810 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
811 if (obj == NULL)
812 return -1;
813 }
814 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000815#endif
Christian Heimes44720832008-05-26 13:01:01 +0000816 {
817 PyErr_Format(PyExc_TypeError,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj)->tp_name);
820 return -1;
821 }
822 }
823
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000824 *s = PyString_AS_STRING(obj);
Christian Heimes44720832008-05-26 13:01:01 +0000825 if (len != NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000826 *len = PyString_GET_SIZE(obj);
827 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000828 PyErr_SetString(PyExc_TypeError,
829 "expected string without null bytes");
830 return -1;
831 }
832 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000833}
834
Christian Heimes1a6387e2008-03-26 12:49:49 +0000835/* -------------------------------------------------------------------- */
836/* Methods */
837
Christian Heimes44720832008-05-26 13:01:01 +0000838#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000839#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000840
Christian Heimes1a6387e2008-03-26 12:49:49 +0000841#include "stringlib/count.h"
842#include "stringlib/find.h"
843#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000844
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000845#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000846#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000847
Christian Heimes1a6387e2008-03-26 12:49:49 +0000848
849
850static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000851string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000852{
Christian Heimes44720832008-05-26 13:01:01 +0000853 Py_ssize_t i, str_len;
854 char c;
855 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000856
Christian Heimes44720832008-05-26 13:01:01 +0000857 /* XXX Ought to check for interrupts when writing long strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000858 if (! PyString_CheckExact(op)) {
Christian Heimes44720832008-05-26 13:01:01 +0000859 int ret;
860 /* A str subclass may have its own __str__ method. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000861 op = (PyStringObject *) PyObject_Str((PyObject *)op);
Christian Heimes44720832008-05-26 13:01:01 +0000862 if (op == NULL)
863 return -1;
864 ret = string_print(op, fp, flags);
865 Py_DECREF(op);
866 return ret;
867 }
868 if (flags & Py_PRINT_RAW) {
869 char *data = op->ob_sval;
870 Py_ssize_t size = Py_SIZE(op);
871 Py_BEGIN_ALLOW_THREADS
872 while (size > INT_MAX) {
873 /* Very long strings cannot be written atomically.
874 * But don't write exactly INT_MAX bytes at a time
875 * to avoid memory aligment issues.
876 */
877 const int chunk_size = INT_MAX & ~0x3FFF;
878 fwrite(data, 1, chunk_size, fp);
879 data += chunk_size;
880 size -= chunk_size;
881 }
882#ifdef __VMS
883 if (size) fwrite(data, (int)size, 1, fp);
884#else
885 fwrite(data, 1, (int)size, fp);
886#endif
887 Py_END_ALLOW_THREADS
888 return 0;
889 }
890
891 /* figure out which quote to use; single is preferred */
892 quote = '\'';
893 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
894 !memchr(op->ob_sval, '"', Py_SIZE(op)))
895 quote = '"';
896
897 str_len = Py_SIZE(op);
898 Py_BEGIN_ALLOW_THREADS
899 fputc(quote, fp);
900 for (i = 0; i < str_len; i++) {
901 /* Since strings are immutable and the caller should have a
902 reference, accessing the interal buffer should not be an issue
903 with the GIL released. */
904 c = op->ob_sval[i];
905 if (c == quote || c == '\\')
906 fprintf(fp, "\\%c", c);
907 else if (c == '\t')
908 fprintf(fp, "\\t");
909 else if (c == '\n')
910 fprintf(fp, "\\n");
911 else if (c == '\r')
912 fprintf(fp, "\\r");
913 else if (c < ' ' || c >= 0x7f)
914 fprintf(fp, "\\x%02x", c & 0xff);
915 else
916 fputc(c, fp);
917 }
918 fputc(quote, fp);
919 Py_END_ALLOW_THREADS
920 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000921}
922
Christian Heimes44720832008-05-26 13:01:01 +0000923PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000924PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000925{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes44720832008-05-26 13:01:01 +0000927 size_t newsize = 2 + 4 * Py_SIZE(op);
928 PyObject *v;
929 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
930 PyErr_SetString(PyExc_OverflowError,
931 "string is too large to make repr");
Christian Heimes1a6387e2008-03-26 12:49:49 +0000932 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000933 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000934 v = PyString_FromStringAndSize((char *)NULL, newsize);
Christian Heimes44720832008-05-26 13:01:01 +0000935 if (v == NULL) {
936 return NULL;
937 }
938 else {
939 register Py_ssize_t i;
940 register char c;
941 register char *p;
942 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000943
Christian Heimes44720832008-05-26 13:01:01 +0000944 /* figure out which quote to use; single is preferred */
945 quote = '\'';
946 if (smartquotes &&
947 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
948 !memchr(op->ob_sval, '"', Py_SIZE(op)))
949 quote = '"';
950
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951 p = PyString_AS_STRING(v);
Christian Heimes44720832008-05-26 13:01:01 +0000952 *p++ = quote;
953 for (i = 0; i < Py_SIZE(op); i++) {
954 /* There's at least enough room for a hex escape
955 and a closing quote. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000956 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Christian Heimes44720832008-05-26 13:01:01 +0000957 c = op->ob_sval[i];
958 if (c == quote || c == '\\')
959 *p++ = '\\', *p++ = c;
960 else if (c == '\t')
961 *p++ = '\\', *p++ = 't';
962 else if (c == '\n')
963 *p++ = '\\', *p++ = 'n';
964 else if (c == '\r')
965 *p++ = '\\', *p++ = 'r';
966 else if (c < ' ' || c >= 0x7f) {
967 /* For performance, we don't want to call
968 PyOS_snprintf here (extra layers of
969 function call). */
970 sprintf(p, "\\x%02x", c & 0xff);
971 p += 4;
972 }
973 else
974 *p++ = c;
975 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000976 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Christian Heimes44720832008-05-26 13:01:01 +0000977 *p++ = quote;
978 *p = '\0';
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000979 _PyString_Resize(
980 &v, (p - PyString_AS_STRING(v)));
Christian Heimes44720832008-05-26 13:01:01 +0000981 return v;
982 }
983}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000984
985static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000986string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000988 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000989}
990
Christian Heimes1a6387e2008-03-26 12:49:49 +0000991static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000992string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000994 assert(PyString_Check(s));
995 if (PyString_CheckExact(s)) {
Christian Heimes44720832008-05-26 13:01:01 +0000996 Py_INCREF(s);
997 return s;
998 }
999 else {
1000 /* Subtype -- return genuine string with the same value. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001001 PyStringObject *t = (PyStringObject *) s;
1002 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Christian Heimes44720832008-05-26 13:01:01 +00001003 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001004}
1005
Christian Heimes44720832008-05-26 13:01:01 +00001006static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001007string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001008{
1009 return Py_SIZE(a);
1010}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001011
Christian Heimes44720832008-05-26 13:01:01 +00001012static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001013string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001014{
1015 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016 register PyStringObject *op;
1017 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001018#ifdef Py_USING_UNICODE
1019 if (PyUnicode_Check(bb))
1020 return PyUnicode_Concat((PyObject *)a, bb);
1021#endif
1022 if (PyByteArray_Check(bb))
1023 return PyByteArray_Concat((PyObject *)a, bb);
1024 PyErr_Format(PyExc_TypeError,
1025 "cannot concatenate 'str' and '%.200s' objects",
1026 Py_TYPE(bb)->tp_name);
1027 return NULL;
1028 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001029#define b ((PyStringObject *)bb)
Christian Heimes44720832008-05-26 13:01:01 +00001030 /* Optimize cases with empty left or right operand */
1031 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001032 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001033 if (Py_SIZE(a) == 0) {
1034 Py_INCREF(bb);
1035 return bb;
1036 }
1037 Py_INCREF(a);
1038 return (PyObject *)a;
1039 }
1040 size = Py_SIZE(a) + Py_SIZE(b);
Neal Norwitze7d8be82008-07-31 17:17:14 +00001041 /* Check that string sizes are not negative, to prevent an
1042 overflow in cases where we are passed incorrectly-created
1043 strings with negative lengths (due to a bug in other code).
1044 */
1045 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1046 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
Christian Heimes44720832008-05-26 13:01:01 +00001047 PyErr_SetString(PyExc_OverflowError,
1048 "strings are too large to concat");
1049 return NULL;
1050 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001051
Christian Heimes44720832008-05-26 13:01:01 +00001052 /* Inline PyObject_NewVar */
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001053 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
Neal Norwitze7d8be82008-07-31 17:17:14 +00001054 PyErr_SetString(PyExc_OverflowError,
1055 "strings are too large to concat");
1056 return NULL;
1057 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001058 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
Christian Heimes44720832008-05-26 13:01:01 +00001059 if (op == NULL)
1060 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001061 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001062 op->ob_shash = -1;
1063 op->ob_sstate = SSTATE_NOT_INTERNED;
1064 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1065 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1066 op->ob_sval[size] = '\0';
1067 return (PyObject *) op;
1068#undef b
1069}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001070
Christian Heimes44720832008-05-26 13:01:01 +00001071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001073{
1074 register Py_ssize_t i;
1075 register Py_ssize_t j;
1076 register Py_ssize_t size;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001077 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +00001078 size_t nbytes;
1079 if (n < 0)
1080 n = 0;
1081 /* watch out for overflows: the size can overflow int,
1082 * and the # of bytes needed can overflow size_t
1083 */
1084 size = Py_SIZE(a) * n;
1085 if (n && size / n != Py_SIZE(a)) {
1086 PyErr_SetString(PyExc_OverflowError,
1087 "repeated string is too long");
1088 return NULL;
1089 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001090 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001091 Py_INCREF(a);
1092 return (PyObject *)a;
1093 }
1094 nbytes = (size_t)size;
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001095 if (nbytes + PyStringObject_SIZE <= nbytes) {
Christian Heimes44720832008-05-26 13:01:01 +00001096 PyErr_SetString(PyExc_OverflowError,
1097 "repeated string is too long");
1098 return NULL;
1099 }
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001100 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
Christian Heimes44720832008-05-26 13:01:01 +00001101 if (op == NULL)
1102 return PyErr_NoMemory();
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001103 PyObject_INIT_VAR(op, &PyString_Type, size);
Christian Heimes44720832008-05-26 13:01:01 +00001104 op->ob_shash = -1;
1105 op->ob_sstate = SSTATE_NOT_INTERNED;
1106 op->ob_sval[size] = '\0';
1107 if (Py_SIZE(a) == 1 && n > 0) {
1108 memset(op->ob_sval, a->ob_sval[0] , n);
1109 return (PyObject *) op;
1110 }
1111 i = 0;
1112 if (i < size) {
1113 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1114 i = Py_SIZE(a);
1115 }
1116 while (i < size) {
1117 j = (i <= size-i) ? i : size-i;
1118 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1119 i += j;
1120 }
1121 return (PyObject *) op;
1122}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001123
Christian Heimes44720832008-05-26 13:01:01 +00001124/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125
1126static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001127string_slice(register PyStringObject *a, register Py_ssize_t i,
Christian Heimes44720832008-05-26 13:01:01 +00001128 register Py_ssize_t j)
1129 /* j -- may be negative! */
1130{
1131 if (i < 0)
1132 i = 0;
1133 if (j < 0)
1134 j = 0; /* Avoid signed/unsigned bug in next line */
1135 if (j > Py_SIZE(a))
1136 j = Py_SIZE(a);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001137 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Christian Heimes44720832008-05-26 13:01:01 +00001138 /* It's the same as a */
1139 Py_INCREF(a);
1140 return (PyObject *)a;
1141 }
1142 if (j < i)
1143 j = i;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001144 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001145}
1146
1147static int
1148string_contains(PyObject *str_obj, PyObject *sub_obj)
1149{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001150 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001151#ifdef Py_USING_UNICODE
1152 if (PyUnicode_Check(sub_obj))
1153 return PyUnicode_Contains(str_obj, sub_obj);
1154#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001155 if (!PyString_Check(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001156 PyErr_Format(PyExc_TypeError,
1157 "'in <string>' requires string as left operand, "
1158 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1159 return -1;
1160 }
1161 }
1162
1163 return stringlib_contains_obj(str_obj, sub_obj);
1164}
1165
1166static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001167string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001168{
1169 char pchar;
1170 PyObject *v;
1171 if (i < 0 || i >= Py_SIZE(a)) {
1172 PyErr_SetString(PyExc_IndexError, "string index out of range");
1173 return NULL;
1174 }
1175 pchar = a->ob_sval[i];
1176 v = (PyObject *)characters[pchar & UCHAR_MAX];
1177 if (v == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001178 v = PyString_FromStringAndSize(&pchar, 1);
Christian Heimes44720832008-05-26 13:01:01 +00001179 else {
1180#ifdef COUNT_ALLOCS
1181 one_strings++;
1182#endif
1183 Py_INCREF(v);
1184 }
1185 return v;
1186}
1187
1188static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001189string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001190{
1191 int c;
1192 Py_ssize_t len_a, len_b;
1193 Py_ssize_t min_len;
1194 PyObject *result;
1195
1196 /* Make sure both arguments are strings. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001197 if (!(PyString_Check(a) && PyString_Check(b))) {
Christian Heimes44720832008-05-26 13:01:01 +00001198 result = Py_NotImplemented;
1199 goto out;
1200 }
1201 if (a == b) {
1202 switch (op) {
1203 case Py_EQ:case Py_LE:case Py_GE:
1204 result = Py_True;
1205 goto out;
1206 case Py_NE:case Py_LT:case Py_GT:
1207 result = Py_False;
1208 goto out;
1209 }
1210 }
1211 if (op == Py_EQ) {
1212 /* Supporting Py_NE here as well does not save
1213 much time, since Py_NE is rarely used. */
1214 if (Py_SIZE(a) == Py_SIZE(b)
1215 && (a->ob_sval[0] == b->ob_sval[0]
1216 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1217 result = Py_True;
1218 } else {
1219 result = Py_False;
1220 }
1221 goto out;
1222 }
1223 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1224 min_len = (len_a < len_b) ? len_a : len_b;
1225 if (min_len > 0) {
1226 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1227 if (c==0)
1228 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1229 } else
1230 c = 0;
1231 if (c == 0)
1232 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1233 switch (op) {
1234 case Py_LT: c = c < 0; break;
1235 case Py_LE: c = c <= 0; break;
1236 case Py_EQ: assert(0); break; /* unreachable */
1237 case Py_NE: c = c != 0; break;
1238 case Py_GT: c = c > 0; break;
1239 case Py_GE: c = c >= 0; break;
1240 default:
1241 result = Py_NotImplemented;
1242 goto out;
1243 }
1244 result = c ? Py_True : Py_False;
1245 out:
1246 Py_INCREF(result);
1247 return result;
1248}
1249
1250int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001251_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001252{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001253 PyStringObject *a = (PyStringObject*) o1;
1254 PyStringObject *b = (PyStringObject*) o2;
Christian Heimes44720832008-05-26 13:01:01 +00001255 return Py_SIZE(a) == Py_SIZE(b)
1256 && *a->ob_sval == *b->ob_sval
1257 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1258}
1259
1260static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001261string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001262{
1263 register Py_ssize_t len;
1264 register unsigned char *p;
1265 register long x;
1266
1267 if (a->ob_shash != -1)
1268 return a->ob_shash;
1269 len = Py_SIZE(a);
1270 p = (unsigned char *) a->ob_sval;
1271 x = *p << 7;
1272 while (--len >= 0)
1273 x = (1000003*x) ^ *p++;
1274 x ^= Py_SIZE(a);
1275 if (x == -1)
1276 x = -2;
1277 a->ob_shash = x;
1278 return x;
1279}
1280
1281static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001282string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001283{
1284 if (PyIndex_Check(item)) {
1285 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1286 if (i == -1 && PyErr_Occurred())
1287 return NULL;
1288 if (i < 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001289 i += PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001290 return string_item(self, i);
1291 }
1292 else if (PySlice_Check(item)) {
1293 Py_ssize_t start, stop, step, slicelength, cur, i;
1294 char* source_buf;
1295 char* result_buf;
1296 PyObject* result;
1297
1298 if (PySlice_GetIndicesEx((PySliceObject*)item,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001299 PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001300 &start, &stop, &step, &slicelength) < 0) {
1301 return NULL;
1302 }
1303
1304 if (slicelength <= 0) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001305 return PyString_FromStringAndSize("", 0);
Christian Heimes44720832008-05-26 13:01:01 +00001306 }
1307 else if (start == 0 && step == 1 &&
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001308 slicelength == PyString_GET_SIZE(self) &&
1309 PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001310 Py_INCREF(self);
1311 return (PyObject *)self;
1312 }
1313 else if (step == 1) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001314 return PyString_FromStringAndSize(
1315 PyString_AS_STRING(self) + start,
Christian Heimes44720832008-05-26 13:01:01 +00001316 slicelength);
1317 }
1318 else {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001319 source_buf = PyString_AsString((PyObject*)self);
Christian Heimes44720832008-05-26 13:01:01 +00001320 result_buf = (char *)PyMem_Malloc(slicelength);
1321 if (result_buf == NULL)
1322 return PyErr_NoMemory();
1323
1324 for (cur = start, i = 0; i < slicelength;
1325 cur += step, i++) {
1326 result_buf[i] = source_buf[cur];
1327 }
1328
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001329 result = PyString_FromStringAndSize(result_buf,
Christian Heimes44720832008-05-26 13:01:01 +00001330 slicelength);
1331 PyMem_Free(result_buf);
1332 return result;
1333 }
1334 }
1335 else {
1336 PyErr_Format(PyExc_TypeError,
1337 "string indices must be integers, not %.200s",
1338 Py_TYPE(item)->tp_name);
1339 return NULL;
1340 }
1341}
1342
1343static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001344string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001345{
1346 if ( index != 0 ) {
1347 PyErr_SetString(PyExc_SystemError,
1348 "accessing non-existent string segment");
1349 return -1;
1350 }
1351 *ptr = (void *)self->ob_sval;
1352 return Py_SIZE(self);
1353}
1354
1355static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001356string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001357{
1358 PyErr_SetString(PyExc_TypeError,
1359 "Cannot use string as modifiable buffer");
1360 return -1;
1361}
1362
1363static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001364string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001365{
1366 if ( lenp )
1367 *lenp = Py_SIZE(self);
1368 return 1;
1369}
1370
1371static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001372string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001373{
1374 if ( index != 0 ) {
1375 PyErr_SetString(PyExc_SystemError,
1376 "accessing non-existent string segment");
1377 return -1;
1378 }
1379 *ptr = self->ob_sval;
1380 return Py_SIZE(self);
1381}
1382
1383static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001384string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001385{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001386 return PyBuffer_FillInfo(view, (PyObject*)self,
1387 (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou92a62402008-08-02 21:58:05 +00001388 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001389}
1390
/* Sequence protocol slot table for str.  Entries are positional, so
   their order must match the PySequenceMethods layout.  The two
   assignment slots are left empty (0): no item or slice assignment
   function is provided for strings. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1401
/* Mapping protocol slot table for str: length, subscript lookup
   (integer indices and slices, see string_subscript), and an empty
   third slot -- presumably mp_ass_subscript; confirm against the
   PyMappingMethods declaration. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1407
/* Buffer protocol slot table for str.  Entries are positional: the four
   segment-oriented accessors come first, then the Py_buffer exporter.
   The final slot is left empty -- presumably the release hook; confirm
   against the PyBufferProcs declaration. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */
};
1416
1417
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001418
/* Selectors naming which end(s) of a string a strip operation affects.
   They double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* NOTE(review): the "|O:name" shape looks like a PyArg_ParseTuple format
   (optional object argument, then the method name) -- confirm at the
   call sites, which are outside this section. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for selector i: the format string with its
   3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1427
Christian Heimes1a6387e2008-03-26 12:49:49 +00001428
/* True if the `length' bytes of `target' starting at `offset' equal the
   first `length' bytes of `pattern'.  The first and last bytes are
   compared directly and memcmp() covers the length-2 bytes in between.

   Don't call if length < 2.

   Every argument is parenthesized in the expansion so that expression
   arguments (e.g. `i + 1' or `a ? b : c') bind as the caller intends.
   Arguments are evaluated more than once, so they must be free of side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)          \
    ((target)[(offset)] == (pattern)[0] &&                        \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&    \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1434
1435
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit'
   splits: one slot per resulting element (5 splits gives 6 elements),
   capped at MAX_PREALLOC.  The argument is parenthesized so expression
   arguments expand correctly; it is evaluated twice, so it must be free
   of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001448
/* The SPLIT_* macros below are not self-contained: they read and write
   locals of the function that expands them -- `str' (scratch PyObject *),
   `list' (the result list), `count' (SPLIT_ADD only) -- and jump to a
   label named `onError' on failure. */

/* Append the substring data[left:right] to `list' as a new string.
   The temporary reference in `str' is dropped whether or not the append
   succeeds.  NOTE: the expansion is several statements that are not
   wrapped in a block, so it must not be used as the unbraced body of an
   if/else/loop. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);

/* Add the substring data[left:right] as element number `count' of
   `list', then increment `count'.  While count < MAX_PREALLOC the new
   string is stored directly into slot `count' with PyList_SET_ITEM (the
   list takes over the reference); after that it is appended with
   PyList_Append and the local reference is dropped. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }

/* Always force the list to the expected size. */
/* (Overwrites the list's size field with `count', the number of
   elements actually added.) */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Scanning helpers.  `i' must be a modifiable variable; it is advanced
   (SKIP_*) or moved backwards (RSKIP_*) in place.  The forward forms
   stop at `len'; the reverse forms stop once i drops below 0. */
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485
1486Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001487split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001489 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001490 Py_ssize_t i, j, count=0;
1491 PyObject *str;
1492 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001493
Christian Heimes44720832008-05-26 13:01:01 +00001494 if (list == NULL)
1495 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001496
Christian Heimes44720832008-05-26 13:01:01 +00001497 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498
Christian Heimes44720832008-05-26 13:01:01 +00001499 while (maxsplit-- > 0) {
1500 SKIP_SPACE(s, i, len);
1501 if (i==len) break;
1502 j = i; i++;
1503 SKIP_NONSPACE(s, i, len);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001504 if (j == 0 && i == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001505 /* No whitespace in self, so just use it as list[0] */
1506 Py_INCREF(self);
1507 PyList_SET_ITEM(list, 0, (PyObject *)self);
1508 count++;
1509 break;
1510 }
1511 SPLIT_ADD(s, j, i);
1512 }
1513
1514 if (i < len) {
1515 /* Only occurs when maxsplit was reached */
1516 /* Skip any remaining whitespace and copy to end of string */
1517 SKIP_SPACE(s, i, len);
1518 if (i != len)
1519 SPLIT_ADD(s, i, len);
1520 }
1521 FIX_PREALLOC_SIZE(list);
1522 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001524 Py_DECREF(list);
1525 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001526}
1527
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001529split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001531 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001532 register Py_ssize_t i, j, count=0;
1533 PyObject *str;
1534 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001535
Christian Heimes44720832008-05-26 13:01:01 +00001536 if (list == NULL)
1537 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538
Christian Heimes44720832008-05-26 13:01:01 +00001539 i = j = 0;
1540 while ((j < len) && (maxcount-- > 0)) {
1541 for(; j<len; j++) {
1542 /* I found that using memchr makes no difference */
1543 if (s[j] == ch) {
1544 SPLIT_ADD(s, i, j);
1545 i = j = j + 1;
1546 break;
1547 }
1548 }
1549 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001550 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001551 /* ch not in self, so just use self as list[0] */
1552 Py_INCREF(self);
1553 PyList_SET_ITEM(list, 0, (PyObject *)self);
1554 count++;
1555 }
1556 else if (i <= len) {
1557 SPLIT_ADD(s, i, len);
1558 }
1559 FIX_PREALLOC_SIZE(list);
1560 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001561
1562 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001563 Py_DECREF(list);
1564 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565}
1566
1567PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001568"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569\n\
Christian Heimes44720832008-05-26 13:01:01 +00001570Return a list of the words in the string S, using sep as the\n\
1571delimiter string. If maxsplit is given, at most maxsplit\n\
1572splits are done. If sep is not specified or is None, any\n\
1573whitespace string is a separator and empty strings are removed\n\
1574from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001575
1576static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001577string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001578{
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001579 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j, pos;
Christian Heimes44720832008-05-26 13:01:01 +00001580 Py_ssize_t maxsplit = -1, count=0;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001581 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001582 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001583
Christian Heimes44720832008-05-26 13:01:01 +00001584 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1585 return NULL;
1586 if (maxsplit < 0)
1587 maxsplit = PY_SSIZE_T_MAX;
1588 if (subobj == Py_None)
1589 return split_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001590 if (PyString_Check(subobj)) {
1591 sub = PyString_AS_STRING(subobj);
1592 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001593 }
1594#ifdef Py_USING_UNICODE
1595 else if (PyUnicode_Check(subobj))
1596 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1597#endif
1598 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1599 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001600
Christian Heimes44720832008-05-26 13:01:01 +00001601 if (n == 0) {
1602 PyErr_SetString(PyExc_ValueError, "empty separator");
1603 return NULL;
1604 }
1605 else if (n == 1)
1606 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001607
Christian Heimes44720832008-05-26 13:01:01 +00001608 list = PyList_New(PREALLOC_SIZE(maxsplit));
1609 if (list == NULL)
1610 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611
Christian Heimes44720832008-05-26 13:01:01 +00001612 i = j = 0;
1613 while (maxsplit-- > 0) {
1614 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1615 if (pos < 0)
1616 break;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001617 j = i + pos;
Christian Heimes44720832008-05-26 13:01:01 +00001618 SPLIT_ADD(s, i, j);
1619 i = j + n;
1620 }
Christian Heimes44720832008-05-26 13:01:01 +00001621 SPLIT_ADD(s, i, len);
1622 FIX_PREALLOC_SIZE(list);
1623 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001624
Christian Heimes44720832008-05-26 13:01:01 +00001625 onError:
1626 Py_DECREF(list);
1627 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001628}
1629
1630PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001631"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001632\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001633Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001634the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001635found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001636
1637static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001638string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639{
Christian Heimes44720832008-05-26 13:01:01 +00001640 const char *sep;
1641 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001643 if (PyString_Check(sep_obj)) {
1644 sep = PyString_AS_STRING(sep_obj);
1645 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001646 }
1647#ifdef Py_USING_UNICODE
1648 else if (PyUnicode_Check(sep_obj))
1649 return PyUnicode_Partition((PyObject *) self, sep_obj);
1650#endif
1651 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1652 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001653
Christian Heimes44720832008-05-26 13:01:01 +00001654 return stringlib_partition(
1655 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001656 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001657 sep_obj, sep, sep_len
1658 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001659}
1660
1661PyDoc_STRVAR(rpartition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001662"S.rpartition(sep) -> (tail, sep, head)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001664Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001665the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001666separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001667
1668static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001669string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001670{
Christian Heimes44720832008-05-26 13:01:01 +00001671 const char *sep;
1672 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001674 if (PyString_Check(sep_obj)) {
1675 sep = PyString_AS_STRING(sep_obj);
1676 sep_len = PyString_GET_SIZE(sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001677 }
1678#ifdef Py_USING_UNICODE
1679 else if (PyUnicode_Check(sep_obj))
Amaury Forgeot d'Arc3571fbf2008-09-01 19:52:00 +00001680 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001681#endif
1682 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1683 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001684
Christian Heimes44720832008-05-26 13:01:01 +00001685 return stringlib_rpartition(
1686 (PyObject*) self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001687 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00001688 sep_obj, sep, sep_len
1689 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001690}
1691
1692Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001693rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001694{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001695 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001696 Py_ssize_t i, j, count=0;
1697 PyObject *str;
1698 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001699
Christian Heimes44720832008-05-26 13:01:01 +00001700 if (list == NULL)
1701 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001702
Christian Heimes44720832008-05-26 13:01:01 +00001703 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001704
Christian Heimes44720832008-05-26 13:01:01 +00001705 while (maxsplit-- > 0) {
1706 RSKIP_SPACE(s, i);
1707 if (i<0) break;
1708 j = i; i--;
1709 RSKIP_NONSPACE(s, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001710 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001711 /* No whitespace in self, so just use it as list[0] */
1712 Py_INCREF(self);
1713 PyList_SET_ITEM(list, 0, (PyObject *)self);
1714 count++;
1715 break;
1716 }
1717 SPLIT_ADD(s, i + 1, j + 1);
1718 }
1719 if (i >= 0) {
1720 /* Only occurs when maxsplit was reached */
1721 /* Skip any remaining whitespace and copy to beginning of string */
1722 RSKIP_SPACE(s, i);
1723 if (i >= 0)
1724 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001725
Christian Heimes44720832008-05-26 13:01:01 +00001726 }
1727 FIX_PREALLOC_SIZE(list);
1728 if (PyList_Reverse(list) < 0)
1729 goto onError;
1730 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001731 onError:
Christian Heimes44720832008-05-26 13:01:01 +00001732 Py_DECREF(list);
1733 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001734}
1735
1736Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001737rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001738{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001739 const char *s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00001740 register Py_ssize_t i, j, count=0;
1741 PyObject *str;
1742 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001743
Christian Heimes44720832008-05-26 13:01:01 +00001744 if (list == NULL)
1745 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001746
Christian Heimes44720832008-05-26 13:01:01 +00001747 i = j = len - 1;
1748 while ((i >= 0) && (maxcount-- > 0)) {
1749 for (; i >= 0; i--) {
1750 if (s[i] == ch) {
1751 SPLIT_ADD(s, i + 1, j + 1);
1752 j = i = i - 1;
1753 break;
1754 }
1755 }
1756 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001757 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00001758 /* ch not in self, so just use self as list[0] */
1759 Py_INCREF(self);
1760 PyList_SET_ITEM(list, 0, (PyObject *)self);
1761 count++;
1762 }
1763 else if (j >= -1) {
1764 SPLIT_ADD(s, 0, j + 1);
1765 }
1766 FIX_PREALLOC_SIZE(list);
1767 if (PyList_Reverse(list) < 0)
1768 goto onError;
1769 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001770
Christian Heimes44720832008-05-26 13:01:01 +00001771 onError:
1772 Py_DECREF(list);
1773 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001774}
1775
1776PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001777"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001778\n\
Christian Heimes44720832008-05-26 13:01:01 +00001779Return a list of the words in the string S, using sep as the\n\
1780delimiter string, starting at the end of the string and working\n\
1781to the front. If maxsplit is given, at most maxsplit splits are\n\
1782done. If sep is not specified or is None, any whitespace string\n\
1783is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001784
1785static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001786string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001787{
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001788 Py_ssize_t len = PyString_GET_SIZE(self), n, j, pos;
Christian Heimes44720832008-05-26 13:01:01 +00001789 Py_ssize_t maxsplit = -1, count=0;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001790 const char *s = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00001791 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001792
Christian Heimes44720832008-05-26 13:01:01 +00001793 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1794 return NULL;
1795 if (maxsplit < 0)
1796 maxsplit = PY_SSIZE_T_MAX;
1797 if (subobj == Py_None)
1798 return rsplit_whitespace(self, len, maxsplit);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001799 if (PyString_Check(subobj)) {
1800 sub = PyString_AS_STRING(subobj);
1801 n = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001802 }
1803#ifdef Py_USING_UNICODE
1804 else if (PyUnicode_Check(subobj))
1805 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1806#endif
1807 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1808 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001809
Christian Heimes44720832008-05-26 13:01:01 +00001810 if (n == 0) {
1811 PyErr_SetString(PyExc_ValueError, "empty separator");
1812 return NULL;
1813 }
1814 else if (n == 1)
1815 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001816
Christian Heimes44720832008-05-26 13:01:01 +00001817 list = PyList_New(PREALLOC_SIZE(maxsplit));
1818 if (list == NULL)
1819 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001820
Christian Heimes44720832008-05-26 13:01:01 +00001821 j = len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001822
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00001823 while (maxsplit-- > 0) {
1824 pos = fastsearch(s, j, sub, n, FAST_RSEARCH);
1825 if (pos < 0)
1826 break;
1827 SPLIT_ADD(s, pos + n, j);
1828 j = pos;
Christian Heimes44720832008-05-26 13:01:01 +00001829 }
1830 SPLIT_ADD(s, 0, j);
1831 FIX_PREALLOC_SIZE(list);
1832 if (PyList_Reverse(list) < 0)
1833 goto onError;
1834 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001835
1836onError:
Christian Heimes44720832008-05-26 13:01:01 +00001837 Py_DECREF(list);
1838 return NULL;
1839}
1840
1841
1842PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001843"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001844\n\
1845Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001846iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001847
1848static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001849string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001850{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001851 char *sep = PyString_AS_STRING(self);
1852 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001853 PyObject *res = NULL;
1854 char *p;
1855 Py_ssize_t seqlen = 0;
1856 size_t sz = 0;
1857 Py_ssize_t i;
1858 PyObject *seq, *item;
1859
1860 seq = PySequence_Fast(orig, "");
1861 if (seq == NULL) {
1862 return NULL;
1863 }
1864
1865 seqlen = PySequence_Size(seq);
1866 if (seqlen == 0) {
1867 Py_DECREF(seq);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001868 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00001869 }
1870 if (seqlen == 1) {
1871 item = PySequence_Fast_GET_ITEM(seq, 0);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001872 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Christian Heimes44720832008-05-26 13:01:01 +00001873 Py_INCREF(item);
1874 Py_DECREF(seq);
1875 return item;
1876 }
1877 }
1878
1879 /* There are at least two things to join, or else we have a subclass
1880 * of the builtin types in the sequence.
1881 * Do a pre-pass to figure out the total amount of space we'll
1882 * need (sz), see whether any argument is absurd, and defer to
1883 * the Unicode join if appropriate.
1884 */
1885 for (i = 0; i < seqlen; i++) {
1886 const size_t old_sz = sz;
1887 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001888 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001889#ifdef Py_USING_UNICODE
1890 if (PyUnicode_Check(item)) {
1891 /* Defer to Unicode join.
1892 * CAUTION: There's no gurantee that the
1893 * original sequence can be iterated over
1894 * again, so we must pass seq here.
1895 */
1896 PyObject *result;
1897 result = PyUnicode_Join((PyObject *)self, seq);
1898 Py_DECREF(seq);
1899 return result;
1900 }
1901#endif
1902 PyErr_Format(PyExc_TypeError,
1903 "sequence item %zd: expected string,"
1904 " %.80s found",
1905 i, Py_TYPE(item)->tp_name);
1906 Py_DECREF(seq);
1907 return NULL;
1908 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001909 sz += PyString_GET_SIZE(item);
Christian Heimes44720832008-05-26 13:01:01 +00001910 if (i != 0)
1911 sz += seplen;
1912 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1913 PyErr_SetString(PyExc_OverflowError,
1914 "join() result is too long for a Python string");
1915 Py_DECREF(seq);
1916 return NULL;
1917 }
1918 }
1919
1920 /* Allocate result space. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001921 res = PyString_FromStringAndSize((char*)NULL, sz);
Christian Heimes44720832008-05-26 13:01:01 +00001922 if (res == NULL) {
1923 Py_DECREF(seq);
1924 return NULL;
1925 }
1926
1927 /* Catenate everything. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001928 p = PyString_AS_STRING(res);
Christian Heimes44720832008-05-26 13:01:01 +00001929 for (i = 0; i < seqlen; ++i) {
1930 size_t n;
1931 item = PySequence_Fast_GET_ITEM(seq, i);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001932 n = PyString_GET_SIZE(item);
1933 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Christian Heimes44720832008-05-26 13:01:01 +00001934 p += n;
1935 if (i < seqlen - 1) {
1936 Py_MEMCPY(p, sep, seplen);
1937 p += seplen;
1938 }
1939 }
1940
1941 Py_DECREF(seq);
1942 return res;
1943}
1944
1945PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001946_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001947{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001948 assert(sep != NULL && PyString_Check(sep));
Christian Heimes44720832008-05-26 13:01:01 +00001949 assert(x != NULL);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001950 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001951}
1952
1953Py_LOCAL_INLINE(void)
1954string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1955{
1956 if (*end > len)
1957 *end = len;
1958 else if (*end < 0)
1959 *end += len;
1960 if (*end < 0)
1961 *end = 0;
1962 if (*start < 0)
1963 *start += len;
1964 if (*start < 0)
1965 *start = 0;
1966}
1967
1968Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001969string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001970{
1971 PyObject *subobj;
1972 const char *sub;
1973 Py_ssize_t sub_len;
1974 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1975 PyObject *obj_start=Py_None, *obj_end=Py_None;
1976
1977 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1978 &obj_start, &obj_end))
1979 return -2;
1980 /* To support None in "start" and "end" arguments, meaning
1981 the same as if they were not passed.
1982 */
1983 if (obj_start != Py_None)
1984 if (!_PyEval_SliceIndex(obj_start, &start))
1985 return -2;
1986 if (obj_end != Py_None)
1987 if (!_PyEval_SliceIndex(obj_end, &end))
1988 return -2;
1989
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990 if (PyString_Check(subobj)) {
1991 sub = PyString_AS_STRING(subobj);
1992 sub_len = PyString_GET_SIZE(subobj);
Christian Heimes44720832008-05-26 13:01:01 +00001993 }
1994#ifdef Py_USING_UNICODE
1995 else if (PyUnicode_Check(subobj))
1996 return PyUnicode_Find(
1997 (PyObject *)self, subobj, start, end, dir);
1998#endif
1999 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2000 /* XXX - the "expected a character buffer object" is pretty
2001 confusing for a non-expert. remap to something else ? */
2002 return -2;
2003
2004 if (dir > 0)
2005 return stringlib_find_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002006 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00002007 sub, sub_len, start, end);
2008 else
2009 return stringlib_rfind_slice(
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002010 PyString_AS_STRING(self), PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00002011 sub, sub_len, start, end);
2012}
2013
2014
2015PyDoc_STRVAR(find__doc__,
2016"S.find(sub [,start [,end]]) -> int\n\
2017\n\
2018Return the lowest index in S where substring sub is found,\n\
2019such that sub is contained within s[start:end]. Optional\n\
2020arguments start and end are interpreted as in slice notation.\n\
2021\n\
2022Return -1 on failure.");
2023
2024static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002025string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002026{
2027 Py_ssize_t result = string_find_internal(self, args, +1);
2028 if (result == -2)
2029 return NULL;
2030 return PyInt_FromSsize_t(result);
2031}
2032
2033
2034PyDoc_STRVAR(index__doc__,
2035"S.index(sub [,start [,end]]) -> int\n\
2036\n\
2037Like S.find() but raise ValueError when the substring is not found.");
2038
2039static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002040string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002041{
2042 Py_ssize_t result = string_find_internal(self, args, +1);
2043 if (result == -2)
2044 return NULL;
2045 if (result == -1) {
2046 PyErr_SetString(PyExc_ValueError,
2047 "substring not found");
2048 return NULL;
2049 }
2050 return PyInt_FromSsize_t(result);
2051}
2052
2053
2054PyDoc_STRVAR(rfind__doc__,
2055"S.rfind(sub [,start [,end]]) -> int\n\
2056\n\
2057Return the highest index in S where substring sub is found,\n\
2058such that sub is contained within s[start:end]. Optional\n\
2059arguments start and end are interpreted as in slice notation.\n\
2060\n\
2061Return -1 on failure.");
2062
2063static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002064string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002065{
2066 Py_ssize_t result = string_find_internal(self, args, -1);
2067 if (result == -2)
2068 return NULL;
2069 return PyInt_FromSsize_t(result);
2070}
2071
2072
2073PyDoc_STRVAR(rindex__doc__,
2074"S.rindex(sub [,start [,end]]) -> int\n\
2075\n\
2076Like S.rfind() but raise ValueError when the substring is not found.");
2077
2078static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002079string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002080{
2081 Py_ssize_t result = string_find_internal(self, args, -1);
2082 if (result == -2)
2083 return NULL;
2084 if (result == -1) {
2085 PyErr_SetString(PyExc_ValueError,
2086 "substring not found");
2087 return NULL;
2088 }
2089 return PyInt_FromSsize_t(result);
2090}
2091
2092
2093Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002094do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002095{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002096 char *s = PyString_AS_STRING(self);
2097 Py_ssize_t len = PyString_GET_SIZE(self);
2098 char *sep = PyString_AS_STRING(sepobj);
2099 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
Christian Heimes44720832008-05-26 13:01:01 +00002100 Py_ssize_t i, j;
2101
2102 i = 0;
2103 if (striptype != RIGHTSTRIP) {
2104 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2105 i++;
2106 }
2107 }
2108
2109 j = len;
2110 if (striptype != LEFTSTRIP) {
2111 do {
2112 j--;
2113 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2114 j++;
2115 }
2116
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002117 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002118 Py_INCREF(self);
2119 return (PyObject*)self;
2120 }
2121 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002122 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002123}
2124
2125
2126Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002127do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002128{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002129 char *s = PyString_AS_STRING(self);
2130 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002131
2132 i = 0;
2133 if (striptype != RIGHTSTRIP) {
2134 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2135 i++;
2136 }
2137 }
2138
2139 j = len;
2140 if (striptype != LEFTSTRIP) {
2141 do {
2142 j--;
2143 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2144 j++;
2145 }
2146
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002147 if (i == 0 && j == len && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002148 Py_INCREF(self);
2149 return (PyObject*)self;
2150 }
2151 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002152 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002153}
2154
2155
2156Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002157do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002158{
2159 PyObject *sep = NULL;
2160
2161 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2162 return NULL;
2163
2164 if (sep != NULL && sep != Py_None) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002165 if (PyString_Check(sep))
Christian Heimes44720832008-05-26 13:01:01 +00002166 return do_xstrip(self, striptype, sep);
2167#ifdef Py_USING_UNICODE
2168 else if (PyUnicode_Check(sep)) {
2169 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2170 PyObject *res;
2171 if (uniself==NULL)
2172 return NULL;
2173 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2174 striptype, sep);
2175 Py_DECREF(uniself);
2176 return res;
2177 }
2178#endif
2179 PyErr_Format(PyExc_TypeError,
2180#ifdef Py_USING_UNICODE
2181 "%s arg must be None, str or unicode",
2182#else
2183 "%s arg must be None or str",
2184#endif
2185 STRIPNAME(striptype));
2186 return NULL;
2187 }
2188
2189 return do_strip(self, striptype);
2190}
2191
2192
2193PyDoc_STRVAR(strip__doc__,
2194"S.strip([chars]) -> string or unicode\n\
2195\n\
2196Return a copy of the string S with leading and trailing\n\
2197whitespace removed.\n\
2198If chars is given and not None, remove characters in chars instead.\n\
2199If chars is unicode, S will be converted to unicode before stripping");
2200
2201static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002202string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002203{
2204 if (PyTuple_GET_SIZE(args) == 0)
2205 return do_strip(self, BOTHSTRIP); /* Common case */
2206 else
2207 return do_argstrip(self, BOTHSTRIP, args);
2208}
2209
2210
2211PyDoc_STRVAR(lstrip__doc__,
2212"S.lstrip([chars]) -> string or unicode\n\
2213\n\
2214Return a copy of the string S with leading whitespace removed.\n\
2215If chars is given and not None, remove characters in chars instead.\n\
2216If chars is unicode, S will be converted to unicode before stripping");
2217
2218static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002219string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002220{
2221 if (PyTuple_GET_SIZE(args) == 0)
2222 return do_strip(self, LEFTSTRIP); /* Common case */
2223 else
2224 return do_argstrip(self, LEFTSTRIP, args);
2225}
2226
2227
2228PyDoc_STRVAR(rstrip__doc__,
2229"S.rstrip([chars]) -> string or unicode\n\
2230\n\
2231Return a copy of the string S with trailing whitespace removed.\n\
2232If chars is given and not None, remove characters in chars instead.\n\
2233If chars is unicode, S will be converted to unicode before stripping");
2234
2235static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002236string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002237{
2238 if (PyTuple_GET_SIZE(args) == 0)
2239 return do_strip(self, RIGHTSTRIP); /* Common case */
2240 else
2241 return do_argstrip(self, RIGHTSTRIP, args);
2242}
2243
2244
2245PyDoc_STRVAR(lower__doc__,
2246"S.lower() -> string\n\
2247\n\
2248Return a copy of the string S converted to lowercase.");
2249
2250/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2251#ifndef _tolower
2252#define _tolower tolower
2253#endif
2254
2255static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002256string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002257{
2258 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002259 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002260 PyObject *newobj;
2261
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002262 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002263 if (!newobj)
2264 return NULL;
2265
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002267
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002268 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002269
2270 for (i = 0; i < n; i++) {
2271 int c = Py_CHARMASK(s[i]);
2272 if (isupper(c))
2273 s[i] = _tolower(c);
2274 }
2275
2276 return newobj;
2277}
2278
2279PyDoc_STRVAR(upper__doc__,
2280"S.upper() -> string\n\
2281\n\
2282Return a copy of the string S converted to uppercase.");
2283
2284#ifndef _toupper
2285#define _toupper toupper
2286#endif
2287
2288static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002289string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002290{
2291 char *s;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002292 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002293 PyObject *newobj;
2294
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002295 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002296 if (!newobj)
2297 return NULL;
2298
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002299 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002300
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002301 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002302
2303 for (i = 0; i < n; i++) {
2304 int c = Py_CHARMASK(s[i]);
2305 if (islower(c))
2306 s[i] = _toupper(c);
2307 }
2308
2309 return newobj;
2310}
2311
2312PyDoc_STRVAR(title__doc__,
2313"S.title() -> string\n\
2314\n\
2315Return a titlecased version of S, i.e. words start with uppercase\n\
2316characters, all remaining cased characters have lowercase.");
2317
2318static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002319string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002320{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002321 char *s = PyString_AS_STRING(self), *s_new;
2322 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002323 int previous_is_cased = 0;
2324 PyObject *newobj;
2325
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002326 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002327 if (newobj == NULL)
2328 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002329 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002330 for (i = 0; i < n; i++) {
2331 int c = Py_CHARMASK(*s++);
2332 if (islower(c)) {
2333 if (!previous_is_cased)
2334 c = toupper(c);
2335 previous_is_cased = 1;
2336 } else if (isupper(c)) {
2337 if (previous_is_cased)
2338 c = tolower(c);
2339 previous_is_cased = 1;
2340 } else
2341 previous_is_cased = 0;
2342 *s_new++ = c;
2343 }
2344 return newobj;
2345}
2346
2347PyDoc_STRVAR(capitalize__doc__,
2348"S.capitalize() -> string\n\
2349\n\
2350Return a copy of the string S with only its first character\n\
2351capitalized.");
2352
2353static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002354string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002355{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002356 char *s = PyString_AS_STRING(self), *s_new;
2357 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002358 PyObject *newobj;
2359
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002360 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002361 if (newobj == NULL)
2362 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002363 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002364 if (0 < n) {
2365 int c = Py_CHARMASK(*s++);
2366 if (islower(c))
2367 *s_new = toupper(c);
2368 else
2369 *s_new = c;
2370 s_new++;
2371 }
2372 for (i = 1; i < n; i++) {
2373 int c = Py_CHARMASK(*s++);
2374 if (isupper(c))
2375 *s_new = tolower(c);
2376 else
2377 *s_new = c;
2378 s_new++;
2379 }
2380 return newobj;
2381}
2382
2383
2384PyDoc_STRVAR(count__doc__,
2385"S.count(sub[, start[, end]]) -> int\n\
2386\n\
2387Return the number of non-overlapping occurrences of substring sub in\n\
2388string S[start:end]. Optional arguments start and end are interpreted\n\
2389as in slice notation.");
2390
2391static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002392string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002393{
2394 PyObject *sub_obj;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002395 const char *str = PyString_AS_STRING(self), *sub;
Christian Heimes44720832008-05-26 13:01:01 +00002396 Py_ssize_t sub_len;
2397 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2398
2399 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2400 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2401 return NULL;
2402
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002403 if (PyString_Check(sub_obj)) {
2404 sub = PyString_AS_STRING(sub_obj);
2405 sub_len = PyString_GET_SIZE(sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002406 }
2407#ifdef Py_USING_UNICODE
2408 else if (PyUnicode_Check(sub_obj)) {
2409 Py_ssize_t count;
2410 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2411 if (count == -1)
2412 return NULL;
2413 else
2414 return PyInt_FromSsize_t(count);
2415 }
2416#endif
2417 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2418 return NULL;
2419
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002420 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002421
2422 return PyInt_FromSsize_t(
2423 stringlib_count(str + start, end - start, sub, sub_len)
2424 );
2425}
2426
2427PyDoc_STRVAR(swapcase__doc__,
2428"S.swapcase() -> string\n\
2429\n\
2430Return a copy of the string S with uppercase characters\n\
2431converted to lowercase and vice versa.");
2432
2433static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002434string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002435{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002436 char *s = PyString_AS_STRING(self), *s_new;
2437 Py_ssize_t i, n = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002438 PyObject *newobj;
2439
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002440 newobj = PyString_FromStringAndSize(NULL, n);
Christian Heimes44720832008-05-26 13:01:01 +00002441 if (newobj == NULL)
2442 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002443 s_new = PyString_AsString(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002444 for (i = 0; i < n; i++) {
2445 int c = Py_CHARMASK(*s++);
2446 if (islower(c)) {
2447 *s_new = toupper(c);
2448 }
2449 else if (isupper(c)) {
2450 *s_new = tolower(c);
2451 }
2452 else
2453 *s_new = c;
2454 s_new++;
2455 }
2456 return newobj;
2457}
2458
2459
2460PyDoc_STRVAR(translate__doc__,
2461"S.translate(table [,deletechars]) -> string\n\
2462\n\
2463Return a copy of the string S, where all characters occurring\n\
2464in the optional argument deletechars are removed, and the\n\
2465remaining characters have been mapped through the given\n\
2466translation table, which must be a string of length 256.");
2467
2468static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002469string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002470{
2471 register char *input, *output;
2472 const char *table;
2473 register Py_ssize_t i, c, changed = 0;
2474 PyObject *input_obj = (PyObject*)self;
2475 const char *output_start, *del_table=NULL;
2476 Py_ssize_t inlen, tablen, dellen = 0;
2477 PyObject *result;
2478 int trans_table[256];
2479 PyObject *tableobj, *delobj = NULL;
2480
2481 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2482 &tableobj, &delobj))
2483 return NULL;
2484
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002485 if (PyString_Check(tableobj)) {
2486 table = PyString_AS_STRING(tableobj);
2487 tablen = PyString_GET_SIZE(tableobj);
Christian Heimes44720832008-05-26 13:01:01 +00002488 }
2489 else if (tableobj == Py_None) {
2490 table = NULL;
2491 tablen = 256;
2492 }
2493#ifdef Py_USING_UNICODE
2494 else if (PyUnicode_Check(tableobj)) {
2495 /* Unicode .translate() does not support the deletechars
2496 parameter; instead a mapping to None will cause characters
2497 to be deleted. */
2498 if (delobj != NULL) {
2499 PyErr_SetString(PyExc_TypeError,
2500 "deletions are implemented differently for unicode");
2501 return NULL;
2502 }
2503 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2504 }
2505#endif
2506 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2507 return NULL;
2508
2509 if (tablen != 256) {
2510 PyErr_SetString(PyExc_ValueError,
2511 "translation table must be 256 characters long");
2512 return NULL;
2513 }
2514
2515 if (delobj != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002516 if (PyString_Check(delobj)) {
2517 del_table = PyString_AS_STRING(delobj);
2518 dellen = PyString_GET_SIZE(delobj);
Christian Heimes44720832008-05-26 13:01:01 +00002519 }
2520#ifdef Py_USING_UNICODE
2521 else if (PyUnicode_Check(delobj)) {
2522 PyErr_SetString(PyExc_TypeError,
2523 "deletions are implemented differently for unicode");
2524 return NULL;
2525 }
2526#endif
2527 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2528 return NULL;
2529 }
2530 else {
2531 del_table = NULL;
2532 dellen = 0;
2533 }
2534
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002535 inlen = PyString_GET_SIZE(input_obj);
2536 result = PyString_FromStringAndSize((char *)NULL, inlen);
Christian Heimes44720832008-05-26 13:01:01 +00002537 if (result == NULL)
2538 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002539 output_start = output = PyString_AsString(result);
2540 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002541
2542 if (dellen == 0 && table != NULL) {
2543 /* If no deletions are required, use faster code */
2544 for (i = inlen; --i >= 0; ) {
2545 c = Py_CHARMASK(*input++);
2546 if (Py_CHARMASK((*output++ = table[c])) != c)
2547 changed = 1;
2548 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002549 if (changed || !PyString_CheckExact(input_obj))
Christian Heimes44720832008-05-26 13:01:01 +00002550 return result;
2551 Py_DECREF(result);
2552 Py_INCREF(input_obj);
2553 return input_obj;
2554 }
2555
2556 if (table == NULL) {
2557 for (i = 0; i < 256; i++)
2558 trans_table[i] = Py_CHARMASK(i);
2559 } else {
2560 for (i = 0; i < 256; i++)
2561 trans_table[i] = Py_CHARMASK(table[i]);
2562 }
2563
2564 for (i = 0; i < dellen; i++)
2565 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2566
2567 for (i = inlen; --i >= 0; ) {
2568 c = Py_CHARMASK(*input++);
2569 if (trans_table[c] != -1)
2570 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2571 continue;
2572 changed = 1;
2573 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002574 if (!changed && PyString_CheckExact(input_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00002575 Py_DECREF(result);
2576 Py_INCREF(input_obj);
2577 return input_obj;
2578 }
2579 /* Fix the size of the resulting string */
2580 if (inlen > 0)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002581 _PyString_Resize(&result, output - output_start);
Christian Heimes44720832008-05-26 13:01:01 +00002582 return result;
2583}
2584
2585
/* Search directions understood by findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL when there is none (thin memchr wrapper). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2593
2594/* String ops must return a string. */
2595/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002596Py_LOCAL(PyStringObject *)
2597return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002598{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002599 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00002600 Py_INCREF(self);
2601 return self;
2602 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002603 return (PyStringObject *)PyString_FromStringAndSize(
2604 PyString_AS_STRING(self),
2605 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002606}
2607
2608Py_LOCAL_INLINE(Py_ssize_t)
2609countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2610{
2611 Py_ssize_t count=0;
2612 const char *start=target;
2613 const char *end=target+target_len;
2614
2615 while ( (start=findchar(start, end-start, c)) != NULL ) {
2616 count++;
2617 if (count >= maxcount)
2618 break;
2619 start += 1;
2620 }
2621 return count;
2622}
2623
2624Py_LOCAL(Py_ssize_t)
2625findstring(const char *target, Py_ssize_t target_len,
2626 const char *pattern, Py_ssize_t pattern_len,
2627 Py_ssize_t start,
2628 Py_ssize_t end,
2629 int direction)
2630{
2631 if (start < 0) {
2632 start += target_len;
2633 if (start < 0)
2634 start = 0;
2635 }
2636 if (end > target_len) {
2637 end = target_len;
2638 } else if (end < 0) {
2639 end += target_len;
2640 if (end < 0)
2641 end = 0;
2642 }
2643
2644 /* zero-length substrings always match at the first attempt */
2645 if (pattern_len == 0)
2646 return (direction > 0) ? start : end;
2647
2648 end -= pattern_len;
2649
2650 if (direction < 0) {
2651 for (; end >= start; end--)
2652 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2653 return end;
2654 } else {
2655 for (; start <= end; start++)
2656 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2657 return start;
2658 }
2659 return -1;
2660}
2661
2662Py_LOCAL_INLINE(Py_ssize_t)
2663countstring(const char *target, Py_ssize_t target_len,
2664 const char *pattern, Py_ssize_t pattern_len,
2665 Py_ssize_t start,
2666 Py_ssize_t end,
2667 int direction, Py_ssize_t maxcount)
2668{
2669 Py_ssize_t count=0;
2670
2671 if (start < 0) {
2672 start += target_len;
2673 if (start < 0)
2674 start = 0;
2675 }
2676 if (end > target_len) {
2677 end = target_len;
2678 } else if (end < 0) {
2679 end += target_len;
2680 if (end < 0)
2681 end = 0;
2682 }
2683
2684 /* zero-length substrings match everywhere */
2685 if (pattern_len == 0 || maxcount == 0) {
2686 if (target_len+1 < maxcount)
2687 return target_len+1;
2688 return maxcount;
2689 }
2690
2691 end -= pattern_len;
2692 if (direction < 0) {
2693 for (; (end >= start); end--)
2694 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2695 count++;
2696 if (--maxcount <= 0) break;
2697 end -= pattern_len-1;
2698 }
2699 } else {
2700 for (; (start <= end); start++)
2701 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2702 count++;
2703 if (--maxcount <= 0)
2704 break;
2705 start += pattern_len-1;
2706 }
2707 }
2708 return count;
2709}
2710
2711
2712/* Algorithms for different cases of string replacement */
2713
2714/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002715Py_LOCAL(PyStringObject *)
2716replace_interleave(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002717 const char *to_s, Py_ssize_t to_len,
2718 Py_ssize_t maxcount)
2719{
2720 char *self_s, *result_s;
2721 Py_ssize_t self_len, result_len;
2722 Py_ssize_t count, i, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002723 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002724
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002725 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002726
2727 /* 1 at the end plus 1 after every character */
2728 count = self_len+1;
2729 if (maxcount < count)
2730 count = maxcount;
2731
2732 /* Check for overflow */
2733 /* result_len = count * to_len + self_len; */
2734 product = count * to_len;
2735 if (product / to_len != count) {
2736 PyErr_SetString(PyExc_OverflowError,
2737 "replace string is too long");
2738 return NULL;
2739 }
2740 result_len = product + self_len;
2741 if (result_len < 0) {
2742 PyErr_SetString(PyExc_OverflowError,
2743 "replace string is too long");
2744 return NULL;
2745 }
2746
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002747 if (! (result = (PyStringObject *)
2748 PyString_FromStringAndSize(NULL, result_len)) )
Christian Heimes44720832008-05-26 13:01:01 +00002749 return NULL;
2750
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002751 self_s = PyString_AS_STRING(self);
2752 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002753
2754 /* TODO: special case single character, which doesn't need memcpy */
2755
2756 /* Lay the first one down (guaranteed this will occur) */
2757 Py_MEMCPY(result_s, to_s, to_len);
2758 result_s += to_len;
2759 count -= 1;
2760
2761 for (i=0; i<count; i++) {
2762 *result_s++ = *self_s++;
2763 Py_MEMCPY(result_s, to_s, to_len);
2764 result_s += to_len;
2765 }
2766
2767 /* Copy the rest of the original string */
2768 Py_MEMCPY(result_s, self_s, self_len-i);
2769
2770 return result;
2771}
2772
2773/* Special case for deleting a single character */
2774/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002775Py_LOCAL(PyStringObject *)
2776replace_delete_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002777 char from_c, Py_ssize_t maxcount)
2778{
2779 char *self_s, *result_s;
2780 char *start, *next, *end;
2781 Py_ssize_t self_len, result_len;
2782 Py_ssize_t count;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002783 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002784
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002785 self_len = PyString_GET_SIZE(self);
2786 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002787
2788 count = countchar(self_s, self_len, from_c, maxcount);
2789 if (count == 0) {
2790 return return_self(self);
2791 }
2792
2793 result_len = self_len - count; /* from_len == 1 */
2794 assert(result_len>=0);
2795
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002796 if ( (result = (PyStringObject *)
2797 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00002798 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002799 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002800
2801 start = self_s;
2802 end = self_s + self_len;
2803 while (count-- > 0) {
2804 next = findchar(start, end-start, from_c);
2805 if (next == NULL)
2806 break;
2807 Py_MEMCPY(result_s, start, next-start);
2808 result_s += (next-start);
2809 start = next+1;
2810 }
2811 Py_MEMCPY(result_s, start, end-start);
2812
2813 return result;
2814}
2815
2816/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2817
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002818Py_LOCAL(PyStringObject *)
2819replace_delete_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002820 const char *from_s, Py_ssize_t from_len,
2821 Py_ssize_t maxcount) {
2822 char *self_s, *result_s;
2823 char *start, *next, *end;
2824 Py_ssize_t self_len, result_len;
2825 Py_ssize_t count, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002826 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002827
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002828 self_len = PyString_GET_SIZE(self);
2829 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002830
2831 count = countstring(self_s, self_len,
2832 from_s, from_len,
2833 0, self_len, 1,
2834 maxcount);
2835
2836 if (count == 0) {
2837 /* no matches */
2838 return return_self(self);
2839 }
2840
2841 result_len = self_len - (count * from_len);
2842 assert (result_len>=0);
2843
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002844 if ( (result = (PyStringObject *)
2845 PyString_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes44720832008-05-26 13:01:01 +00002846 return NULL;
2847
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002848 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002849
2850 start = self_s;
2851 end = self_s + self_len;
2852 while (count-- > 0) {
2853 offset = findstring(start, end-start,
2854 from_s, from_len,
2855 0, end-start, FORWARD);
2856 if (offset == -1)
2857 break;
2858 next = start + offset;
2859
2860 Py_MEMCPY(result_s, start, next-start);
2861
2862 result_s += (next-start);
2863 start = next+from_len;
2864 }
2865 Py_MEMCPY(result_s, start, end-start);
2866 return result;
2867}
2868
2869/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002870Py_LOCAL(PyStringObject *)
2871replace_single_character_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002872 char from_c, char to_c,
2873 Py_ssize_t maxcount)
2874{
2875 char *self_s, *result_s, *start, *end, *next;
2876 Py_ssize_t self_len;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002877 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002878
2879 /* The result string will be the same size */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002880 self_s = PyString_AS_STRING(self);
2881 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002882
2883 next = findchar(self_s, self_len, from_c);
2884
2885 if (next == NULL) {
2886 /* No matches; return the original string */
2887 return return_self(self);
2888 }
2889
2890 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002891 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002892 if (result == NULL)
2893 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002894 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002895 Py_MEMCPY(result_s, self_s, self_len);
2896
2897 /* change everything in-place, starting with this one */
2898 start = result_s + (next-self_s);
2899 *start = to_c;
2900 start++;
2901 end = result_s + self_len;
2902
2903 while (--maxcount > 0) {
2904 next = findchar(start, end-start, from_c);
2905 if (next == NULL)
2906 break;
2907 *next = to_c;
2908 start = next+1;
2909 }
2910
2911 return result;
2912}
2913
2914/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002915Py_LOCAL(PyStringObject *)
2916replace_substring_in_place(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002917 const char *from_s, Py_ssize_t from_len,
2918 const char *to_s, Py_ssize_t to_len,
2919 Py_ssize_t maxcount)
2920{
2921 char *result_s, *start, *end;
2922 char *self_s;
2923 Py_ssize_t self_len, offset;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002924 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002925
2926 /* The result string will be the same size */
2927
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002928 self_s = PyString_AS_STRING(self);
2929 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002930
2931 offset = findstring(self_s, self_len,
2932 from_s, from_len,
2933 0, self_len, FORWARD);
2934 if (offset == -1) {
2935 /* No matches; return the original string */
2936 return return_self(self);
2937 }
2938
2939 /* Need to make a new string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002940 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002941 if (result == NULL)
2942 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002943 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002944 Py_MEMCPY(result_s, self_s, self_len);
2945
2946 /* change everything in-place, starting with this one */
2947 start = result_s + offset;
2948 Py_MEMCPY(start, to_s, from_len);
2949 start += from_len;
2950 end = result_s + self_len;
2951
2952 while ( --maxcount > 0) {
2953 offset = findstring(start, end-start,
2954 from_s, from_len,
2955 0, end-start, FORWARD);
2956 if (offset==-1)
2957 break;
2958 Py_MEMCPY(start+offset, to_s, from_len);
2959 start += offset+from_len;
2960 }
2961
2962 return result;
2963}
2964
2965/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002966Py_LOCAL(PyStringObject *)
2967replace_single_character(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00002968 char from_c,
2969 const char *to_s, Py_ssize_t to_len,
2970 Py_ssize_t maxcount)
2971{
2972 char *self_s, *result_s;
2973 char *start, *next, *end;
2974 Py_ssize_t self_len, result_len;
2975 Py_ssize_t count, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002976 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002977
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002978 self_s = PyString_AS_STRING(self);
2979 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002980
2981 count = countchar(self_s, self_len, from_c, maxcount);
2982 if (count == 0) {
2983 /* no matches, return unchanged */
2984 return return_self(self);
2985 }
2986
2987 /* use the difference between current and new, hence the "-1" */
2988 /* result_len = self_len + count * (to_len-1) */
2989 product = count * (to_len-1);
2990 if (product / (to_len-1) != count) {
2991 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2992 return NULL;
2993 }
2994 result_len = self_len + product;
2995 if (result_len < 0) {
2996 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2997 return NULL;
2998 }
2999
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003000 if ( (result = (PyStringObject *)
3001 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003002 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003003 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003004
3005 start = self_s;
3006 end = self_s + self_len;
3007 while (count-- > 0) {
3008 next = findchar(start, end-start, from_c);
3009 if (next == NULL)
3010 break;
3011
3012 if (next == start) {
3013 /* replace with the 'to' */
3014 Py_MEMCPY(result_s, to_s, to_len);
3015 result_s += to_len;
3016 start += 1;
3017 } else {
3018 /* copy the unchanged old then the 'to' */
3019 Py_MEMCPY(result_s, start, next-start);
3020 result_s += (next-start);
3021 Py_MEMCPY(result_s, to_s, to_len);
3022 result_s += to_len;
3023 start = next+1;
3024 }
3025 }
3026 /* Copy the remainder of the remaining string */
3027 Py_MEMCPY(result_s, start, end-start);
3028
3029 return result;
3030}
3031
3032/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003033Py_LOCAL(PyStringObject *)
3034replace_substring(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003035 const char *from_s, Py_ssize_t from_len,
3036 const char *to_s, Py_ssize_t to_len,
3037 Py_ssize_t maxcount) {
3038 char *self_s, *result_s;
3039 char *start, *next, *end;
3040 Py_ssize_t self_len, result_len;
3041 Py_ssize_t count, offset, product;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003042 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003043
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003044 self_s = PyString_AS_STRING(self);
3045 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003046
3047 count = countstring(self_s, self_len,
3048 from_s, from_len,
3049 0, self_len, FORWARD, maxcount);
3050 if (count == 0) {
3051 /* no matches, return unchanged */
3052 return return_self(self);
3053 }
3054
3055 /* Check for overflow */
3056 /* result_len = self_len + count * (to_len-from_len) */
3057 product = count * (to_len-from_len);
3058 if (product / (to_len-from_len) != count) {
3059 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3060 return NULL;
3061 }
3062 result_len = self_len + product;
3063 if (result_len < 0) {
3064 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3065 return NULL;
3066 }
3067
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003068 if ( (result = (PyStringObject *)
3069 PyString_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00003070 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003071 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003072
3073 start = self_s;
3074 end = self_s + self_len;
3075 while (count-- > 0) {
3076 offset = findstring(start, end-start,
3077 from_s, from_len,
3078 0, end-start, FORWARD);
3079 if (offset == -1)
3080 break;
3081 next = start+offset;
3082 if (next == start) {
3083 /* replace with the 'to' */
3084 Py_MEMCPY(result_s, to_s, to_len);
3085 result_s += to_len;
3086 start += from_len;
3087 } else {
3088 /* copy the unchanged old then the 'to' */
3089 Py_MEMCPY(result_s, start, next-start);
3090 result_s += (next-start);
3091 Py_MEMCPY(result_s, to_s, to_len);
3092 result_s += to_len;
3093 start = next+from_len;
3094 }
3095 }
3096 /* Copy the remainder of the remaining string */
3097 Py_MEMCPY(result_s, start, end-start);
3098
3099 return result;
3100}
3101
3102
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003103Py_LOCAL(PyStringObject *)
3104replace(PyStringObject *self,
Christian Heimes44720832008-05-26 13:01:01 +00003105 const char *from_s, Py_ssize_t from_len,
3106 const char *to_s, Py_ssize_t to_len,
3107 Py_ssize_t maxcount)
3108{
3109 if (maxcount < 0) {
3110 maxcount = PY_SSIZE_T_MAX;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003111 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003112 /* nothing to do; return the original string */
3113 return return_self(self);
3114 }
3115
3116 if (maxcount == 0 ||
3117 (from_len == 0 && to_len == 0)) {
3118 /* nothing to do; return the original string */
3119 return return_self(self);
3120 }
3121
3122 /* Handle zero-length special cases */
3123
3124 if (from_len == 0) {
3125 /* insert the 'to' string everywhere. */
3126 /* >>> "Python".replace("", ".") */
3127 /* '.P.y.t.h.o.n.' */
3128 return replace_interleave(self, to_s, to_len, maxcount);
3129 }
3130
3131 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3132 /* point for an empty self string to generate a non-empty string */
3133 /* Special case so the remaining code always gets a non-empty string */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003134 if (PyString_GET_SIZE(self) == 0) {
Christian Heimes44720832008-05-26 13:01:01 +00003135 return return_self(self);
3136 }
3137
3138 if (to_len == 0) {
3139 /* delete all occurances of 'from' string */
3140 if (from_len == 1) {
3141 return replace_delete_single_character(
3142 self, from_s[0], maxcount);
3143 } else {
3144 return replace_delete_substring(self, from_s, from_len, maxcount);
3145 }
3146 }
3147
3148 /* Handle special case where both strings have the same length */
3149
3150 if (from_len == to_len) {
3151 if (from_len == 1) {
3152 return replace_single_character_in_place(
3153 self,
3154 from_s[0],
3155 to_s[0],
3156 maxcount);
3157 } else {
3158 return replace_substring_in_place(
3159 self, from_s, from_len, to_s, to_len, maxcount);
3160 }
3161 }
3162
3163 /* Otherwise use the more generic algorithms */
3164 if (from_len == 1) {
3165 return replace_single_character(self, from_s[0],
3166 to_s, to_len, maxcount);
3167 } else {
3168 /* len('from')>=2, len('to')>=1 */
3169 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3170 }
3171}
3172
3173PyDoc_STRVAR(replace__doc__,
3174"S.replace (old, new[, count]) -> string\n\
3175\n\
3176Return a copy of string S with all occurrences of substring\n\
3177old replaced by new. If the optional argument count is\n\
3178given, only the first count occurrences are replaced.");
3179
3180static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003181string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003182{
3183 Py_ssize_t count = -1;
3184 PyObject *from, *to;
3185 const char *from_s, *to_s;
3186 Py_ssize_t from_len, to_len;
3187
3188 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3189 return NULL;
3190
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003191 if (PyString_Check(from)) {
3192 from_s = PyString_AS_STRING(from);
3193 from_len = PyString_GET_SIZE(from);
Christian Heimes44720832008-05-26 13:01:01 +00003194 }
3195#ifdef Py_USING_UNICODE
3196 if (PyUnicode_Check(from))
3197 return PyUnicode_Replace((PyObject *)self,
3198 from, to, count);
3199#endif
3200 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3201 return NULL;
3202
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203 if (PyString_Check(to)) {
3204 to_s = PyString_AS_STRING(to);
3205 to_len = PyString_GET_SIZE(to);
Christian Heimes44720832008-05-26 13:01:01 +00003206 }
3207#ifdef Py_USING_UNICODE
3208 else if (PyUnicode_Check(to))
3209 return PyUnicode_Replace((PyObject *)self,
3210 from, to, count);
3211#endif
3212 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3213 return NULL;
3214
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003215 return (PyObject *)replace((PyStringObject *) self,
Christian Heimes44720832008-05-26 13:01:01 +00003216 from_s, from_len,
3217 to_s, to_len, count);
3218}
3219
3220/** End DALKE **/
3221
3222/* Matches the end (direction >= 0) or start (direction < 0) of self
3223 * against substr, using the start and end arguments. Returns
3224 * -1 on error, 0 if not found and 1 if found.
3225 */
3226Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes44720832008-05-26 13:01:01 +00003228 Py_ssize_t end, int direction)
3229{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003230 Py_ssize_t len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003231 Py_ssize_t slen;
3232 const char* sub;
3233 const char* str;
3234
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003235 if (PyString_Check(substr)) {
3236 sub = PyString_AS_STRING(substr);
3237 slen = PyString_GET_SIZE(substr);
Christian Heimes44720832008-05-26 13:01:01 +00003238 }
3239#ifdef Py_USING_UNICODE
3240 else if (PyUnicode_Check(substr))
3241 return PyUnicode_Tailmatch((PyObject *)self,
3242 substr, start, end, direction);
3243#endif
3244 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3245 return -1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003246 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003247
3248 string_adjust_indices(&start, &end, len);
3249
3250 if (direction < 0) {
3251 /* startswith */
3252 if (start+slen > len)
3253 return 0;
3254 } else {
3255 /* endswith */
3256 if (end-start < slen || start > len)
3257 return 0;
3258
3259 if (end-slen > start)
3260 start = end - slen;
3261 }
3262 if (end-start >= slen)
3263 return ! memcmp(str+start, sub, slen);
3264 return 0;
3265}
3266
3267
3268PyDoc_STRVAR(startswith__doc__,
3269"S.startswith(prefix[, start[, end]]) -> bool\n\
3270\n\
3271Return True if S starts with the specified prefix, False otherwise.\n\
3272With optional start, test S beginning at that position.\n\
3273With optional end, stop comparing S at that position.\n\
3274prefix can also be a tuple of strings to try.");
3275
3276static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003277string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003278{
3279 Py_ssize_t start = 0;
3280 Py_ssize_t end = PY_SSIZE_T_MAX;
3281 PyObject *subobj;
3282 int result;
3283
3284 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3285 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3286 return NULL;
3287 if (PyTuple_Check(subobj)) {
3288 Py_ssize_t i;
3289 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3290 result = _string_tailmatch(self,
3291 PyTuple_GET_ITEM(subobj, i),
3292 start, end, -1);
3293 if (result == -1)
3294 return NULL;
3295 else if (result) {
3296 Py_RETURN_TRUE;
3297 }
3298 }
3299 Py_RETURN_FALSE;
3300 }
3301 result = _string_tailmatch(self, subobj, start, end, -1);
3302 if (result == -1)
3303 return NULL;
3304 else
3305 return PyBool_FromLong(result);
3306}
3307
3308
3309PyDoc_STRVAR(endswith__doc__,
3310"S.endswith(suffix[, start[, end]]) -> bool\n\
3311\n\
3312Return True if S ends with the specified suffix, False otherwise.\n\
3313With optional start, test S beginning at that position.\n\
3314With optional end, stop comparing S at that position.\n\
3315suffix can also be a tuple of strings to try.");
3316
3317static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003318string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003319{
3320 Py_ssize_t start = 0;
3321 Py_ssize_t end = PY_SSIZE_T_MAX;
3322 PyObject *subobj;
3323 int result;
3324
3325 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3326 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3327 return NULL;
3328 if (PyTuple_Check(subobj)) {
3329 Py_ssize_t i;
3330 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3331 result = _string_tailmatch(self,
3332 PyTuple_GET_ITEM(subobj, i),
3333 start, end, +1);
3334 if (result == -1)
3335 return NULL;
3336 else if (result) {
3337 Py_RETURN_TRUE;
3338 }
3339 }
3340 Py_RETURN_FALSE;
3341 }
3342 result = _string_tailmatch(self, subobj, start, end, +1);
3343 if (result == -1)
3344 return NULL;
3345 else
3346 return PyBool_FromLong(result);
3347}
3348
3349
3350PyDoc_STRVAR(encode__doc__,
3351"S.encode([encoding[,errors]]) -> object\n\
3352\n\
3353Encodes S using the codec registered for encoding. encoding defaults\n\
3354to the default encoding. errors may be given to set a different error\n\
3355handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3356a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3357'xmlcharrefreplace' as well as any other name registered with\n\
3358codecs.register_error that is able to handle UnicodeEncodeErrors.");
3359
3360static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003361string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003362{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003363 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003364 char *encoding = NULL;
3365 char *errors = NULL;
3366 PyObject *v;
3367
Benjamin Peterson332d7212009-09-18 21:14:55 +00003368 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3369 kwlist, &encoding, &errors))
Christian Heimes44720832008-05-26 13:01:01 +00003370 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003371 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003372 if (v == NULL)
3373 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003374 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003375 PyErr_Format(PyExc_TypeError,
3376 "encoder did not return a string/unicode object "
3377 "(type=%.400s)",
3378 Py_TYPE(v)->tp_name);
3379 Py_DECREF(v);
3380 return NULL;
3381 }
3382 return v;
3383
3384 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003385 return NULL;
3386}
3387
Christian Heimes44720832008-05-26 13:01:01 +00003388
3389PyDoc_STRVAR(decode__doc__,
3390"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003391\n\
Christian Heimes44720832008-05-26 13:01:01 +00003392Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003393to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003394handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3395a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003396as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003397able to handle UnicodeDecodeErrors.");
3398
3399static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003400string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003401{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003402 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003403 char *encoding = NULL;
3404 char *errors = NULL;
3405 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003406
Benjamin Peterson332d7212009-09-18 21:14:55 +00003407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3408 kwlist, &encoding, &errors))
Christian Heimes1a6387e2008-03-26 12:49:49 +00003409 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003410 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003411 if (v == NULL)
3412 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003413 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00003414 PyErr_Format(PyExc_TypeError,
3415 "decoder did not return a string/unicode object "
3416 "(type=%.400s)",
3417 Py_TYPE(v)->tp_name);
3418 Py_DECREF(v);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003419 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003420 }
3421 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003422
Christian Heimes44720832008-05-26 13:01:01 +00003423 onError:
3424 return NULL;
3425}
3426
3427
3428PyDoc_STRVAR(expandtabs__doc__,
3429"S.expandtabs([tabsize]) -> string\n\
3430\n\
3431Return a copy of S where all tab characters are expanded using spaces.\n\
3432If tabsize is not given, a tab size of 8 characters is assumed.");
3433
3434static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003435string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003436{
3437 const char *e, *p, *qe;
3438 char *q;
3439 Py_ssize_t i, j, incr;
3440 PyObject *u;
3441 int tabsize = 8;
3442
3443 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3444 return NULL;
3445
3446 /* First pass: determine size of output string */
3447 i = 0; /* chars up to and including most recent \n or \r */
3448 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3450 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003451 if (*p == '\t') {
3452 if (tabsize > 0) {
3453 incr = tabsize - (j % tabsize);
3454 if (j > PY_SSIZE_T_MAX - incr)
3455 goto overflow1;
3456 j += incr;
3457 }
3458 }
3459 else {
3460 if (j > PY_SSIZE_T_MAX - 1)
3461 goto overflow1;
3462 j++;
3463 if (*p == '\n' || *p == '\r') {
3464 if (i > PY_SSIZE_T_MAX - j)
3465 goto overflow1;
3466 i += j;
3467 j = 0;
3468 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003469 }
Christian Heimes44720832008-05-26 13:01:01 +00003470
3471 if (i > PY_SSIZE_T_MAX - j)
3472 goto overflow1;
3473
3474 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003475 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003476 if (!u)
3477 return NULL;
3478
3479 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 q = PyString_AS_STRING(u); /* next output char */
3481 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003482
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 for (p = PyString_AS_STRING(self); p < e; p++)
Christian Heimes44720832008-05-26 13:01:01 +00003484 if (*p == '\t') {
3485 if (tabsize > 0) {
3486 i = tabsize - (j % tabsize);
3487 j += i;
3488 while (i--) {
3489 if (q >= qe)
3490 goto overflow2;
3491 *q++ = ' ';
3492 }
3493 }
3494 }
3495 else {
3496 if (q >= qe)
3497 goto overflow2;
3498 *q++ = *p;
3499 j++;
3500 if (*p == '\n' || *p == '\r')
3501 j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003502 }
Christian Heimes44720832008-05-26 13:01:01 +00003503
3504 return u;
3505
3506 overflow2:
3507 Py_DECREF(u);
3508 overflow1:
3509 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3510 return NULL;
3511}
3512
3513Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003514pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003515{
3516 PyObject *u;
3517
3518 if (left < 0)
3519 left = 0;
3520 if (right < 0)
3521 right = 0;
3522
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003523 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003524 Py_INCREF(self);
3525 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003526 }
3527
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003528 u = PyString_FromStringAndSize(NULL,
3529 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003530 if (u) {
3531 if (left)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003532 memset(PyString_AS_STRING(u), fill, left);
3533 Py_MEMCPY(PyString_AS_STRING(u) + left,
3534 PyString_AS_STRING(self),
3535 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00003536 if (right)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003537 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
Christian Heimes44720832008-05-26 13:01:01 +00003538 fill, right);
3539 }
3540
3541 return u;
3542}
3543
3544PyDoc_STRVAR(ljust__doc__,
3545"S.ljust(width[, fillchar]) -> string\n"
3546"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003547"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003548"done using the specified fill character (default is a space).");
3549
3550static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003551string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003552{
3553 Py_ssize_t width;
3554 char fillchar = ' ';
3555
3556 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3557 return NULL;
3558
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003559 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003560 Py_INCREF(self);
3561 return (PyObject*) self;
3562 }
3563
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003564 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003565}
3566
3567
3568PyDoc_STRVAR(rjust__doc__,
3569"S.rjust(width[, fillchar]) -> string\n"
3570"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003571"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003572"done using the specified fill character (default is a space)");
3573
3574static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003575string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003576{
3577 Py_ssize_t width;
3578 char fillchar = ' ';
3579
3580 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3581 return NULL;
3582
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003583 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003584 Py_INCREF(self);
3585 return (PyObject*) self;
3586 }
3587
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003588 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003589}
3590
3591
3592PyDoc_STRVAR(center__doc__,
3593"S.center(width[, fillchar]) -> string\n"
3594"\n"
3595"Return S centered in a string of length width. Padding is\n"
3596"done using the specified fill character (default is a space)");
3597
3598static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003599string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003600{
3601 Py_ssize_t marg, left;
3602 Py_ssize_t width;
3603 char fillchar = ' ';
3604
3605 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3606 return NULL;
3607
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003608 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003609 Py_INCREF(self);
3610 return (PyObject*) self;
3611 }
3612
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003613 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003614 left = marg / 2 + (marg & width & 1);
3615
3616 return pad(self, left, marg - left, fillchar);
3617}
3618
3619PyDoc_STRVAR(zfill__doc__,
3620"S.zfill(width) -> string\n"
3621"\n"
3622"Pad a numeric string S with zeros on the left, to fill a field\n"
3623"of the specified width. The string S is never truncated.");
3624
3625static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003626string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003627{
3628 Py_ssize_t fill;
3629 PyObject *s;
3630 char *p;
3631 Py_ssize_t width;
3632
3633 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3634 return NULL;
3635
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003636 if (PyString_GET_SIZE(self) >= width) {
3637 if (PyString_CheckExact(self)) {
Christian Heimes44720832008-05-26 13:01:01 +00003638 Py_INCREF(self);
3639 return (PyObject*) self;
3640 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003641 else
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003642 return PyString_FromStringAndSize(
3643 PyString_AS_STRING(self),
3644 PyString_GET_SIZE(self)
Christian Heimes44720832008-05-26 13:01:01 +00003645 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003646 }
3647
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003648 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003649
Christian Heimes44720832008-05-26 13:01:01 +00003650 s = pad(self, fill, 0, '0');
3651
3652 if (s == NULL)
3653 return NULL;
3654
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003655 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003656 if (p[fill] == '+' || p[fill] == '-') {
3657 /* move sign to beginning of string */
3658 p[0] = p[fill];
3659 p[fill] = '0';
3660 }
3661
3662 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003663}
3664
Christian Heimes44720832008-05-26 13:01:01 +00003665PyDoc_STRVAR(isspace__doc__,
3666"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003667\n\
Christian Heimes44720832008-05-26 13:01:01 +00003668Return True if all characters in S are whitespace\n\
3669and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003670
Christian Heimes44720832008-05-26 13:01:01 +00003671static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003672string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003673{
Christian Heimes44720832008-05-26 13:01:01 +00003674 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003675 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003676 register const unsigned char *e;
3677
3678 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003679 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003680 isspace(*p))
3681 return PyBool_FromLong(1);
3682
3683 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003684 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003685 return PyBool_FromLong(0);
3686
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003687 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003688 for (; p < e; p++) {
3689 if (!isspace(*p))
3690 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003691 }
Christian Heimes44720832008-05-26 13:01:01 +00003692 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003693}
3694
Christian Heimes44720832008-05-26 13:01:01 +00003695
3696PyDoc_STRVAR(isalpha__doc__,
3697"S.isalpha() -> bool\n\
3698\n\
3699Return True if all characters in S are alphabetic\n\
3700and there is at least one character in S, False otherwise.");
3701
3702static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003703string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003704{
Christian Heimes44720832008-05-26 13:01:01 +00003705 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003706 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003707 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003708
Christian Heimes44720832008-05-26 13:01:01 +00003709 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003710 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003711 isalpha(*p))
3712 return PyBool_FromLong(1);
3713
3714 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003715 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003716 return PyBool_FromLong(0);
3717
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003718 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003719 for (; p < e; p++) {
3720 if (!isalpha(*p))
3721 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003722 }
Christian Heimes44720832008-05-26 13:01:01 +00003723 return PyBool_FromLong(1);
3724}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003725
Christian Heimes44720832008-05-26 13:01:01 +00003726
3727PyDoc_STRVAR(isalnum__doc__,
3728"S.isalnum() -> bool\n\
3729\n\
3730Return True if all characters in S are alphanumeric\n\
3731and there is at least one character in S, False otherwise.");
3732
3733static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003734string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003735{
3736 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003737 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003738 register const unsigned char *e;
3739
3740 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003741 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003742 isalnum(*p))
3743 return PyBool_FromLong(1);
3744
3745 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003746 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003747 return PyBool_FromLong(0);
3748
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003749 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003750 for (; p < e; p++) {
3751 if (!isalnum(*p))
3752 return PyBool_FromLong(0);
3753 }
3754 return PyBool_FromLong(1);
3755}
3756
3757
3758PyDoc_STRVAR(isdigit__doc__,
3759"S.isdigit() -> bool\n\
3760\n\
3761Return True if all characters in S are digits\n\
3762and there is at least one character in S, False otherwise.");
3763
3764static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003765string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003766{
3767 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003768 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003769 register const unsigned char *e;
3770
3771 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003772 if (PyString_GET_SIZE(self) == 1 &&
Christian Heimes44720832008-05-26 13:01:01 +00003773 isdigit(*p))
3774 return PyBool_FromLong(1);
3775
3776 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003777 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003778 return PyBool_FromLong(0);
3779
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003780 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003781 for (; p < e; p++) {
3782 if (!isdigit(*p))
3783 return PyBool_FromLong(0);
3784 }
3785 return PyBool_FromLong(1);
3786}
3787
3788
3789PyDoc_STRVAR(islower__doc__,
3790"S.islower() -> bool\n\
3791\n\
3792Return True if all cased characters in S are lowercase and there is\n\
3793at least one cased character in S, False otherwise.");
3794
3795static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003796string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003797{
3798 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003799 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003800 register const unsigned char *e;
3801 int cased;
3802
3803 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003805 return PyBool_FromLong(islower(*p) != 0);
3806
3807 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003808 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003809 return PyBool_FromLong(0);
3810
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003811 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003812 cased = 0;
3813 for (; p < e; p++) {
3814 if (isupper(*p))
3815 return PyBool_FromLong(0);
3816 else if (!cased && islower(*p))
3817 cased = 1;
3818 }
3819 return PyBool_FromLong(cased);
3820}
3821
3822
3823PyDoc_STRVAR(isupper__doc__,
3824"S.isupper() -> bool\n\
3825\n\
3826Return True if all cased characters in S are uppercase and there is\n\
3827at least one cased character in S, False otherwise.");
3828
3829static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003830string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003831{
3832 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003833 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003834 register const unsigned char *e;
3835 int cased;
3836
3837 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003838 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003839 return PyBool_FromLong(isupper(*p) != 0);
3840
3841 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003842 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003843 return PyBool_FromLong(0);
3844
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003845 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003846 cased = 0;
3847 for (; p < e; p++) {
3848 if (islower(*p))
3849 return PyBool_FromLong(0);
3850 else if (!cased && isupper(*p))
3851 cased = 1;
3852 }
3853 return PyBool_FromLong(cased);
3854}
3855
3856
3857PyDoc_STRVAR(istitle__doc__,
3858"S.istitle() -> bool\n\
3859\n\
3860Return True if S is a titlecased string and there is at least one\n\
3861character in S, i.e. uppercase characters may only follow uncased\n\
3862characters and lowercase characters only cased ones. Return False\n\
3863otherwise.");
3864
3865static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003866string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003867{
3868 register const unsigned char *p
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003869 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003870 register const unsigned char *e;
3871 int cased, previous_is_cased;
3872
3873 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003874 if (PyString_GET_SIZE(self) == 1)
Christian Heimes44720832008-05-26 13:01:01 +00003875 return PyBool_FromLong(isupper(*p) != 0);
3876
3877 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003878 if (PyString_GET_SIZE(self) == 0)
Christian Heimes44720832008-05-26 13:01:01 +00003879 return PyBool_FromLong(0);
3880
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003881 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003882 cased = 0;
3883 previous_is_cased = 0;
3884 for (; p < e; p++) {
3885 register const unsigned char ch = *p;
3886
3887 if (isupper(ch)) {
3888 if (previous_is_cased)
3889 return PyBool_FromLong(0);
3890 previous_is_cased = 1;
3891 cased = 1;
3892 }
3893 else if (islower(ch)) {
3894 if (!previous_is_cased)
3895 return PyBool_FromLong(0);
3896 previous_is_cased = 1;
3897 cased = 1;
3898 }
3899 else
3900 previous_is_cased = 0;
3901 }
3902 return PyBool_FromLong(cased);
3903}
3904
3905
3906PyDoc_STRVAR(splitlines__doc__,
3907"S.splitlines([keepends]) -> list of strings\n\
3908\n\
3909Return a list of the lines in S, breaking at line boundaries.\n\
3910Line breaks are not included in the resulting list unless keepends\n\
3911is given and true.");
3912
3913static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003914string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003915{
3916 register Py_ssize_t i;
3917 register Py_ssize_t j;
3918 Py_ssize_t len;
3919 int keepends = 0;
3920 PyObject *list;
3921 PyObject *str;
3922 char *data;
3923
3924 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3925 return NULL;
3926
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003927 data = PyString_AS_STRING(self);
3928 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003929
3930 /* This does not use the preallocated list because splitlines is
3931 usually run with hundreds of newlines. The overhead of
3932 switching between PyList_SET_ITEM and append causes about a
3933 2-3% slowdown for that common case. A smarter implementation
3934 could move the if check out, so the SET_ITEMs are done first
3935 and the appends only done when the prealloc buffer is full.
3936 That's too much work for little gain.*/
3937
3938 list = PyList_New(0);
3939 if (!list)
3940 goto onError;
3941
3942 for (i = j = 0; i < len; ) {
3943 Py_ssize_t eol;
3944
3945 /* Find a line and append it */
3946 while (i < len && data[i] != '\n' && data[i] != '\r')
3947 i++;
3948
3949 /* Skip the line break reading CRLF as one line break */
3950 eol = i;
3951 if (i < len) {
3952 if (data[i] == '\r' && i + 1 < len &&
3953 data[i+1] == '\n')
3954 i += 2;
3955 else
3956 i++;
3957 if (keepends)
3958 eol = i;
3959 }
3960 SPLIT_APPEND(data, j, eol);
3961 j = i;
3962 }
3963 if (j < len) {
3964 SPLIT_APPEND(data, j, len);
3965 }
3966
3967 return list;
3968
3969 onError:
3970 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003971 return NULL;
3972}
3973
Robert Schuppenies51df0642008-06-01 16:16:17 +00003974PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003975"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003976
3977static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003978string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003979{
3980 Py_ssize_t res;
Benjamin Peterson4fe03352009-09-17 21:33:46 +00003981 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Robert Schuppenies51df0642008-06-01 16:16:17 +00003982 return PyInt_FromSsize_t(res);
3983}
3984
/* The split helper macros are not needed past this point; drop them so
   they cannot leak into the code below. */
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
#undef SPLIT_ADD
#undef SPLIT_APPEND
Christian Heimes1a6387e2008-03-26 12:49:49 +00003989
3990static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003991string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003992{
Christian Heimes44720832008-05-26 13:01:01 +00003993 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003994}
3995
Christian Heimes1a6387e2008-03-26 12:49:49 +00003996
Christian Heimes44720832008-05-26 13:01:01 +00003997#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003998
Christian Heimes44720832008-05-26 13:01:01 +00003999PyDoc_STRVAR(format__doc__,
4000"S.format(*args, **kwargs) -> unicode\n\
4001\n\
4002");
Christian Heimes1a6387e2008-03-26 12:49:49 +00004003
Eric Smithdc13b792008-05-30 18:10:04 +00004004static PyObject *
4005string__format__(PyObject* self, PyObject* args)
4006{
4007 PyObject *format_spec;
4008 PyObject *result = NULL;
4009 PyObject *tmp = NULL;
4010
4011 /* If 2.x, convert format_spec to the same type as value */
4012 /* This is to allow things like u''.format('') */
4013 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
4014 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004015 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Eric Smithdc13b792008-05-30 18:10:04 +00004016 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
4017 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
4018 goto done;
4019 }
4020 tmp = PyObject_Str(format_spec);
4021 if (tmp == NULL)
4022 goto done;
4023 format_spec = tmp;
4024
4025 result = _PyBytes_FormatAdvanced(self,
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004026 PyString_AS_STRING(format_spec),
4027 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004028done:
4029 Py_XDECREF(tmp);
4030 return result;
4031}
4032
Christian Heimes44720832008-05-26 13:01:01 +00004033PyDoc_STRVAR(p_format__doc__,
4034"S.__format__(format_spec) -> unicode\n\
4035\n\
4036");
4037
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004038
Christian Heimes1a6387e2008-03-26 12:49:49 +00004039static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004040string_methods[] = {
4041 /* Counterparts of the obsolete stropmodule functions; except
4042 string.maketrans(). */
4043 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4044 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4045 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4046 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4047 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4048 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4049 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4050 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4051 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4052 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4053 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4054 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4055 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4056 capitalize__doc__},
4057 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4058 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4059 endswith__doc__},
4060 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4061 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4062 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4063 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4064 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4065 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4066 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4067 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4068 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4069 rpartition__doc__},
4070 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4071 startswith__doc__},
4072 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4073 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4074 swapcase__doc__},
4075 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4076 translate__doc__},
4077 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4078 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4079 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4080 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4081 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4082 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4083 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4084 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4085 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Benjamin Peterson332d7212009-09-18 21:14:55 +00004086 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
4087 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004088 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4089 expandtabs__doc__},
4090 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4091 splitlines__doc__},
Robert Schuppenies51df0642008-06-01 16:16:17 +00004092 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4093 sizeof__doc__},
Christian Heimes44720832008-05-26 13:01:01 +00004094 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4095 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004096};
4097
4098static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004099str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004100
Christian Heimes44720832008-05-26 13:01:01 +00004101static PyObject *
4102string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4103{
4104 PyObject *x = NULL;
4105 static char *kwlist[] = {"object", 0};
4106
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004107 if (type != &PyString_Type)
Christian Heimes44720832008-05-26 13:01:01 +00004108 return str_subtype_new(type, args, kwds);
4109 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4110 return NULL;
4111 if (x == NULL)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004112 return PyString_FromString("");
Christian Heimes44720832008-05-26 13:01:01 +00004113 return PyObject_Str(x);
4114}
4115
4116static PyObject *
4117str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4118{
4119 PyObject *tmp, *pnew;
4120 Py_ssize_t n;
4121
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004122 assert(PyType_IsSubtype(type, &PyString_Type));
4123 tmp = string_new(&PyString_Type, args, kwds);
Christian Heimes44720832008-05-26 13:01:01 +00004124 if (tmp == NULL)
4125 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004126 assert(PyString_CheckExact(tmp));
4127 n = PyString_GET_SIZE(tmp);
Christian Heimes44720832008-05-26 13:01:01 +00004128 pnew = type->tp_alloc(type, n);
4129 if (pnew != NULL) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004130 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4131 ((PyStringObject *)pnew)->ob_shash =
4132 ((PyStringObject *)tmp)->ob_shash;
4133 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimes44720832008-05-26 13:01:01 +00004134 }
4135 Py_DECREF(tmp);
4136 return pnew;
4137}
4138
4139static PyObject *
4140basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4141{
4142 PyErr_SetString(PyExc_TypeError,
4143 "The basestring type cannot be instantiated");
4144 return NULL;
4145}
4146
4147static PyObject *
4148string_mod(PyObject *v, PyObject *w)
4149{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004150 if (!PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004151 Py_INCREF(Py_NotImplemented);
4152 return Py_NotImplemented;
4153 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004154 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004155}
4156
4157PyDoc_STRVAR(basestring_doc,
4158"Type basestring cannot be instantiated; it is the base for str and unicode.");
4159
4160static PyNumberMethods string_as_number = {
4161 0, /*nb_add*/
4162 0, /*nb_subtract*/
4163 0, /*nb_multiply*/
4164 0, /*nb_divide*/
4165 string_mod, /*nb_remainder*/
4166};
4167
4168
4169PyTypeObject PyBaseString_Type = {
4170 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4171 "basestring",
4172 0,
4173 0,
4174 0, /* tp_dealloc */
4175 0, /* tp_print */
4176 0, /* tp_getattr */
4177 0, /* tp_setattr */
4178 0, /* tp_compare */
4179 0, /* tp_repr */
4180 0, /* tp_as_number */
4181 0, /* tp_as_sequence */
4182 0, /* tp_as_mapping */
4183 0, /* tp_hash */
4184 0, /* tp_call */
4185 0, /* tp_str */
4186 0, /* tp_getattro */
4187 0, /* tp_setattro */
4188 0, /* tp_as_buffer */
4189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4190 basestring_doc, /* tp_doc */
4191 0, /* tp_traverse */
4192 0, /* tp_clear */
4193 0, /* tp_richcompare */
4194 0, /* tp_weaklistoffset */
4195 0, /* tp_iter */
4196 0, /* tp_iternext */
4197 0, /* tp_methods */
4198 0, /* tp_members */
4199 0, /* tp_getset */
4200 &PyBaseObject_Type, /* tp_base */
4201 0, /* tp_dict */
4202 0, /* tp_descr_get */
4203 0, /* tp_descr_set */
4204 0, /* tp_dictoffset */
4205 0, /* tp_init */
4206 0, /* tp_alloc */
4207 basestring_new, /* tp_new */
4208 0, /* tp_free */
4209};
4210
4211PyDoc_STRVAR(string_doc,
4212"str(object) -> string\n\
4213\n\
4214Return a nice string representation of the object.\n\
4215If the argument is a string, the return value is the same object.");
4216
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004217PyTypeObject PyString_Type = {
Christian Heimes44720832008-05-26 13:01:01 +00004218 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4219 "str",
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004220 PyStringObject_SIZE,
Christian Heimes44720832008-05-26 13:01:01 +00004221 sizeof(char),
4222 string_dealloc, /* tp_dealloc */
4223 (printfunc)string_print, /* tp_print */
4224 0, /* tp_getattr */
4225 0, /* tp_setattr */
4226 0, /* tp_compare */
4227 string_repr, /* tp_repr */
4228 &string_as_number, /* tp_as_number */
4229 &string_as_sequence, /* tp_as_sequence */
4230 &string_as_mapping, /* tp_as_mapping */
4231 (hashfunc)string_hash, /* tp_hash */
4232 0, /* tp_call */
4233 string_str, /* tp_str */
4234 PyObject_GenericGetAttr, /* tp_getattro */
4235 0, /* tp_setattro */
4236 &string_as_buffer, /* tp_as_buffer */
4237 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4238 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4239 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4240 string_doc, /* tp_doc */
4241 0, /* tp_traverse */
4242 0, /* tp_clear */
4243 (richcmpfunc)string_richcompare, /* tp_richcompare */
4244 0, /* tp_weaklistoffset */
4245 0, /* tp_iter */
4246 0, /* tp_iternext */
4247 string_methods, /* tp_methods */
4248 0, /* tp_members */
4249 0, /* tp_getset */
4250 &PyBaseString_Type, /* tp_base */
4251 0, /* tp_dict */
4252 0, /* tp_descr_get */
4253 0, /* tp_descr_set */
4254 0, /* tp_dictoffset */
4255 0, /* tp_init */
4256 0, /* tp_alloc */
4257 string_new, /* tp_new */
4258 PyObject_Del, /* tp_free */
4259};
4260
4261void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004262PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004263{
4264 register PyObject *v;
4265 if (*pv == NULL)
4266 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004267 if (w == NULL || !PyString_Check(*pv)) {
Christian Heimes44720832008-05-26 13:01:01 +00004268 Py_DECREF(*pv);
4269 *pv = NULL;
4270 return;
4271 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004272 v = string_concat((PyStringObject *) *pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004273 Py_DECREF(*pv);
4274 *pv = v;
4275}
4276
4277void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004278PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004279{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004280 PyString_Concat(pv, w);
Christian Heimes44720832008-05-26 13:01:01 +00004281 Py_XDECREF(w);
4282}
4283
4284
4285/* The following function breaks the notion that strings are immutable:
4286 it changes the size of a string. We get away with this only if there
4287 is only one module referencing the object. You can also think of it
4288 as creating a new string object and destroying the old one, only
4289 more efficiently. In any case, don't use this if the string may
4290 already be known to some other part of the code...
4291 Note that if there's not enough memory to resize the string, the original
4292 string object at *pv is deallocated, *pv is set to NULL, an "out of
4293 memory" exception is set, and -1 is returned. Else (on success) 0 is
4294 returned, and the value in *pv may or may not be the same as on input.
4295 As always, an extra byte is allocated for a trailing \0 byte (newsize
4296 does *not* include that), and a trailing \0 byte is stored.
4297*/
4298
4299int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004300_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004301{
4302 register PyObject *v;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004303 register PyStringObject *sv;
Christian Heimes44720832008-05-26 13:01:01 +00004304 v = *pv;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004305 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4306 PyString_CHECK_INTERNED(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004307 *pv = 0;
4308 Py_DECREF(v);
4309 PyErr_BadInternalCall();
4310 return -1;
4311 }
4312 /* XXX UNREF/NEWREF interface should be more symmetrical */
4313 _Py_DEC_REFTOTAL;
4314 _Py_ForgetReference(v);
4315 *pv = (PyObject *)
Mark Dickinson826f3fe2008-12-05 21:55:28 +00004316 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
Christian Heimes44720832008-05-26 13:01:01 +00004317 if (*pv == NULL) {
4318 PyObject_Del(v);
4319 PyErr_NoMemory();
4320 return -1;
4321 }
4322 _Py_NewReference(*pv);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004323 sv = (PyStringObject *) *pv;
Christian Heimes44720832008-05-26 13:01:01 +00004324 Py_SIZE(sv) = newsize;
4325 sv->ob_sval[newsize] = '\0';
4326 sv->ob_shash = -1; /* invalidate cached hash value */
4327 return 0;
4328}
4329
4330/* Helpers for formatstring */
4331
4332Py_LOCAL_INLINE(PyObject *)
4333getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4334{
4335 Py_ssize_t argidx = *p_argidx;
4336 if (argidx < arglen) {
4337 (*p_argidx)++;
4338 if (arglen < 0)
4339 return args;
4340 else
4341 return PyTuple_GetItem(args, argidx);
4342 }
4343 PyErr_SetString(PyExc_TypeError,
4344 "not enough arguments for format string");
4345 return NULL;
4346}
4347
/* Bit flags recording which flag characters appeared in a % format
 * specifier:
 *
 *   flag     character
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4360
Mark Dickinson18cfada2009-11-23 18:46:41 +00004361/* Returns a new reference to a PyString object, or NULL on failure. */
4362
4363static PyObject *
4364formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00004365{
Mark Dickinson18cfada2009-11-23 18:46:41 +00004366 char *p;
4367 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00004368 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00004369
Christian Heimes44720832008-05-26 13:01:01 +00004370 x = PyFloat_AsDouble(v);
4371 if (x == -1.0 && PyErr_Occurred()) {
4372 PyErr_Format(PyExc_TypeError, "float argument required, "
4373 "not %.200s", Py_TYPE(v)->tp_name);
Mark Dickinson18cfada2009-11-23 18:46:41 +00004374 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004375 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00004376
Christian Heimes44720832008-05-26 13:01:01 +00004377 if (prec < 0)
4378 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00004379
Mark Dickinson18cfada2009-11-23 18:46:41 +00004380 p = PyOS_double_to_string(x, type, prec,
4381 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00004382
Mark Dickinson18cfada2009-11-23 18:46:41 +00004383 if (p == NULL)
4384 return NULL;
4385 result = PyString_FromStringAndSize(p, strlen(p));
4386 PyMem_Free(p);
4387 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004388}
4389
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004390/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004391 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4392 * Python's regular ints.
4393 * Return value: a new PyString*, or NULL if error.
4394 * . *pbuf is set to point into it,
4395 * *plen set to the # of chars following that.
4396 * Caller must decref it when done using pbuf.
4397 * The string starting at *pbuf is of the form
4398 * "-"? ("0x" | "0X")? digit+
4399 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4400 * set in flags. The case of hex digits will be correct,
4401 * There will be at least prec digits, zero-filled on the left if
4402 * necessary to get that many.
4403 * val object to be converted
4404 * flags bitmask of format flags; only F_ALT is looked at
4405 * prec minimum number of digits; 0-fill on left if needed
4406 * type a character in [duoxX]; u acts the same as d
4407 *
4408 * CAUTION: o, x and X conversions on regular ints can never
4409 * produce a '-' sign, but can for Python's unbounded ints.
4410 */
4411PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004412_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Christian Heimes44720832008-05-26 13:01:01 +00004413 char **pbuf, int *plen)
4414{
4415 PyObject *result = NULL;
4416 char *buf;
4417 Py_ssize_t i;
4418 int sign; /* 1 if '-', else 0 */
4419 int len; /* number of characters */
4420 Py_ssize_t llen;
4421 int numdigits; /* len == numnondigits + numdigits */
4422 int numnondigits = 0;
4423
4424 switch (type) {
4425 case 'd':
4426 case 'u':
4427 result = Py_TYPE(val)->tp_str(val);
4428 break;
4429 case 'o':
4430 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4431 break;
4432 case 'x':
4433 case 'X':
4434 numnondigits = 2;
4435 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4436 break;
4437 default:
4438 assert(!"'type' not in [duoxX]");
4439 }
4440 if (!result)
4441 return NULL;
4442
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004443 buf = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004444 if (!buf) {
4445 Py_DECREF(result);
4446 return NULL;
4447 }
4448
4449 /* To modify the string in-place, there can only be one reference. */
4450 if (Py_REFCNT(result) != 1) {
4451 PyErr_BadInternalCall();
4452 return NULL;
4453 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004454 llen = PyString_Size(result);
Christian Heimes44720832008-05-26 13:01:01 +00004455 if (llen > INT_MAX) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004456 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Christian Heimes44720832008-05-26 13:01:01 +00004457 return NULL;
4458 }
4459 len = (int)llen;
4460 if (buf[len-1] == 'L') {
4461 --len;
4462 buf[len] = '\0';
4463 }
4464 sign = buf[0] == '-';
4465 numnondigits += sign;
4466 numdigits = len - numnondigits;
4467 assert(numdigits > 0);
4468
4469 /* Get rid of base marker unless F_ALT */
4470 if ((flags & F_ALT) == 0) {
4471 /* Need to skip 0x, 0X or 0. */
4472 int skipped = 0;
4473 switch (type) {
4474 case 'o':
4475 assert(buf[sign] == '0');
4476 /* If 0 is only digit, leave it alone. */
4477 if (numdigits > 1) {
4478 skipped = 1;
4479 --numdigits;
4480 }
4481 break;
4482 case 'x':
4483 case 'X':
4484 assert(buf[sign] == '0');
4485 assert(buf[sign + 1] == 'x');
4486 skipped = 2;
4487 numnondigits -= 2;
4488 break;
4489 }
4490 if (skipped) {
4491 buf += skipped;
4492 len -= skipped;
4493 if (sign)
4494 buf[0] = '-';
4495 }
4496 assert(len == numnondigits + numdigits);
4497 assert(numdigits > 0);
4498 }
4499
4500 /* Fill with leading zeroes to meet minimum width. */
4501 if (prec > numdigits) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004502 PyObject *r1 = PyString_FromStringAndSize(NULL,
Christian Heimes44720832008-05-26 13:01:01 +00004503 numnondigits + prec);
4504 char *b1;
4505 if (!r1) {
4506 Py_DECREF(result);
4507 return NULL;
4508 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004509 b1 = PyString_AS_STRING(r1);
Christian Heimes44720832008-05-26 13:01:01 +00004510 for (i = 0; i < numnondigits; ++i)
4511 *b1++ = *buf++;
4512 for (i = 0; i < prec - numdigits; i++)
4513 *b1++ = '0';
4514 for (i = 0; i < numdigits; i++)
4515 *b1++ = *buf++;
4516 *b1 = '\0';
4517 Py_DECREF(result);
4518 result = r1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004519 buf = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00004520 len = numnondigits + prec;
4521 }
4522
4523 /* Fix up case for hex conversions. */
4524 if (type == 'X') {
4525 /* Need to convert all lower case letters to upper case.
4526 and need to convert 0x to 0X (and -0x to -0X). */
4527 for (i = 0; i < len; i++)
4528 if (buf[i] >= 'a' && buf[i] <= 'x')
4529 buf[i] -= 'a'-'A';
4530 }
4531 *pbuf = buf;
4532 *plen = len;
4533 return result;
4534}
4535
4536Py_LOCAL_INLINE(int)
4537formatint(char *buf, size_t buflen, int flags,
4538 int prec, int type, PyObject *v)
4539{
4540 /* fmt = '%#.' + `prec` + 'l' + `type`
4541 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4542 + 1 + 1 = 24 */
4543 char fmt[64]; /* plenty big enough! */
4544 char *sign;
4545 long x;
4546
4547 x = PyInt_AsLong(v);
4548 if (x == -1 && PyErr_Occurred()) {
4549 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4550 Py_TYPE(v)->tp_name);
4551 return -1;
4552 }
4553 if (x < 0 && type == 'u') {
4554 type = 'd';
4555 }
4556 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4557 sign = "-";
4558 else
4559 sign = "";
4560 if (prec < 0)
4561 prec = 1;
4562
4563 if ((flags & F_ALT) &&
4564 (type == 'x' || type == 'X')) {
4565 /* When converting under %#x or %#X, there are a number
4566 * of issues that cause pain:
4567 * - when 0 is being converted, the C standard leaves off
4568 * the '0x' or '0X', which is inconsistent with other
4569 * %#x/%#X conversions and inconsistent with Python's
4570 * hex() function
4571 * - there are platforms that violate the standard and
4572 * convert 0 with the '0x' or '0X'
4573 * (Metrowerks, Compaq Tru64)
4574 * - there are platforms that give '0x' when converting
4575 * under %#X, but convert 0 in accordance with the
4576 * standard (OS/2 EMX)
4577 *
4578 * We can achieve the desired consistency by inserting our
4579 * own '0x' or '0X' prefix, and substituting %x/%X in place
4580 * of %#x/%#X.
4581 *
4582 * Note that this is the same approach as used in
4583 * formatint() in unicodeobject.c
4584 */
4585 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4586 sign, type, prec, type);
4587 }
4588 else {
4589 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4590 sign, (flags&F_ALT) ? "#" : "",
4591 prec, type);
4592 }
4593
4594 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4595 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4596 */
4597 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4598 PyErr_SetString(PyExc_OverflowError,
4599 "formatted integer is too long (precision too large?)");
4600 return -1;
4601 }
4602 if (sign[0])
4603 PyOS_snprintf(buf, buflen, fmt, -x);
4604 else
4605 PyOS_snprintf(buf, buflen, fmt, x);
4606 return (int)strlen(buf);
4607}
4608
4609Py_LOCAL_INLINE(int)
4610formatchar(char *buf, size_t buflen, PyObject *v)
4611{
4612 /* presume that the buffer is at least 2 characters long */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004613 if (PyString_Check(v)) {
Christian Heimes44720832008-05-26 13:01:01 +00004614 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4615 return -1;
4616 }
4617 else {
4618 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4619 return -1;
4620 }
4621 buf[1] = '\0';
4622 return 1;
4623}
4624
4625/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4626
Mark Dickinson18cfada2009-11-23 18:46:41 +00004627 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004628 chars are formatted. XXX This is a magic number. Each formatting
4629 routine does bounds checking to ensure no overflow, but a better
4630 solution may be to malloc a buffer of appropriate size for each
4631 format. For now, the current solution is sufficient.
4632*/
4633#define FORMATBUFLEN (size_t)120
4634
4635PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004636PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004637{
4638 char *fmt, *res;
4639 Py_ssize_t arglen, argidx;
4640 Py_ssize_t reslen, rescnt, fmtcnt;
4641 int args_owned = 0;
4642 PyObject *result, *orig_args;
4643#ifdef Py_USING_UNICODE
4644 PyObject *v, *w;
4645#endif
4646 PyObject *dict = NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004647 if (format == NULL || !PyString_Check(format) || args == NULL) {
Christian Heimes44720832008-05-26 13:01:01 +00004648 PyErr_BadInternalCall();
4649 return NULL;
4650 }
4651 orig_args = args;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004652 fmt = PyString_AS_STRING(format);
4653 fmtcnt = PyString_GET_SIZE(format);
Christian Heimes44720832008-05-26 13:01:01 +00004654 reslen = rescnt = fmtcnt + 100;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004655 result = PyString_FromStringAndSize((char *)NULL, reslen);
Christian Heimes44720832008-05-26 13:01:01 +00004656 if (result == NULL)
4657 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004658 res = PyString_AsString(result);
Christian Heimes44720832008-05-26 13:01:01 +00004659 if (PyTuple_Check(args)) {
4660 arglen = PyTuple_GET_SIZE(args);
4661 argidx = 0;
4662 }
4663 else {
4664 arglen = -1;
4665 argidx = -2;
4666 }
4667 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4668 !PyObject_TypeCheck(args, &PyBaseString_Type))
4669 dict = args;
4670 while (--fmtcnt >= 0) {
4671 if (*fmt != '%') {
4672 if (--rescnt < 0) {
4673 rescnt = fmtcnt + 100;
4674 reslen += rescnt;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004675 if (_PyString_Resize(&result, reslen) < 0)
Christian Heimes44720832008-05-26 13:01:01 +00004676 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004677 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00004678 + reslen - rescnt;
4679 --rescnt;
4680 }
4681 *res++ = *fmt++;
4682 }
4683 else {
4684 /* Got a format specifier */
4685 int flags = 0;
4686 Py_ssize_t width = -1;
4687 int prec = -1;
4688 int c = '\0';
4689 int fill;
4690 int isnumok;
4691 PyObject *v = NULL;
4692 PyObject *temp = NULL;
4693 char *pbuf;
4694 int sign;
4695 Py_ssize_t len;
4696 char formatbuf[FORMATBUFLEN];
Mark Dickinson18cfada2009-11-23 18:46:41 +00004697 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004698#ifdef Py_USING_UNICODE
4699 char *fmt_start = fmt;
4700 Py_ssize_t argidx_start = argidx;
4701#endif
4702
4703 fmt++;
4704 if (*fmt == '(') {
4705 char *keystart;
4706 Py_ssize_t keylen;
4707 PyObject *key;
4708 int pcount = 1;
4709
4710 if (dict == NULL) {
4711 PyErr_SetString(PyExc_TypeError,
4712 "format requires a mapping");
4713 goto error;
4714 }
4715 ++fmt;
4716 --fmtcnt;
4717 keystart = fmt;
4718 /* Skip over balanced parentheses */
4719 while (pcount > 0 && --fmtcnt >= 0) {
4720 if (*fmt == ')')
4721 --pcount;
4722 else if (*fmt == '(')
4723 ++pcount;
4724 fmt++;
4725 }
4726 keylen = fmt - keystart - 1;
4727 if (fmtcnt < 0 || pcount > 0) {
4728 PyErr_SetString(PyExc_ValueError,
4729 "incomplete format key");
4730 goto error;
4731 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004732 key = PyString_FromStringAndSize(keystart,
Christian Heimes44720832008-05-26 13:01:01 +00004733 keylen);
4734 if (key == NULL)
4735 goto error;
4736 if (args_owned) {
4737 Py_DECREF(args);
4738 args_owned = 0;
4739 }
4740 args = PyObject_GetItem(dict, key);
4741 Py_DECREF(key);
4742 if (args == NULL) {
4743 goto error;
4744 }
4745 args_owned = 1;
4746 arglen = -1;
4747 argidx = -2;
4748 }
4749 while (--fmtcnt >= 0) {
4750 switch (c = *fmt++) {
4751 case '-': flags |= F_LJUST; continue;
4752 case '+': flags |= F_SIGN; continue;
4753 case ' ': flags |= F_BLANK; continue;
4754 case '#': flags |= F_ALT; continue;
4755 case '0': flags |= F_ZERO; continue;
4756 }
4757 break;
4758 }
4759 if (c == '*') {
4760 v = getnextarg(args, arglen, &argidx);
4761 if (v == NULL)
4762 goto error;
4763 if (!PyInt_Check(v)) {
4764 PyErr_SetString(PyExc_TypeError,
4765 "* wants int");
4766 goto error;
4767 }
4768 width = PyInt_AsLong(v);
4769 if (width < 0) {
4770 flags |= F_LJUST;
4771 width = -width;
4772 }
4773 if (--fmtcnt >= 0)
4774 c = *fmt++;
4775 }
4776 else if (c >= 0 && isdigit(c)) {
4777 width = c - '0';
4778 while (--fmtcnt >= 0) {
4779 c = Py_CHARMASK(*fmt++);
4780 if (!isdigit(c))
4781 break;
4782 if ((width*10) / 10 != width) {
4783 PyErr_SetString(
4784 PyExc_ValueError,
4785 "width too big");
4786 goto error;
4787 }
4788 width = width*10 + (c - '0');
4789 }
4790 }
4791 if (c == '.') {
4792 prec = 0;
4793 if (--fmtcnt >= 0)
4794 c = *fmt++;
4795 if (c == '*') {
4796 v = getnextarg(args, arglen, &argidx);
4797 if (v == NULL)
4798 goto error;
4799 if (!PyInt_Check(v)) {
4800 PyErr_SetString(
4801 PyExc_TypeError,
4802 "* wants int");
4803 goto error;
4804 }
4805 prec = PyInt_AsLong(v);
4806 if (prec < 0)
4807 prec = 0;
4808 if (--fmtcnt >= 0)
4809 c = *fmt++;
4810 }
4811 else if (c >= 0 && isdigit(c)) {
4812 prec = c - '0';
4813 while (--fmtcnt >= 0) {
4814 c = Py_CHARMASK(*fmt++);
4815 if (!isdigit(c))
4816 break;
4817 if ((prec*10) / 10 != prec) {
4818 PyErr_SetString(
4819 PyExc_ValueError,
4820 "prec too big");
4821 goto error;
4822 }
4823 prec = prec*10 + (c - '0');
4824 }
4825 }
4826 } /* prec */
4827 if (fmtcnt >= 0) {
4828 if (c == 'h' || c == 'l' || c == 'L') {
4829 if (--fmtcnt >= 0)
4830 c = *fmt++;
4831 }
4832 }
4833 if (fmtcnt < 0) {
4834 PyErr_SetString(PyExc_ValueError,
4835 "incomplete format");
4836 goto error;
4837 }
4838 if (c != '%') {
4839 v = getnextarg(args, arglen, &argidx);
4840 if (v == NULL)
4841 goto error;
4842 }
4843 sign = 0;
4844 fill = ' ';
4845 switch (c) {
4846 case '%':
4847 pbuf = "%";
4848 len = 1;
4849 break;
4850 case 's':
4851#ifdef Py_USING_UNICODE
4852 if (PyUnicode_Check(v)) {
4853 fmt = fmt_start;
4854 argidx = argidx_start;
4855 goto unicode;
4856 }
4857#endif
4858 temp = _PyObject_Str(v);
4859#ifdef Py_USING_UNICODE
4860 if (temp != NULL && PyUnicode_Check(temp)) {
4861 Py_DECREF(temp);
4862 fmt = fmt_start;
4863 argidx = argidx_start;
4864 goto unicode;
4865 }
4866#endif
4867 /* Fall through */
4868 case 'r':
4869 if (c == 'r')
4870 temp = PyObject_Repr(v);
4871 if (temp == NULL)
4872 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004873 if (!PyString_Check(temp)) {
Christian Heimes44720832008-05-26 13:01:01 +00004874 PyErr_SetString(PyExc_TypeError,
4875 "%s argument has non-string str()");
4876 Py_DECREF(temp);
4877 goto error;
4878 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004879 pbuf = PyString_AS_STRING(temp);
4880 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004881 if (prec >= 0 && len > prec)
4882 len = prec;
4883 break;
4884 case 'i':
4885 case 'd':
4886 case 'u':
4887 case 'o':
4888 case 'x':
4889 case 'X':
4890 if (c == 'i')
4891 c = 'd';
4892 isnumok = 0;
4893 if (PyNumber_Check(v)) {
4894 PyObject *iobj=NULL;
4895
4896 if (PyInt_Check(v) || (PyLong_Check(v))) {
4897 iobj = v;
4898 Py_INCREF(iobj);
4899 }
4900 else {
4901 iobj = PyNumber_Int(v);
4902 if (iobj==NULL) iobj = PyNumber_Long(v);
4903 }
4904 if (iobj!=NULL) {
4905 if (PyInt_Check(iobj)) {
4906 isnumok = 1;
4907 pbuf = formatbuf;
4908 len = formatint(pbuf,
4909 sizeof(formatbuf),
4910 flags, prec, c, iobj);
4911 Py_DECREF(iobj);
4912 if (len < 0)
4913 goto error;
4914 sign = 1;
4915 }
4916 else if (PyLong_Check(iobj)) {
4917 int ilen;
4918
4919 isnumok = 1;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004920 temp = _PyString_FormatLong(iobj, flags,
Christian Heimes44720832008-05-26 13:01:01 +00004921 prec, c, &pbuf, &ilen);
4922 Py_DECREF(iobj);
4923 len = ilen;
4924 if (!temp)
4925 goto error;
4926 sign = 1;
4927 }
4928 else {
4929 Py_DECREF(iobj);
4930 }
4931 }
4932 }
4933 if (!isnumok) {
4934 PyErr_Format(PyExc_TypeError,
4935 "%%%c format: a number is required, "
4936 "not %.200s", c, Py_TYPE(v)->tp_name);
4937 goto error;
4938 }
4939 if (flags & F_ZERO)
4940 fill = '0';
4941 break;
4942 case 'e':
4943 case 'E':
4944 case 'f':
4945 case 'F':
4946 case 'g':
4947 case 'G':
Mark Dickinson18cfada2009-11-23 18:46:41 +00004948 temp = formatfloat(v, flags, prec, c);
4949 if (temp == NULL)
Christian Heimes44720832008-05-26 13:01:01 +00004950 goto error;
Mark Dickinson18cfada2009-11-23 18:46:41 +00004951 pbuf = PyString_AS_STRING(temp);
4952 len = PyString_GET_SIZE(temp);
Christian Heimes44720832008-05-26 13:01:01 +00004953 sign = 1;
4954 if (flags & F_ZERO)
4955 fill = '0';
4956 break;
4957 case 'c':
4958#ifdef Py_USING_UNICODE
4959 if (PyUnicode_Check(v)) {
4960 fmt = fmt_start;
4961 argidx = argidx_start;
4962 goto unicode;
4963 }
4964#endif
4965 pbuf = formatbuf;
4966 len = formatchar(pbuf, sizeof(formatbuf), v);
4967 if (len < 0)
4968 goto error;
4969 break;
4970 default:
4971 PyErr_Format(PyExc_ValueError,
4972 "unsupported format character '%c' (0x%x) "
4973 "at index %zd",
4974 c, c,
4975 (Py_ssize_t)(fmt - 1 -
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004976 PyString_AsString(format)));
Christian Heimes44720832008-05-26 13:01:01 +00004977 goto error;
4978 }
4979 if (sign) {
4980 if (*pbuf == '-' || *pbuf == '+') {
4981 sign = *pbuf++;
4982 len--;
4983 }
4984 else if (flags & F_SIGN)
4985 sign = '+';
4986 else if (flags & F_BLANK)
4987 sign = ' ';
4988 else
4989 sign = 0;
4990 }
4991 if (width < len)
4992 width = len;
4993 if (rescnt - (sign != 0) < width) {
4994 reslen -= rescnt;
4995 rescnt = width + fmtcnt + 100;
4996 reslen += rescnt;
4997 if (reslen < 0) {
4998 Py_DECREF(result);
4999 Py_XDECREF(temp);
5000 return PyErr_NoMemory();
5001 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005002 if (_PyString_Resize(&result, reslen) < 0) {
Christian Heimes44720832008-05-26 13:01:01 +00005003 Py_XDECREF(temp);
5004 return NULL;
5005 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005006 res = PyString_AS_STRING(result)
Christian Heimes44720832008-05-26 13:01:01 +00005007 + reslen - rescnt;
5008 }
5009 if (sign) {
5010 if (fill != ' ')
5011 *res++ = sign;
5012 rescnt--;
5013 if (width > len)
5014 width--;
5015 }
5016 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5017 assert(pbuf[0] == '0');
5018 assert(pbuf[1] == c);
5019 if (fill != ' ') {
5020 *res++ = *pbuf++;
5021 *res++ = *pbuf++;
5022 }
5023 rescnt -= 2;
5024 width -= 2;
5025 if (width < 0)
5026 width = 0;
5027 len -= 2;
5028 }
5029 if (width > len && !(flags & F_LJUST)) {
5030 do {
5031 --rescnt;
5032 *res++ = fill;
5033 } while (--width > len);
5034 }
5035 if (fill == ' ') {
5036 if (sign)
5037 *res++ = sign;
5038 if ((flags & F_ALT) &&
5039 (c == 'x' || c == 'X')) {
5040 assert(pbuf[0] == '0');
5041 assert(pbuf[1] == c);
5042 *res++ = *pbuf++;
5043 *res++ = *pbuf++;
5044 }
5045 }
5046 Py_MEMCPY(res, pbuf, len);
5047 res += len;
5048 rescnt -= len;
5049 while (--width >= len) {
5050 --rescnt;
5051 *res++ = ' ';
5052 }
5053 if (dict && (argidx < arglen) && c != '%') {
5054 PyErr_SetString(PyExc_TypeError,
5055 "not all arguments converted during string formatting");
5056 Py_XDECREF(temp);
5057 goto error;
5058 }
5059 Py_XDECREF(temp);
5060 } /* '%' */
5061 } /* until end */
5062 if (argidx < arglen && !dict) {
5063 PyErr_SetString(PyExc_TypeError,
5064 "not all arguments converted during string formatting");
5065 goto error;
5066 }
5067 if (args_owned) {
5068 Py_DECREF(args);
5069 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005070 _PyString_Resize(&result, reslen - rescnt);
Christian Heimes44720832008-05-26 13:01:01 +00005071 return result;
5072
5073#ifdef Py_USING_UNICODE
5074 unicode:
5075 if (args_owned) {
5076 Py_DECREF(args);
5077 args_owned = 0;
5078 }
5079 /* Fiddle args right (remove the first argidx arguments) */
5080 if (PyTuple_Check(orig_args) && argidx > 0) {
5081 PyObject *v;
5082 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5083 v = PyTuple_New(n);
5084 if (v == NULL)
5085 goto error;
5086 while (--n >= 0) {
5087 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5088 Py_INCREF(w);
5089 PyTuple_SET_ITEM(v, n, w);
5090 }
5091 args = v;
5092 } else {
5093 Py_INCREF(orig_args);
5094 args = orig_args;
5095 }
5096 args_owned = 1;
5097 /* Take what we have of the result and let the Unicode formatting
5098 function format the rest of the input. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005099 rescnt = res - PyString_AS_STRING(result);
5100 if (_PyString_Resize(&result, rescnt))
Christian Heimes44720832008-05-26 13:01:01 +00005101 goto error;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005102 fmtcnt = PyString_GET_SIZE(format) - \
5103 (fmt - PyString_AS_STRING(format));
Christian Heimes44720832008-05-26 13:01:01 +00005104 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5105 if (format == NULL)
5106 goto error;
5107 v = PyUnicode_Format(format, args);
5108 Py_DECREF(format);
5109 if (v == NULL)
5110 goto error;
5111 /* Paste what we have (result) to what the Unicode formatting
5112 function returned (v) and return the result (or error) */
5113 w = PyUnicode_Concat(result, v);
5114 Py_DECREF(result);
5115 Py_DECREF(v);
5116 Py_DECREF(args);
5117 return w;
5118#endif /* Py_USING_UNICODE */
5119
5120 error:
5121 Py_DECREF(result);
5122 if (args_owned) {
5123 Py_DECREF(args);
5124 }
5125 return NULL;
5126}
5127
5128void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005129PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005130{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005131 register PyStringObject *s = (PyStringObject *)(*p);
Christian Heimes44720832008-05-26 13:01:01 +00005132 PyObject *t;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005133 if (s == NULL || !PyString_Check(s))
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005134 Py_FatalError("PyString_InternInPlace: strings only please!");
Christian Heimes44720832008-05-26 13:01:01 +00005135 /* If it's a string subclass, we don't really know what putting
5136 it in the interned dict might do. */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005137 if (!PyString_CheckExact(s))
Christian Heimes44720832008-05-26 13:01:01 +00005138 return;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005139 if (PyString_CHECK_INTERNED(s))
Christian Heimes44720832008-05-26 13:01:01 +00005140 return;
5141 if (interned == NULL) {
5142 interned = PyDict_New();
5143 if (interned == NULL) {
5144 PyErr_Clear(); /* Don't leave an exception */
5145 return;
5146 }
5147 }
5148 t = PyDict_GetItem(interned, (PyObject *)s);
5149 if (t) {
5150 Py_INCREF(t);
5151 Py_DECREF(*p);
5152 *p = t;
5153 return;
5154 }
5155
5156 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5157 PyErr_Clear();
5158 return;
5159 }
5160 /* The two references in interned are not counted by refcnt.
5161 The string deallocator will take care of this */
5162 Py_REFCNT(s) -= 2;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005163 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005164}
5165
5166void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005167PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005168{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005169 PyString_InternInPlace(p);
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005170 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5171 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005172 Py_INCREF(*p);
5173 }
5174}
5175
5176
5177PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005178PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005179{
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005180 PyObject *s = PyString_FromString(cp);
Christian Heimes44720832008-05-26 13:01:01 +00005181 if (s == NULL)
5182 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005183 PyString_InternInPlace(&s);
Christian Heimes44720832008-05-26 13:01:01 +00005184 return s;
5185}
5186
5187void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005188PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005189{
5190 int i;
5191 for (i = 0; i < UCHAR_MAX + 1; i++) {
5192 Py_XDECREF(characters[i]);
5193 characters[i] = NULL;
5194 }
5195 Py_XDECREF(nullstring);
5196 nullstring = NULL;
5197}
5198
5199void _Py_ReleaseInternedStrings(void)
5200{
5201 PyObject *keys;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005202 PyStringObject *s;
Christian Heimes44720832008-05-26 13:01:01 +00005203 Py_ssize_t i, n;
5204 Py_ssize_t immortal_size = 0, mortal_size = 0;
5205
5206 if (interned == NULL || !PyDict_Check(interned))
5207 return;
5208 keys = PyDict_Keys(interned);
5209 if (keys == NULL || !PyList_Check(keys)) {
5210 PyErr_Clear();
5211 return;
5212 }
5213
5214 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5215 detector, interned strings are not forcibly deallocated; rather, we
5216 give them their stolen references back, and then clear and DECREF
5217 the interned dict. */
5218
5219 n = PyList_GET_SIZE(keys);
5220 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5221 n);
5222 for (i = 0; i < n; i++) {
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00005223 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
Christian Heimes44720832008-05-26 13:01:01 +00005224 switch (s->ob_sstate) {
5225 case SSTATE_NOT_INTERNED:
5226 /* XXX Shouldn't happen */
5227 break;
5228 case SSTATE_INTERNED_IMMORTAL:
5229 Py_REFCNT(s) += 1;
5230 immortal_size += Py_SIZE(s);
5231 break;
5232 case SSTATE_INTERNED_MORTAL:
5233 Py_REFCNT(s) += 2;
5234 mortal_size += Py_SIZE(s);
5235 break;
5236 default:
5237 Py_FatalError("Inconsistent interned string state.");
5238 }
5239 s->ob_sstate = SSTATE_NOT_INTERNED;
5240 }
5241 fprintf(stderr, "total size of all interned strings: "
5242 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5243 "mortal/immortal\n", mortal_size, immortal_size);
5244 Py_DECREF(keys);
5245 PyDict_Clear(interned);
5246 Py_DECREF(interned);
5247 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005248}